In [None]:
!pip install chart_studio

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# plotly standard imports
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objs as go
import chart_studio.plotly as py

# Cufflinks wrapper on plotly
import cufflinks as cf

from IPython.core.interactiveshell import InteractiveShell
from plotly.offline import init_notebook_mode, iplot

# pandas: render up to 30 columns when a frame is displayed
pd.set_option('display.max_columns', 30)

# IPython: display the value of every expression in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'

# plotly / cufflinks: switch both to offline notebook rendering
init_notebook_mode(connected=True)
cf.go_offline(connected=True)

# cufflinks: global theme and sharing default
cf.set_config_file(theme='pearl', world_readable=True)


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Gather the full path of every file below the Kaggle input directory,
# then print them one per line (same order os.walk yields them).
input_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_paths:
    print(path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the grouped report from the Kaggle input mount and preview the first rows.
DATA_PATH = '/kaggle/input/corona-virus-report/full_grouped.csv'
df = pd.read_csv(DATA_PATH)
df.head()

# Checking the shape.

In [None]:
# (number of rows, number of columns) of the loaded frame
df.shape

# Checking for Missing Values

In [None]:
# Number of missing entries in each column
df.isna().sum()

The dataset has already been cleaned.

# Extracting the information

In [None]:
# Per-column dtype and non-null count summary
df.info()

# Changing the column type of Date column

In [None]:
# Parse the 'Date' column into pandas datetimes (column position is unchanged)
df = df.assign(Date=pd.to_datetime(df['Date']))

# Statistics of the dataframe

In [None]:
# Summary statistics of the frame's columns
df.describe()

# Checking the columns

In [None]:
# Column labels available for the analysis below
df.columns

# Checking the WHO regions and the number of records in each

In [None]:
# Number of rows recorded for each WHO region (most frequent first)
df['WHO Region'].value_counts()

# Confirmed cases in various WHO Region

In [None]:
# Sum of 'Confirmed' per WHO region, largest first, drawn as a bar chart.
# Only the numeric column is selected: also selecting the grouping key
# ('WHO Region') made sum() aggregate the region strings themselves.
confirmed_by_region = (
    df.groupby('WHO Region')[['Confirmed']]
    .sum()
    .sort_values('Confirmed', ascending=False)
)
confirmed_by_region.head(15).iplot(
    kind='bar',
    xTitle='WHO Region',
    yTitle='Confirmed Cases',
    title='Confirmed cases in various WHO Region',
)

# Extracting the EURO region

In [None]:
# Rows whose WHO region is 'Europe'.
# .copy() gives df_euro its own data, so columns added to it later do not
# raise SettingWithCopyWarning.
df_euro = df.loc[df['WHO Region'] == 'Europe'].copy()
# Preview only; displaying the full frame adds nothing for the reader.
df_euro.head()

# Creating the month from the date

In [None]:
# Add the abbreviated month name ('Jan', 'Feb', ...) derived from 'Date'.
# assign() builds a new frame instead of writing a column into what may be a
# slice of `df`, which avoids SettingWithCopyWarning and keeps the cell re-runnable.
df_euro = df_euro.assign(Month=pd.to_datetime(df_euro['Date']).dt.strftime('%b'))
df_euro.head()

# Sorting the data by Date

In [None]:
# European rows ordered from the most recent date to the oldest
df_sort_date = df_euro.sort_values(by='Date', ascending=False)

In [None]:
# First rows of the date-sorted European frame
df_sort_date.head()

# Sorting countries by new deaths

In [None]:
# European rows ordered from the highest 'New deaths' value downwards
df_death = df_euro.sort_values(by='New deaths', ascending=False)
df_death.head()

# Country with the highest recorded death count

In [None]:
# The single European row with the largest 'Deaths' value
df_euro.sort_values(by='Deaths', ascending=False).head(1)

# Determining the month with the most confirmed cases

In [None]:
# Per-month sums of the numeric columns, ordered by 'Confirmed' (largest first).
# numeric_only=True keeps the datetime and text columns out of the sum; on
# pandas 2.x summing them raises a TypeError.
df_month_max = (
    df_euro.groupby('Month')
    .sum(numeric_only=True)
    .sort_values('Confirmed', ascending=False)
)
df_month_max

# Determining the total cases per country over the period of time

In [None]:
# Sum every case column per European country and plot the result.
# NOTE(review): if 'Confirmed'/'Deaths'/'Recovered'/'Active' are running totals,
# summing them across dates overstates the figures — confirm the intent.
case_columns = ['Confirmed', 'Deaths', 'Recovered', 'Active',
                'New cases', 'New deaths', 'New recovered']
totals_by_country = df_euro.groupby('Country/Region')[case_columns].sum()
totals_by_country.iplot()

# Generating the heatmap

In [None]:
# Pairwise correlations between the numeric columns of the European frame.
# Non-numeric columns (country, region, month, date) are dropped first:
# DataFrame.corr() raises on them with pandas 2.x.
# `ff` (plotly.figure_factory) is imported with the other imports at the top.
corrs = df_euro.select_dtypes(include='number').corr()

figure = ff.create_annotated_heatmap(
    z=corrs.values,
    x=list(corrs.columns),
    y=list(corrs.index),
    colorscale='solar',
    annotation_text=corrs.round(2).values,
    showscale=True,
    reversescale=True,
)

# Extra left/top margin and a fixed figure size
figure.layout.margin = dict(l=200, t=200)
figure.layout.height = 800
figure.layout.width = 1000

iplot(figure)

# Cases variation in Russia with time

In [None]:
# Rows for Russia only, indexed by date
is_russia = df_euro['Country/Region'] == 'Russia'
df_russia = df_euro[is_russia].set_index('Date')
df_russia.head()

# Starting of cases

In [None]:
# Line chart for Russia over time: 'Confirmed' on the primary axis,
# 'Recovered' on the secondary axis, 'Active' as hover text.
# The title typo "casess" is corrected to "cases".
russia_cases = df_russia[['Confirmed', 'Deaths', 'Recovered', 'Active',
                          'New cases', 'New deaths', 'New recovered']]
russia_cases.iplot(
    y='Confirmed',
    mode='lines',
    secondary_y='Recovered',
    secondary_y_title='Recovered',
    opacity=0.8,
    size=8,
    symbol=1,
    xTitle='Date',
    yTitle='Confirmed',
    text='Active',
    title='Confirmed and Active cases Count over Time',
)

    

# Log Plot

In [None]:
# 'Deaths' against 'Recovered' for Russia with a logarithmic x axis and a
# best-fit line; rows are ordered by 'Recovered' so the line runs left to right.
layout = {
    'xaxis': {'type': 'log', 'title': 'Recovered'},
    'yaxis': {'title': 'Deaths'},
    'title': 'Recovered vs Death Count Log Axis',
}

russia_by_recovered = df_russia.sort_values(by='Recovered')
russia_by_recovered.iplot(
    x='Recovered',
    y='Deaths',
    mode='lines',
    text='Active',
    layout=layout,
    bestfit=True,
    bestfit_colors=['blue'],
)

In [None]:
# Europe-wide 'Deaths' and 'Active' totals per date.
# The counts are summed across countries for each date; the previous cumsum()
# ran over every country/date row and produced many points per date.
# NOTE(review): assumes 'Deaths' and 'Active' are already per-day totals for
# each country (the frame carries a separate 'New deaths' column) — confirm.
europe_daily_totals = df_euro.groupby('Date')[['Deaths', 'Active']].sum()
europe_daily_totals.iplot(
    y='Deaths',
    secondary_y='Active',
    yTitle='Deaths',
    secondary_y_title='Active',
    title='Deaths and Active Count Totals',
)

In [None]:
# Choropleth of 'Deaths' for the European rows, colour scale clipped to 1-2000.
# `px` (plotly.express) is imported with the other imports at the top.
# NOTE(review): df_euro holds several rows per country; no animation_frame is
# set, so confirm which row ends up colouring each country.
fig = px.choropleth(
    df_euro,
    locations="Country/Region",
    locationmode='country names',
    color="Deaths",
    hover_name="Country/Region",
    range_color=[1, 2000],
    color_continuous_scale='portland',
    title='European Countries with Deaths Cases',
    scope='europe',
    height=800,
)
fig.show()

# Nordic Countries

In [None]:
# European rows for the five Nordic countries, in the order listed below;
# 'Country/Region' is used as a temporary index for the label lookup.
nordic_names = ["Finland", "Sweden", "Norway", "Denmark", "Iceland"]
nordic_countries = (
    df_euro.set_index('Country/Region')
    .loc[nordic_names]
    .reset_index()
)
nordic_countries.head()

# Cases in nordic countries

In [None]:
# Per-country sums of the numeric columns for the Nordic countries,
# ordered by 'Confirmed' (largest first).
# numeric_only=True keeps datetime/text columns out of the sum (it raises on
# pandas 2.x otherwise). The frame is stored before plotting because iplot()
# returns nothing, which previously left df_nordic_sum set to None.
df_nordic_sum = (
    nordic_countries.groupby('Country/Region')
    .sum(numeric_only=True)
    .sort_values('Confirmed', ascending=False)
)
df_nordic_sum.iplot(
    kind='bar',
    yTitle='Count',
    title='Count of cases in nordic countries',
)

# Analysis of Asia

In [None]:
# Rows of the three WHO regions treated as "Asia" in this analysis,
# looked up through a temporary 'WHO Region' index.
asia_regions = ['South-East Asia', 'Western Pacific', 'Eastern Mediterranean']
df_asia = df.set_index('WHO Region').loc[asia_regions]

In [None]:
# Move 'WHO Region' back from the index into a regular column.
# Guarded so that re-running this cell does not add a spurious 'index' column.
if df_asia.index.name == 'WHO Region':
    df_asia = df_asia.reset_index()

## Death vs Recovered cases over time

In [None]:
# Number of rows per WHO region in the Asian subset
df_asia['WHO Region'].value_counts()

In [None]:
# Per-date sums of 'Deaths' and 'Recovered' across the Asian subset;
# 'Recovered' is drawn on the secondary y axis.
deaths_recovered_by_date = df_asia.groupby('Date')[['Deaths', 'Recovered']].sum()
deaths_recovered_by_date.iplot(
    title='Death vs Recovered Cases',
    xTitle='Date',
    yTitle='Deaths',
    secondary_y='Recovered',
    secondary_y_title='Recovered',
)

## Logarithmic view of the cases

In [None]:
# Per-date sums of 'Confirmed' and 'Recovered' with best-fit lines;
# 'Recovered' is drawn on the secondary y axis.
# The previous version built a `layout` dict with a log y axis but never passed
# it to iplot(), so the chart was linear despite its title. logy=True asks
# cufflinks for the log axis directly and keeps the title/axis-title arguments.
confirmed_recovered_by_date = df_asia.groupby('Date')[['Confirmed', 'Recovered']].sum()
confirmed_recovered_by_date.iplot(
    yTitle='Confirmed',
    xTitle='Date',
    mode='lines',
    logy=True,
    bestfit=True,
    bestfit_colors=['orange', 'blue'],
    secondary_y='Recovered',
    secondary_y_title='Recovered',
    title='Log scale showing the recovered to confirmed cases',
)

In [None]:
# Choropleth of 'Deaths' for the Asian subset, colour scale clipped to 1-20000.
# `px` (plotly.express) is imported with the other imports at the top, so the
# repeated import in this cell is dropped.
fig = px.choropleth(
    df_asia,
    locations="Country/Region",
    locationmode='country names',
    color="Deaths",
    hover_name="Country/Region",
    range_color=[1, 20000],
    color_continuous_scale='portland',
    title='Asian Countries with Death Cases',
    scope='asia',
    height=800,
)
fig.show()

In [None]:
# Sum of 'Deaths' per WHO region in the Asian subset, smallest first.
# Only the numeric column is selected: also selecting the grouping key
# ('WHO Region') made sum() aggregate the region strings themselves.
deaths_by_region = (
    df_asia.groupby('WHO Region')[['Deaths']]
    .sum()
    .sort_values('Deaths')
)
deaths_by_region.iplot(
    kind='bar',
    xTitle='WHO Region',
    yTitle='Deaths',
    colors='red',
    title='Death per WHO Region',
)

In [None]:
# Per-date sums of 'Confirmed' and 'Deaths' across the Asian subset;
# 'Deaths' is drawn on the secondary y axis.
confirmed_deaths_by_date = df_asia.groupby('Date')[['Confirmed', 'Deaths']].sum()
confirmed_deaths_by_date.iplot(
    title='Confirmed cases and Deaths over time',
    xTitle='Date',
    yTitle='Confirmed',
    secondary_y='Deaths',
    secondary_y_title='Deaths',
)

## Active cases in Asia

In [None]:
# Single-row preview of the Asian subset to recall its columns
df_asia.head(1)

In [None]:
# Sum of 'Active' per country in the Asian subset; the 15 largest are plotted.
# Only the numeric column is selected: also selecting the grouping key
# ('Country/Region') made sum() aggregate the country names themselves.
active_by_country = (
    df_asia.groupby('Country/Region')[['Active']]
    .sum()
    .sort_values('Active', ascending=False)
)
active_by_country.head(15).iplot(
    xTitle='Country',
    yTitle='Active Cases',
    title='Active cases per country',
)

## Active cases over time

In [None]:
# Choropleth of 'Active' for the Asian subset, colour scale clipped to 1000-20000.
# `px` (plotly.express) is imported with the other imports at the top, so the
# repeated import in this cell is dropped.
fig = px.choropleth(
    df_asia,
    locations="Country/Region",
    locationmode='country names',
    color="Active",
    hover_name="Country/Region",
    range_color=[1000, 20000],
    color_continuous_scale='portland',
    title='Asian Countries with Active Cases',
    scope='asia',
    height=800,
)
fig.show()

In [None]:
# Sums of 'Recovered' and 'New recovered' per country in the Asian subset;
# the 15 countries with the largest 'Recovered' sum are plotted as bars, with
# 'New recovered' on the secondary y axis.
# Only numeric columns are selected: also selecting the grouping key
# ('Country/Region') made sum() aggregate the country names themselves.
recovered_by_country = (
    df_asia.groupby('Country/Region')[['Recovered', 'New recovered']]
    .sum()
    .sort_values('Recovered', ascending=False)
)
recovered_by_country.head(15).iplot(
    kind='bar',
    xTitle='Country',
    yTitle='Recovered',
    secondary_y='New recovered',
    secondary_y_title='New recovered',
    title='Recovered vs New Recovered',
)

## Lowest deaths

In [None]:
print("Countries with Lowest Deaths")

# Rank countries, not individual country/date rows: keep each country's most
# recent row so that every country appears at most once in the table.
# NOTE(review): assumes df_asia holds one row per country per date — confirm.
latest_by_country = (
    df_asia.sort_values('Date')
    .drop_duplicates('Country/Region', keep='last')
)

# Countries with more than 100 confirmed cases, the 16 with the fewest deaths,
# shown from most to fewest confirmed cases. (The old trailing [:20] could
# never drop a row after the [:16] cut and is removed.)
lowest_deaths = (
    latest_by_country.loc[latest_by_country['Confirmed'] > 100,
                          ['Country/Region', 'Confirmed', 'Deaths']]
    .sort_values('Deaths', ascending=True)
    .head(16)
    .sort_values('Confirmed', ascending=False)
)
lowest_deaths.style.background_gradient(cmap='Reds')

## THE SPREAD

In [None]:
# Maximum 'Confirmed' and 'Deaths' per (date, country).
# Columns are selected with a list: the tuple form groupby(...)['a', 'b']
# raises on pandas 2.x.
spread_by_day = (
    df.groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths']]
    .max()
    .reset_index()
)
# Text labels for the animation frames; formatted after the groupby so the
# rows keep the order the groupby produced.
spread_by_day['Date'] = pd.to_datetime(spread_by_day['Date']).dt.strftime('%m/%d/%Y')
# Marker size: 'Confirmed' raised to the power 0.3
spread_by_day['size'] = spread_by_day['Confirmed'].pow(0.3)

fig = px.scatter_geo(
    spread_by_day,
    locations="Country/Region",
    locationmode='country names',
    color="Confirmed",
    size='size',
    hover_name="Country/Region",
    range_color=[0, 1500],
    projection="natural earth",
    animation_frame="Date",
    title='COVID-19: Spread Over Time',
    color_continuous_scale="portland",
)
fig.show()

# Recovery

In [None]:
# Maximum 'Confirmed', 'Deaths' and 'Recovered' per (date, country).
# Columns are selected with a list: the tuple form groupby(...)['a', 'b']
# raises on pandas 2.x.
recovery_by_day = (
    df.groupby(['Date', 'Country/Region'])[['Confirmed', 'Deaths', 'Recovered']]
    .max()
    .reset_index()
)
# Text labels for the animation frames; formatted after the groupby so the
# rows keep the order the groupby produced.
recovery_by_day['Date'] = pd.to_datetime(recovery_by_day['Date']).dt.strftime('%m/%d/%Y')
# Marker size: 'Recovered' raised to the power 0.3
recovery_by_day['size'] = recovery_by_day['Recovered'].pow(0.3)

fig = px.scatter_geo(
    recovery_by_day,
    locations="Country/Region",
    locationmode='country names',
    color="Recovered",
    size='size',
    hover_name="Country/Region",
    range_color=[0, 1500],
    projection="natural earth",
    animation_frame="Date",
    title='COVID-19: Recovery Over Time',
    color_continuous_scale="peach",
)
fig.show()