In [489]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Shared Plotly display configuration, meant to be passed to fig.show(config=...).
# 'toImageButtonOptions' customises the mode-bar "download plot" button.
# 'scrollZoom' is a top-level config key: nested inside 'toImageButtonOptions'
# (as it was before) Plotly silently ignores it, so it lives beside it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': 'img',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}

In [490]:
# Read the cleaned COVID-19 table, then turn the 'Date' strings into real
# timestamps so later cells can sort, compare and group by date.
df = pd.read_csv('Covid-19_clean_data.csv')
df = df.assign(Date=pd.to_datetime(df['Date']))
df.head(6)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0
1,,Albania,41.1533,20.1683,2020-01-22,0,0,0
2,,Algeria,28.0339,1.6596,2020-01-22,0,0,0
3,,Andorra,42.5063,1.5218,2020-01-22,0,0,0
4,,Angola,-11.2027,17.8739,2020-01-22,0,0,0
5,,Antigua and Barbuda,17.0608,-61.7964,2020-01-22,0,0,0


In [491]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called bare; wrapping it in print() only appended a stray "None" line.
df.info()
# Number of missing values in each column.
print(df.isna().sum())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16055 entries, 0 to 16054
Data columns (total 8 columns):
Province/State    4875 non-null object
Country/Region    16055 non-null object
Lat               16055 non-null float64
Long              16055 non-null float64
Date              16055 non-null datetime64[ns]
Confirmed         16055 non-null int64
Deaths            16055 non-null int64
Recovered         16055 non-null int64
dtypes: datetime64[ns](1), float64(2), int64(3), object(2)
memory usage: 1003.5+ KB
None
Province/State    11180
Country/Region        0
Lat                   0
Long                  0
Date                  0
Confirmed             0
Deaths                0
Recovered             0
dtype: int64


## Data Pre-processing

In [492]:
import pycountry_convert as pc

def continent(country):
    """Look up the continent code of a country name.

    Parameters
    ----------
    country : str
        Country name as it appears in the data set.

    Returns
    -------
    str
        The value returned by ``pc.country_alpha2_to_continent_code`` for the
        country's alpha-2 code (e.g. ``'AS'``, ``'EU'``), or ``"Unknown"``
        when either lookup step fails.
    """
    try:
        country_code = pc.country_name_to_country_alpha2(country, cn_name_format="default")
        return pc.country_alpha2_to_continent_code(country_code)
    except Exception:
        # Best-effort lookup: any failure to resolve the name or the code maps
        # to "Unknown".  Catching Exception (not a bare ``except:``) keeps
        # KeyboardInterrupt / SystemExit propagating, so a long loop over this
        # function can still be interrupted.
        return "Unknown"

In [493]:
# Work on a copy: the raw frame `df` stays exactly as loaded, and this cell
# gives the same result however many times it is re-run.
df_clean = df.copy()
df_clean["Province/State"] = df_clean["Province/State"].fillna("")

# Fill missing counts with zero *before* deriving Active, so a missing
# Recovered/Deaths value counts as 0 instead of turning Active into NaN.
df_clean["Recovered"] = df_clean["Recovered"].fillna(0)
df_clean["Deaths"] = df_clean["Deaths"].fillna(0)
df_clean["Active"] = (
    df_clean["Confirmed"] - df_clean["Recovered"] - df_clean["Deaths"]
).fillna(0)
df_clean.loc[df_clean['Country/Region'] == 'US', 'Country/Region'] = 'USA'

# Continent codes returned by continent() -> display names.  Any other value
# (e.g. "Unknown") is kept unchanged.
continent_names = {
    'AS': 'Asia',
    'EU': 'Europe',
    'AF': 'Africa',
    'NA': 'North America',
    'SA': 'South America',
    'OC': 'Australia',
}

# Resolve every distinct country once and broadcast the result with map(),
# rather than calling continent() and assigning through .loc for each row.
continent_by_country = {}
for country_name in df_clean["Country/Region"].unique():
    continent_code = continent(country_name)
    continent_by_country[country_name] = continent_names.get(continent_code, continent_code)
df_clean["Continent"] = df_clean["Country/Region"].map(continent_by_country)

df_clean = df_clean.sort_values(by="Date")
# 'time' is the date as a 'YYYY-MM-DD' string (used as the animation frame
# label by later plotting cells).
df_clean['time'] = df_clean['Date'].dt.strftime('%Y-%m-%d')

df_clean.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,Confirmed,Deaths,Recovered,Active,Continent,time
0,,Afghanistan,33.0,65.0,2020-01-22,0,0,0,0,Asia,2020-01-22
157,,Mauritius,-20.2,57.5,2020-01-22,0,0,0,0,Africa,2020-01-22
158,,Mexico,23.6345,-102.5528,2020-01-22,0,0,0,0,North America,2020-01-22
159,,Moldova,47.4116,28.3699,2020-01-22,0,0,0,0,Europe,2020-01-22
160,,Monaco,43.7333,7.4167,2020-01-22,0,0,0,0,Europe,2020-01-22


In [494]:
# Daily totals per country: rows sharing a date and country (its provinces /
# states) are summed together.
# The columns are selected with a list (double brackets); selecting with a
# bare tuple of keys is deprecated and raises in pandas >= 2.0.
df_group_by_country_date = (
    df_clean
    .groupby(['Date', 'Country/Region'])[['Confirmed', 'Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)

## Latest data available: up to 26/03/2020

In [495]:
# Column-wise maximum for every (country, province) pair.
# NOTE(review): using max() as "latest value" assumes the counts are cumulative
# and never decrease (other cells derive daily figures from them with diff()).
# The columns are selected with a list; a bare tuple of keys raises in pandas >= 2.0.
df_total = (
    df_clean
    .groupby(['Country/Region', 'Province/State'])[['Date', 'Confirmed', 'Deaths', 'Recovered', 'Active']]
    .max()
)
# Most recent date present in the data.
df_total['Date'].max()

Timestamp('2020-03-26 00:00:00')

## Total cases and death rate up to 26/03/2020

In [496]:
# Worldwide headline figures: each total is the sum of the corresponding
# df_total column, and the death rate is deaths as a percentage of confirmed.
date = df_total['Date'].max()
total_cases, total_deaths, total_recovered = (
    df_total[column_name].sum()
    for column_name in ('Confirmed', 'Deaths', 'Recovered')
)
death_rate = (total_deaths / total_cases) * 100

# One-row summary table.
x = pd.DataFrame(
    {
        'Date': date,
        'Total Confirmed': total_cases,
        'Total Deaths': total_deaths,
        'Total Recovered': total_recovered,
        'Death Rate': death_rate,
    },
    index=[0],
)
x.head()

Unnamed: 0,Date,Total Confirmed,Total Deaths,Total Recovered,Death Rate
0,2020-03-26,529607,23979,122033,4.527697


In [497]:
# Worldwide totals per date (all countries and provinces summed).
# The columns are selected with a list; a bare tuple of keys is deprecated and
# raises in pandas >= 2.0.
df_group_by_date = (
    df_clean
    .groupby(['Date'])[['Confirmed', 'Recovered', 'Deaths', 'Active']]
    .sum()
    .reset_index()
)
df_group_by_date.head()

Unnamed: 0,Date,Confirmed,Recovered,Deaths,Active
0,2020-01-22,555,28,17,510
1,2020-01-23,654,30,18,606
2,2020-01-24,941,36,26,879
3,2020-01-25,1434,39,42,1353
4,2020-01-26,2118,52,56,2010


 ## Increasing trend of daily cases

In [498]:
# Row-to-row change of the per-country, per-date totals.
# The columns are selected with a list; a bare tuple of keys raises in pandas >= 2.0.
temp = (
    df_clean
    .groupby(['Country/Region', 'Date'])[['Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .diff()
    .reset_index()
)

# diff() runs straight down the table, so the first row of every country holds
# the difference to the *previous country's* last row.  Flag those rows
# (country differs from the row above) and blank them out.
mask = temp['Country/Region'] != temp['Country/Region'].shift(1)
temp.loc[mask, ['Confirmed', 'Deaths', 'Recovered']] = np.nan

In [499]:
import plotly.graph_objects as go

# Stacked bar chart of the day-over-day changes held in `temp`.
# Rows containing NaN (the blanked first row of each country) are dropped first.
temp = temp.dropna()
x = temp['Date']
fig = go.Figure(go.Bar(x=x, y=temp['Confirmed'], name='Confirmed', marker_color='blue'))
fig.add_trace(go.Bar(x=x, y=temp['Recovered'], name='Recovered', marker_color='green'))
fig.add_trace(go.Bar(x=x, y=temp['Deaths'], name='Deaths', marker_color='red'))

fig.update_layout(
    barmode='stack',
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    title={
        'text': "Daily new confirmed cases including deaths and recoveries",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=16
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '111',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}

fig.show(config=config)

In [500]:
import matplotlib.pyplot as plt
import plotly.offline

# Line chart of the worldwide per-date totals in df_group_by_date.
x = df_group_by_date['Date']
fig = go.Figure(go.Scatter(x=x, y=df_group_by_date['Confirmed'], name='Confirmed', marker_color='blue', mode='lines+markers'))
fig.add_trace(go.Scatter(x=x, y=df_group_by_date['Recovered'], name='Recovered', marker_color='green', mode='lines+markers'))
fig.add_trace(go.Scatter(x=x, y=df_group_by_date['Deaths'], name='Deaths', marker_color='red', mode='lines+markers'))

fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    title={
        'text': "Cumulative Cases over period of time",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=16
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '112',
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}
fig.show(config=config)

##   Total Confirmed cases, deaths, recovered cases and mortality rate

In [501]:
# Two-level sunburst: the worldwide confirmed total split into active cases,
# deaths and recoveries (active = confirmed - deaths - recovered).
data = dict(
    Parent=['Confirmed'] * 3,
    Child=['Active', 'Deaths', 'Recovered'],
    Cases=[
        total_cases - total_deaths - total_recovered,
        total_deaths,
        total_recovered,
    ],
)
dfx = pd.DataFrame(data, columns=['Parent', 'Child', 'Cases'])

fig = px.sunburst(
    dfx,
    path=['Parent', 'Child'],
    values='Cases',
    color_continuous_scale="Agsunset_r",
)
# Show label, text and value on every sector.
fig.data[0].textinfo = 'label+text+value'
fig.update_layout(
    font=dict(family="Courier New, monospace", size=18, color="black"),
    title=dict(
        text="Breakup of total confirmed cases",
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    legend_font_size=16,
)
fig.show(config=config)

In [502]:
# Rows of the most recent date, then one summed row per country.
df_latest = df_clean[df_clean['Date'] == df_clean['Date'].max()].reset_index()
# The columns are selected with a list; a bare tuple of keys is deprecated and
# raises in pandas >= 2.0.
df_latest = (
    df_latest
    .groupby('Country/Region')[['Confirmed', 'Deaths', 'Recovered', 'Active']]
    .sum()
    .reset_index()
)

# Country ranking by confirmed cases, highest first, with a fresh 0..n-1 index.
stats = (
    df_latest[['Country/Region', 'Confirmed', 'Active', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)

stats.style.background_gradient(cmap="Blues", subset=['Confirmed', 'Active'])\
            .background_gradient(cmap="Greens", subset=['Recovered'])\
            .background_gradient(cmap="Reds", subset=['Deaths'])


Unnamed: 0,Country/Region,Confirmed,Active,Deaths,Recovered
0,USA,83836,81946,1209,681
1,China,81782,4310,3291,74181
2,Italy,80589,62013,8215,10361
3,Spain,57786,46406,4365,7015
4,Germany,43938,37998,267,5673
5,France,29551,22898,1698,4955
6,Iran,29406,16715,2234,10457
7,United Kingdom,11812,11082,580,150
8,Switzerland,11811,11489,191,131
9,South Korea,9241,4966,131,4144


In [503]:
# Re-rank the countries by deaths (highest first) and add deaths as a
# percentage of confirmed cases.
stats = (
    stats
    .sort_values(by='Deaths', ascending=False)
    .reset_index(drop=True)
)
stats['Death Rate %'] = stats['Deaths'].div(stats['Confirmed']).mul(100)

death_columns = ['Country/Region', 'Deaths', 'Death Rate %']
death_stats = stats[death_columns]
death_stats.style.background_gradient(cmap="Reds", subset=['Deaths'])

Unnamed: 0,Country/Region,Deaths,Death Rate %
0,Italy,8215,10.1937
1,Spain,4365,7.55373
2,China,3291,4.02411
3,Iran,2234,7.59709
4,France,1698,5.746
5,USA,1209,1.4421
6,United Kingdom,580,4.91026
7,Netherlands,435,5.82485
8,Germany,267,0.607674
9,Belgium,220,3.52847


In [504]:
# Continent codes returned by continent() -> display names.  Any other value
# (e.g. "Unknown") is kept unchanged.
continent_names = {
    'AS': 'Asia',
    'EU': 'Europe',
    'AF': 'Africa',
    'NA': 'North America',
    'SA': 'South America',
    'OC': 'Australia',
}

# Resolve every distinct country once and broadcast with map().  Unlike the
# previous per-row `.loc[i, ...]` loop, this does not depend on the frame
# having a 0..n-1 index.
continent_by_country = {}
for country_name in df_latest["Country/Region"].unique():
    continent_code = continent(country_name)
    continent_by_country[country_name] = continent_names.get(continent_code, continent_code)
df_latest["Continent"] = df_latest["Country/Region"].map(continent_by_country)

df_latest.head()

Unnamed: 0,Country/Region,Confirmed,Deaths,Recovered,Active,Continent
0,Afghanistan,94,4,2,88,Asia
1,Albania,174,6,17,151,Europe
2,Algeria,367,25,29,313,Africa
3,Andorra,224,3,1,220,Europe
4,Angola,4,0,0,4,Africa


In [505]:
df_group_by_country_date = df_group_by_country_date.sort_values(by="Date")
# 'time' is the date as a 'YYYY-MM-DD' string (used as animation frame label).
df_group_by_country_date['time'] = df_group_by_country_date['Date'].dt.strftime('%Y-%m-%d')

# Continent codes returned by continent() -> display names.  Any other value
# (e.g. "Unknown") is kept unchanged.
continent_names = {
    'AS': 'Asia',
    'EU': 'Europe',
    'AF': 'Africa',
    'NA': 'North America',
    'SA': 'South America',
    'OC': 'Australia',
}

# The frame holds one row per country *per date*, so resolve each distinct
# country once and broadcast with map() instead of calling continent() and
# assigning through `.loc[i, ...]` for every row.
continent_by_country = {}
for country_name in df_group_by_country_date["Country/Region"].unique():
    continent_code = continent(country_name)
    continent_by_country[country_name] = continent_names.get(continent_code, continent_code)
df_group_by_country_date["Continent"] = (
    df_group_by_country_date["Country/Region"].map(continent_by_country)
)


## Composition of Cases

In [506]:
import plotly.express as px
import numpy as np

# Sunburst of confirmed cases: continents on the inner ring, their countries
# on the outer ring, sectors sized and coloured by the confirmed count.
fig = px.sunburst(
    df_latest,
    path=['Continent', 'Country/Region'],
    values='Confirmed',
    color='Confirmed',
    color_continuous_scale=px.colors.diverging.BrBG,
)

fig.update_layout(
    font=dict(family="Courier New, monospace", size=18, color="black"),
    title=dict(
        text="Composition of Cases",
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    legend_font_size=16,
)
# Show label, text and value on every sector.
fig.data[0].textinfo = 'label+text+value'
fig.show(config=config)


In [507]:
# Province-level rows of the most recent date in the data.
df_latest_Province = df_clean.loc[
    df_clean['Date'] == df_clean['Date'].max()
].reset_index()

# Treemap nested country -> province, tiles sized by confirmed cases; the
# rows are passed in descending order of confirmed cases.
fig = px.treemap(
    df_latest_Province
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True),
    path=["Country/Region", "Province/State"],
    values="Confirmed",
    height=700,
    title='Number of Confirmed Cases',
    color_discrete_sequence=px.colors.sequential.Rainbow,
)
fig.data[0].textinfo = 'label+text+value'
# The title set here replaces the one passed to px.treemap above.
fig.update_layout(
    font=dict(family="Courier New, monospace", size=18, color="black"),
    title=dict(
        text="Confirmed cases country-wise and district-wise",
        y=0.95,
        x=0.5,
        xanchor='center',
        yanchor='top',
    ),
    legend_font_size=16,
)
fig.show()

## Spread of cases and shift of pandemic epicentre 

In [508]:
# Marker size is Confirmed ** 0.3 (presumably to stop the largest outbreaks
# from dwarfing every other marker - confirm the intended scaling).
df_group_by_country_date['size'] = df_group_by_country_date['Confirmed'].pow(0.3)

# Animated world map: one frame per 'time' value, one marker per country.
# range_color is fixed to the overall maximum so the colour scale is the same
# in every frame.
fig = px.scatter_geo(
    df_group_by_country_date,
    locations="Country/Region",
    locationmode='country names',
    color="Confirmed",
    size='size',
    hover_name="Country/Region",
    range_color=[0, df_group_by_country_date['Confirmed'].max() + 2],
    animation_frame="time",
)
fig.update(layout_coloraxis_showscale=True)
fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    title={
        'text': "World-wide spread over time",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=14
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '111',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}
fig.show(config=config)
# Also export a static copy of the figure.
fig.write_image("spread.pdf", height=500, width=1000)

In [509]:
# World choropleth of the latest confirmed counts.  The colour value is the
# natural log of the count, while the hover text shows the raw count.
# NOTE: np.log(0) is -inf, so a country with 0 confirmed cases has no finite
# colour value.
fig = go.Figure(data=go.Choropleth(
    locations=df_latest['Country/Region'],
    locationmode='country names',
    z=np.log(df_latest["Confirmed"]),
    text=df_latest["Confirmed"],
    hoverinfo='location+text',

    colorscale='Thermal',
    reversescale=True,
    marker_line_color='darkgray',
    marker_line_width=0.5,
    showscale=True,
    colorbar_title='<b>Confirmed <br>Cases</b> <br>log(Confirmed)',
))

fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    title={
        'text': "Confirmed cases all over world",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=14
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '111',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}
fig.show(config=config)

In [510]:
import plotly.express as px


# Animated bar chart: one bar per continent, built from the per-country rows,
# one frame per 'time' value.  The y-axis range is fixed at 0..300000
# (presumably so the axis does not rescale between frames).
fig = px.bar(
    df_group_by_country_date,
    x="Continent",
    y="Confirmed",
    color="Continent",
    animation_frame="time",
    animation_group="Country/Region",
    range_y=[0, 300000],
)
fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=12,
        color="black"
    ),
    title={
        'text': "Confirmed Cases continent-wise and shift in epicentre ",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=16
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '111',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}

fig.show(config=config)

## Trajectories of most affected countries

In [511]:
# Country ranking by confirmed cases, highest first, with a fresh 0..n-1 index.
stats = (
    df_latest[['Country/Region', 'Confirmed', 'Active', 'Deaths', 'Recovered']]
    .sort_values(by='Confirmed', ascending=False)
    .reset_index(drop=True)
)

# One confirmed-cases line per country for the ten highest-ranked countries.
# head(10) simply yields fewer names when the data has fewer than ten
# countries, where indexing stats.loc[0..9] would raise KeyError.
fig = go.Figure()
for country_name in stats['Country/Region'].head(10):
    country_history = df_group_by_country_date[
        df_group_by_country_date['Country/Region'] == country_name
    ]
    fig.add_trace(go.Scatter(
        x=country_history['Date'],
        y=country_history['Confirmed'],
        name=country_name,
        mode='lines',
    ))

fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    title={
        'text': "Trajectories of confirmed cases in most affected countries",
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=16,
    xaxis_rangeslider_visible=True,
)

# 'scrollZoom' is a top-level Plotly config key; inside 'toImageButtonOptions'
# (where it was before) it is ignored, so it sits next to it here.
config = {
    'toImageButtonOptions': {
        'format': 'svg',  # one of png, svg, jpeg, webp
        'filename': '111',
        'height': 500,
        'width': 1000,
        'scale': 1,  # multiply title/legend/axis/canvas sizes by this factor
    },
    'scrollZoom': True,
}

fig.show(config=config)

##  Comparison between top 3 affected countries

In [512]:
import plotly.graph_objects as go

# Radar chart comparing the highest-ranked rows of `stats` (one per colour).
# 'Confirmed' is repeated at the end of `categories` so each outline closes.
categories = ['Confirmed','Deaths','Recovered','Confirmed']
color = ["red","green","blue"]
top_countries = stats.head(len(color))

fig = go.Figure()
for i in range(len(top_countries)):
    counts = [stats.loc[i, metric] for metric in categories]
    fig.add_trace(go.Scatterpolar(
        # Radii are natural logs of the counts (see the annotation below).
        r=[np.log(count) for count in counts],
        theta=categories,
        name=stats.loc[i, "Country/Region"],
        mode='lines+markers',
        # Hover text carries the raw counts; it replaces a leftover
        # "Points + Fills" placeholder string.
        text=[str(count) for count in counts],
        line_color=color[i],
    ))

# Build the title from the countries actually plotted, so it cannot drift
# away from the data the way a hard-coded list of names can.
top_names = [str(name) for name in top_countries['Country/Region']]
if len(top_names) > 1:
    compared = ", ".join(top_names[:-1]) + " and " + top_names[-1]
else:
    compared = "".join(top_names)

fig.update_layout(
    font=dict(
        family="Courier New, monospace",
        size=18,
        color="black"
    ),
    polar=dict(
        radialaxis=dict(
            visible=True
        )),
    title={
        'text': "Comparison between " + compared,
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    legend_font_size=16,
    annotations=[dict(
        x=1,
        y=0.1,
        text='* All measurements are in logarithmic scale',
        showarrow=False,
        bordercolor="black",
        font=dict(
            family="Courier New, monospace",
            size=10,
            color="black"
        ),
    )]
)

fig.show(config=config)