In [1]:
import pandas as pd
import numpy as np
import scipy as sp
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import plotly.express as px
# Switch off pandas' chained-assignment check for the whole session, so writing
# into a frame that was sliced out of another one raises no SettingWithCopyWarning.
pd.options.mode.chained_assignment = None  # default='warn'

In [2]:
# Global time-series CSVs from the CSSEGISandData/COVID-19 repository; the
# placeholder is the case category used in the file name.
TIME_SERIES_URL = (
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{}_global.csv'
)


def _load_country_totals(category):
    """Download one category's time series and collapse it to one row per country.

    Parameters
    ----------
    category : str
        File-name fragment of the CSV: 'confirmed', 'deaths' or 'recovered'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Country/Region', 'Lat', 'Long', followed by every other
        numeric column of the CSV (the per-date cumulative counts), with a
        default integer index.
    """
    raw = pd.read_csv(TIME_SERIES_URL.format(category))
    count_cols = [col for col in raw.select_dtypes('number').columns
                  if col not in ('Lat', 'Long')]
    # Counts add up across the rows of a country; coordinates do not. Summing
    # Lat/Long (what a blanket .sum() does) puts every multi-row country at a
    # meaningless map position, so average the coordinates instead.
    how = {'Lat': 'mean', 'Long': 'mean', **{col: 'sum' for col in count_cols}}
    return raw.groupby('Country/Region', as_index=False).agg(how)


confirmed = _load_country_totals('confirmed')
death = _load_country_totals('deaths')
recovered = _load_country_totals('recovered')
reported_countries = len(confirmed['Country/Region'])
# Sanity check: every country in `confirmed` should also appear in the other two files.
print(confirmed['Country/Region'].isin(death['Country/Region']).value_counts())
print(confirmed['Country/Region'].isin(recovered['Country/Region']).value_counts())

True    188
Name: Country/Region, dtype: int64
True    188
Name: Country/Region, dtype: int64


In [3]:
def _to_daily(country_totals):
    """Turn a one-row-per-country frame into a one-row-per-date frame.

    Parameters
    ----------
    country_totals : pandas.DataFrame
        Has 'Country/Region', 'Lat', 'Long' and one count column per date.

    Returns
    -------
    pandas.DataFrame
        A 'Date' column holding the original date column labels, one column
        per country, and a trailing 'Global' column with the row-wise sum of
        all country columns.
    """
    by_date = (
        country_totals
        # `columns=` instead of a positional axis: pandas 2.0 rejects drop(labels, 1).
        .drop(columns=['Lat', 'Long'])
        .set_index('Country/Region')
        .transpose()
    )
    by_date['Global'] = by_date.sum(axis=1)
    by_date.index.name = 'Date'
    by_date.columns.name = None
    return by_date.reset_index()


confirmed_daily = _to_daily(confirmed)
death_daily = _to_daily(death)
recovered_daily = _to_daily(recovered)

In [4]:
def _latest_counts(country_totals, label):
    """Latest cumulative count and its change from the previous date column.

    Parameters
    ----------
    country_totals : pandas.DataFrame
        One row per country with 'Country/Region' and date columns, the last
        two columns being the two most recent dates.
    label : str
        Category name used to build the output column names.

    Returns
    -------
    pandas.DataFrame
        Indexed by country, with columns '<label>Cases' and 'New<label>Cases'.
    """
    by_country = country_totals.set_index('Country/Region')
    latest, previous = by_country.iloc[:, -1], by_country.iloc[:, -2]
    return pd.DataFrame({label + 'Cases': latest,
                         'New' + label + 'Cases': latest - previous})


# .copy() so the new columns are written to an independent frame, not a slice of `confirmed`.
table = confirmed[['Country/Region', 'Long', 'Lat']].copy()
for label, country_totals in (('Confirmed', confirmed),
                              ('Death', death),
                              ('Recovered', recovered)):
    # Match on the country name rather than on row position, so a country that
    # is missing from (or ordered differently in) one file cannot shift counts
    # onto the wrong row.
    table = table.join(_latest_counts(country_totals, label), on='Country/Region')
# Countries absent from a file end up as NaN after the join; count them as zero.
table = table.fillna(0)
table['ActiveCases'] = table['ConfirmedCases'] - table['DeathCases'] - table['RecoveredCases']
table['NewActiveCases'] = table['NewConfirmedCases'] - table['NewDeathCases'] - table['NewRecoveredCases']
table = table.sort_values(by='Country/Region')
table

Unnamed: 0,Country/Region,Long,Lat,ConfirmedCases,NewConfirmedCases,DeathCases,NewDeathCases,RecoveredCases,NewRecoveredCases,ActiveCases,NewActiveCases
0,Afghanistan,65.000000,33.000000,5639,413,136,4,691,43,4812,366
1,Albania,20.168300,41.153300,898,18,31,0,694,6,173,12
2,Algeria,1.659600,28.033900,6442,189,529,7,3158,100,2755,82
3,Andorra,1.521800,42.506300,761,1,49,0,596,20,116,-19
4,Angola,17.873900,-11.202700,48,3,2,0,14,0,32,3
...,...,...,...,...,...,...,...,...,...,...,...
183,West Bank and Gaza,35.233200,31.952200,375,0,2,0,310,0,63,0
184,Western Sahara,-12.885800,24.215500,6,0,0,0,6,0,0,0
185,Yemen,48.516388,15.552727,85,15,12,0,1,0,72,15
186,Zambia,28.283300,-15.416700,654,208,7,0,124,0,523,208


In [5]:
# Worldwide totals: the cumulative count and the latest change for each category.
active_total, active_new_total = table['ActiveCases'].sum(), table['NewActiveCases'].sum()
confirmed_total, confirmed_new_total = table['ConfirmedCases'].sum(), table['NewConfirmedCases'].sum()
death_total, death_new_total = table['DeathCases'].sum(), table['NewDeathCases'].sum()
recovered_total, recovered_new_total = table['RecoveredCases'].sum(), table['NewRecoveredCases'].sum()

# One display row per category; numbers are rendered with thousands separators.
summary_rows = (
    ('Active', active_total, active_new_total),
    ('Confirmed', confirmed_total, confirmed_new_total),
    ('Death', death_total, death_new_total),
    ('Recovered', recovered_total, recovered_new_total),
)
summary_total = pd.DataFrame(
    [(category, f'{total:,}', f'{new:,}') for category, total, new in summary_rows],
    columns=['Cases', 'Total', 'New'],
)
summary_total

Unnamed: 0,Cases,Total,New
0,Active,2551852,50578
1,Confirmed,4442163,95145
2,Death,302418,5221
3,Recovered,1587893,39346


In [6]:
# Per-country sum of each cumulative count, as three two-column frames
# ('Country/Region' plus the metric).
confirmed_table, death_table, recovered_table = (
    table.groupby('Country/Region', as_index=False)[metric].sum()
    for metric in ('ConfirmedCases', 'DeathCases', 'RecoveredCases')
)

In [7]:
# Map of confirmed cases: one marker per row of `table`, sized and coloured by the count.
c_fig = px.scatter_mapbox(
    table,
    lat="Lat",
    lon="Long",
    hover_name="Country/Region",
    hover_data=["ConfirmedCases"],
    color="ConfirmedCases",
    size="ConfirmedCases",
    color_discrete_sequence=["fuchsia"],
    zoom=0.5,
    width=900,
    height=500,
)
# OpenStreetMap tiles, with the figure margins removed.
c_fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
c_fig.show()

In [8]:
# Map of deaths: one marker per row of `table`, sized and coloured by the count.
d_fig = px.scatter_mapbox(
    table,
    lat="Lat",
    lon="Long",
    hover_name="Country/Region",
    hover_data=["DeathCases"],
    color="DeathCases",
    size="DeathCases",
    color_discrete_sequence=["fuchsia"],
    zoom=0.5,
    width=900,
    height=500,
)
# OpenStreetMap tiles, with the figure margins removed.
d_fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
d_fig.show()

In [9]:
# Map of recoveries: one marker per row of `table`, sized and coloured by the count.
r_fig = px.scatter_mapbox(
    table,
    lat="Lat",
    lon="Long",
    hover_name="Country/Region",
    hover_data=["RecoveredCases"],
    color="RecoveredCases",
    size="RecoveredCases",
    color_discrete_sequence=["fuchsia"],
    zoom=0.5,
    width=900,
    height=500,
)
# OpenStreetMap tiles, with the figure margins removed.
r_fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
r_fig.show()

In [10]:
# Stack the three wide per-date frames into one long frame with the columns
# Date, Country/Region, NumberOfCases, Category.
daily = pd.concat(
    [
        pd.melt(frame, id_vars='Date', var_name='Country/Region', value_name='NumberOfCases')
          .assign(Category=category)
        for category, frame in (('Confirmed', confirmed_daily),
                                ('Death', death_daily),
                                ('Recovered', recovered_daily))
    ],
    ignore_index=True,  # unique row labels, so the column assignments below align unambiguously
)
daily['Date'] = pd.to_datetime(daily['Date'])
daily['NumberOfCases'] = daily['NumberOfCases'].astype(int)

# Day-over-day change, computed inside each (category, country) series. A plain
# shift(1) over the stacked frame would make the first date of every series
# subtract the last value of whichever series happens to precede it.
series_keys = ['Category', 'Country/Region']
daily['NewCases'] = daily.groupby(series_keys, sort=False)['NumberOfCases'].diff()
# The first date of a series has no predecessor: back-fill it from the next day
# of the same series, and fall back to 0 for a series with a single date.
daily['NewCases'] = daily.groupby(series_keys, sort=False)['NewCases'].bfill().fillna(0)

In [11]:
def cum_cases(country):
    """Show a line chart of cumulative case counts for one country.

    Selects the rows of the module-level `daily` frame whose 'Country/Region'
    equals `country` and draws 'NumberOfCases' against 'Date', one line per
    'Category'.

    Parameters
    ----------
    country : str
        Value to match in the 'Country/Region' column; also prefixed to the title.

    Returns
    -------
    Whatever the figure's ``show()`` call returns.
    """
    is_country = daily['Country/Region'] == country
    heading = country + ' Cumulative Cases'
    chart = px.line(
        daily[is_country],
        x="Date",
        y='NumberOfCases',
        color='Category',
        title=heading,
    )
    return chart.show()
def new_cases(country):
    """Show a bar chart of the 'NewCases' column for one country's confirmed cases.

    Selects the rows of the module-level `daily` frame whose 'Country/Region'
    equals `country` and whose 'Category' is 'Confirmed', then draws 'NewCases'
    against 'Date'.

    Parameters
    ----------
    country : str
        Value to match in the 'Country/Region' column; also prefixed to the title.

    Returns
    -------
    Whatever the figure's ``show()`` call returns.
    """
    wanted = (daily['Country/Region'] == country) & (daily['Category'] == 'Confirmed')
    heading = country + ' New Cases'
    chart = px.bar(
        daily[wanted],
        x="Date",
        y='NewCases',
        title=heading,
    )
    return chart.show()

In [12]:
# Draw both charts for the 'Global' series: cumulative lines first, then the new-cases bars.
for render_chart in (cum_cases, new_cases):
    render_chart('Global')