In [2]:
import numpy as np
import geopandas as gpd
import pandas as pd
from functools import reduce

In [7]:
# 1.1 Downloading csv into dataframe
df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

In [9]:
# 1.2 Tidying the data
# Using melt() command in pandas (similar to gather() in R's tidyr)
id_list = df_confirmed.columns.to_list()[:4]
vars_list = df_confirmed.columns.to_list()[4:]
confirmed_tidy = pd.melt(df_confirmed, id_vars=id_list,value_vars=vars_list, var_name='Date', value_name='Confirmed')
deaths_tidy = pd.melt(df_deaths, id_vars=id_list,value_vars=vars_list, var_name='Date', value_name='Deaths')
recovered_tidy = pd.melt(df_recovered, id_vars=id_list,value_vars=vars_list, var_name='Date', value_name='recovered')

# 1.3 Merging the three dataframes into one
data_frames = [confirmed_tidy, deaths_tidy, recovered_tidy]          
df_corona = reduce(lambda left, right: pd.merge(left, right, on =id_list+['Date'], how='outer'), data_frames)

# 1.4 Each row should only represent one observation
id_vars = df_corona.columns[:5]
data_type = ['Confirmed', 'Deaths', 'recovered']
df_corona = pd.melt(df_corona, id_vars=id_vars,value_vars=data_type, var_name='type', value_name='Count')
df_corona['Date'] = pd.to_datetime(df_corona['Date'],format='%m/%d/%y', errors='raise')

In [10]:
df_confirmed.head(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20,4/2/20
0,,Afghanistan,33.0,65.0,0,0,0,0,0,0,...,1,2,2,2,2,2,2,5,5,10
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,10,17,17,31,31,33,44,52,67,76
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,24,65,29,29,31,31,37,46,61,61
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,1,1,1,1,1,1,10,10,10,10
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,1


In [11]:
df_corona.head(5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,Date,type,Count
0,,Afghanistan,33.0,65.0,2020-01-22,Confirmed,0.0
1,,Albania,41.1533,20.1683,2020-01-22,Confirmed,0.0
2,,Algeria,28.0339,1.6596,2020-01-22,Confirmed,0.0
3,,Andorra,42.5063,1.5218,2020-01-22,Confirmed,0.0
4,,Angola,-11.2027,17.8739,2020-01-22,Confirmed,0.0


In [12]:
corona_sums = df_corona.groupby(['type', 'Date'],as_index=False).agg({'Count':'sum'})

In [21]:
import plotly_express as px
def plot_timeseries(df):
    fig = px.line(df, x='Date', y='Count', color='type',template='plotly_dark')
    fig.update_layout(legend_orientation="h")
    return(fig)

fig = plot_timeseries(corona_sums)
fig.show()