In [6]:
from ipywidgets import IntSlider, widgets, interactive, interact_manual, HTML

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# data_path = '/home/stefan/projects/COVID-19/csse_covid_19_data/csse_covid_19_time_series/'
data_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
df = pd.read_csv(data_path + 'time_series_19-covid-Confirmed.csv')
dates = [c for c in df.columns if c not in ['Province/State', 'Country/Region', 'Lat', 'Long']]

all_countries = df['Country/Region'].unique()

## Restructure the Data

The original data adds a new column every day and countries are represented as rows. Some countries are subidived into states. We will reshape the data so that a new row is added for each new day and countries are represented as columns and are not further subdivided and its states cases are summed up.

In [7]:
def build_dataframe(df):
    
    dates = [c for c in df.columns if c not in ['Province/State', 'Country/Region', 'Lat', 'Long']]

    grouped_df = df.groupby('Country/Region', as_index='Country/Region')[dates].sum()

    new_df = pd.DataFrame(
        np.transpose(np.array([np.array(grouped_df.loc[grouped_df.index==country])[0] for country in df['Country/Region'].unique()])),
        index=dates,
        columns=df['Country/Region'].unique())
    
    
    return new_df


new_df = build_dataframe(df)
new_df.tail()

Unnamed: 0,Thailand,Japan,Singapore,Nepal,Malaysia,Canada,Australia,Cambodia,Sri Lanka,Germany,...,Zimbabwe,Cape Verde,East Timor,Eritrea,Uganda,Dominica,Grenada,Mozambique,Syria,Timor-Leste
3/18/20,212,889,313,1,790,657,568,35,51,12327,...,0,0,0,0,0,0,0,0,0,0
3/19/20,272,924,345,1,900,800,681,37,60,15320,...,0,0,0,0,0,0,0,0,0,0
3/20/20,322,963,385,1,1030,943,791,51,73,19848,...,1,0,0,0,0,0,0,0,0,0
3/21/20,411,1007,432,1,1183,1278,1071,53,77,22213,...,3,1,1,1,1,0,0,0,0,0
3/22/20,599,1086,455,2,1306,1465,1314,84,82,24873,...,3,1,1,1,1,1,1,1,1,1


In [8]:
def calulate_growth_factors(cases):
    """ Calculate growth rates bases on confirmed cases. """
    diffs = [cases.values[i]-cases.values[i-1] for i in range(1, len(cases.values))]
    growth_factors = [0,0] + [diffs[i]/(diffs[i-1]+1) for i in range(1, len(diffs))]
    
    return np.array(growth_factors)

In [9]:
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (14, 7)

def multiplot(country, date):
    fig, axes = plt.subplots(nrows=2,ncols=1,figsize=(14,14))
    # confirmed cases
    axes[0].scatter(new_df.index, new_df[country], color='purple')
    axes[0].scatter(new_df.loc[new_df.index==date].index[0], 0, color='orange', s=100, marker='s')
    axes[0].set_xticklabels(dates[1:len(dates)], rotation=45)
    
    # growth factors
    growth_factors = calulate_growth_factors(new_df[country])
    axes[1].scatter(new_df.index, growth_factors)
    axes[1].set_xticklabels(dates[1:len(dates)], rotation=45)
    axes[1].scatter(new_df.loc[new_df.index==date].index[0], 0, color='orange', s=100, marker='s')
    axes[1].set_ylim(-0.5,5)
    axes[1].grid(linestyle='-', linewidth='0.5')

    
    

interactive_plot = interactive(multiplot, df = new_df, country=all_countries, date=dates)
output = interactive_plot.children[-1]


interactive_plot

interactive(children=(Dropdown(description='country', options=('Thailand', 'Japan', 'Singapore', 'Nepal', 'Mal…