In [1]:
from ipywidgets import IntSlider, widgets, interactive, interact_manual, HTML

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Base URL of the Johns Hopkins CSSE COVID-19 time-series directory on GitHub.
data_path = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)

# Global confirmed-case counts: one row per province/country, one column per day.
df = pd.read_csv(data_path + 'time_series_covid19_confirmed_global.csv')

# Every column that is not one of the four metadata columns is a date column.
dates = [
    column
    for column in df.columns
    if column not in ('Province/State', 'Country/Region', 'Lat', 'Long')
]

# Country names in order of first appearance in the file.
all_countries = df['Country/Region'].unique()

## Restructure the Data

The original data adds a new column every day, and countries are represented as rows. Some countries are subdivided into states or provinces. We will reshape the data so that each day is a row and each country is a column; countries are no longer subdivided, and the case counts of their states are summed.

In [2]:
def build_dataframe(df):
    """Reshape the raw time-series table to one row per date, one column per country.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with the metadata columns 'Province/State', 'Country/Region',
        'Lat' and 'Long'; every other column is treated as a date column
        holding case counts.

    Returns
    -------
    pandas.DataFrame
        Indexed by the date column labels, with one column per distinct
        'Country/Region' value (in order of first appearance in ``df``).
        Rows of ``df`` that share a country are summed.
    """
    metadata_columns = ('Province/State', 'Country/Region', 'Lat', 'Long')
    dates = [c for c in df.columns if c not in metadata_columns]

    # sort=False keeps the countries in order of first appearance, which is
    # the same order df['Country/Region'].unique() yields, so no per-country
    # lookup is needed to restore that order afterwards.
    per_country = df.groupby('Country/Region', sort=False)[dates].sum()

    # Transpose so that dates become the rows.  The frame is rebuilt from the
    # raw values so that neither axis carries a leftover name from groupby.
    new_df = pd.DataFrame(
        per_country.to_numpy().T,
        index=dates,
        columns=per_country.index.tolist())

    return new_df


# Reshape the raw table once; multiplot below reads this frame.
new_df = build_dataframe(df)
# Show the most recent rows as the cell output.
new_df.tail()

Unnamed: 0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,Belize,Laos,Libya,West Bank and Gaza,Guinea-Bissau,Mali,Saint Kitts and Nevis,Kosovo,Burma,MS Zaandam
3/25/20,84,146,302,188,3,3,387,265,2364,5588,...,2,3,1,59,2,2,2,0,0,0
3/26/20,94,174,367,224,4,7,502,290,2810,6909,...,2,6,1,84,2,4,2,71,0,0
3/27/20,110,186,409,267,4,7,589,329,3143,7657,...,2,6,1,91,2,11,2,86,8,0
3/28/20,110,197,454,308,5,7,690,407,3640,8271,...,2,8,3,98,2,18,2,91,8,2
3/29/20,120,212,511,334,7,7,745,424,3984,8788,...,2,8,8,109,2,18,2,94,10,2


In [3]:
def calulate_growth_factors(cases):
    """Calculate growth factors based on cumulative confirmed cases.

    The growth factor at position ``i`` is the increase from ``i-1`` to ``i``
    divided by (the increase from ``i-2`` to ``i-1``, plus one).  The ``+1``
    keeps the denominator non-zero when the previous increase was 0.

    Parameters
    ----------
    cases : pandas.Series or other 1-D sequence of numbers
        Cumulative case counts in chronological order.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as ``cases``.  The first two entries
        are 0 because a growth factor needs two preceding increases.
    """
    counts = np.asarray(cases, dtype=float)

    # Start from zeros so the result always matches the input length, even
    # for series with fewer than three points.
    growth_factors = np.zeros(len(counts), dtype=float)
    if len(counts) < 3:
        return growth_factors

    daily_increase = np.diff(counts)
    # NOTE: a previous increase of exactly -1 (a downward correction in the
    # data) still makes the denominator zero; numpy then yields inf/nan.
    growth_factors[2:] = daily_increase[1:] / (daily_increase[:-1] + 1)

    return growth_factors

In [4]:
import matplotlib as mpl
# Default figure size for figures created from here on; multiplot passes its
# own figsize to plt.subplots, so this applies to any other figures.
mpl.rcParams['figure.figsize'] = (14, 7)

def multiplot(country, date):
    """Plot confirmed cases and growth factors for one country.

    Creates a new figure with two stacked scatter plots over the dates in
    the module-level ``new_df``: the values of ``new_df[country]`` on top
    and the output of ``calulate_growth_factors`` for that column below.
    An orange square at y=0 marks ``date`` in both plots.

    Parameters
    ----------
    country : str
        Label of a column of ``new_df``.
    date : str
        Label of a row of ``new_df``.

    Raises
    ------
    IndexError
        If ``date`` is not in ``new_df.index``.
    """
    if date not in new_df.index:
        raise IndexError('date {!r} is not in the data index'.format(date))

    fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(14, 14))
    cases_ax, growth_ax = axes

    # confirmed cases
    cases_ax.scatter(new_df.index, new_df[country], color='purple')
    cases_ax.scatter(date, 0, color='orange', s=100, marker='s')
    cases_ax.set_title('Confirmed cases: {}'.format(country))
    cases_ax.set_ylabel('Confirmed cases')

    # growth factors
    growth_factors = calulate_growth_factors(new_df[country])
    growth_ax.scatter(new_df.index, growth_factors)
    growth_ax.scatter(date, 0, color='orange', s=100, marker='s')
    growth_ax.set_ylim(-0.5, 5)
    growth_ax.grid(linestyle='-', linewidth='0.5')
    growth_ax.set_title('Growth factor: {}'.format(country))
    growth_ax.set_ylabel('Growth factor')

    # Rotate the date labels matplotlib already derived from the index.
    # Replacing them via set_xticklabels(dates[1:]) would shift every label
    # by one day, because the first date is dropped from the label list.
    for ax in (cases_ax, growth_ax):
        ax.tick_params(axis='x', labelrotation=45)

    
    

# Build the widget UI: one dropdown per multiplot parameter, filled with the
# country names and the date labels.  multiplot reads new_df itself, so only
# its two real parameters are passed here.
interactive_plot = interactive(multiplot, country=all_countries, date=dates)
# Last child of the container; per the ipywidgets docs this is the Output
# widget that holds the rendered figure.
output = interactive_plot.children[-1]


interactive_plot

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…