# Import Libraries and Data
We will use some libraries to make our life easier. These include ipywidgets (for interactive plots), matplotlib and seaborn (both for plotting), and pandas to work with data.

In [3]:
from ipywidgets import IntSlider, widgets, interactive, interact_manual, HTML

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns



# Base URL of the time-series CSV files in the CSSEGISandData/COVID-19 GitHub repository.
data_path = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/"
# Global confirmed-case table, downloaded on every run. The URL points at the
# repository's master branch, so the result depends on when the cell is executed.
df = pd.read_csv(data_path + 'time_series_covid19_confirmed_global.csv')
# Every column that is not one of the four metadata columns is treated as a date column.
dates = [c for c in df.columns if c not in ['Province/State', 'Country/Region', 'Lat', 'Long']]

# Distinct values of the 'Country/Region' column; used below as dropdown options.
all_countries = df['Country/Region'].unique()

## Restructure the Data

The original data adds a new column every day and represents countries as rows. Some countries are subdivided into states. We will reshape the data so that a new row is added for each day and each country is a single column: countries are not subdivided further, and the case counts of their states are summed up.

In [4]:
def build_dataframe(df):
    """
    Restructure data, so every date is a row, and every country is its own column.

    Countries that span several rows in the original data (one row per
    province/state, like the US) are summed into a single column.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a 'Country/Region' column, optionally the
        'Province/State', 'Lat' and 'Long' metadata columns, and one numeric
        column per date.

    Returns
    -------
    pandas.DataFrame
        One row per date column of `df` (in the same order as in `df`) and one
        column per country, ordered by first appearance in `df`.
    """
    meta_columns = ['Province/State', 'Country/Region', 'Lat', 'Long']
    dates = [c for c in df.columns if c not in meta_columns]

    # sort=False keeps the countries in order of first appearance, which is the
    # column order callers relied on (it matches df['Country/Region'].unique()).
    per_country = df.groupby('Country/Region', sort=False)[dates].sum()

    # Transposing turns dates into rows and countries into columns in one step.
    # The axis names are cleared so the result carries plain, unnamed axes.
    new_df = per_country.T.rename_axis(index=None, columns=None)

    return new_df


# Reshape once at cell level; `new_df` is read as a global by the plotting code further down.
new_df = build_dataframe(df)
# Last expression of the cell, so the notebook renders the most recent rows.
new_df.tail()

Unnamed: 0,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Austria,...,United Kingdom,Uruguay,Uzbekistan,Venezuela,Vietnam,West Bank and Gaza,Western Sahara,Yemen,Zambia,Zimbabwe
9/12/20,38641,11185,48007,1344,3335,95,546481,45675,26651,32696,...,367592,1780,46721,59630,1060,29906,10,2009,13466,7508
9/13/20,38716,11353,48254,1344,3388,95,555537,45862,26692,33159,...,370930,1808,47287,60540,1063,30574,10,2011,13539,7526
9/14/20,38772,11520,48496,1438,3439,95,565446,45969,26739,33541,...,373555,1812,47836,61569,1063,31362,10,2013,13720,7531
9/15/20,38815,11672,48734,1438,3569,95,577338,46119,26778,34305,...,376670,1827,48429,62655,1063,32250,10,2016,13819,7576
9/16/20,38855,11816,48966,1483,3675,95,589012,46376,26813,35073,...,380677,1856,49015,63416,1063,33006,10,2019,13887,7598


Our data only provides us with the total number of infections. Luckily, we can compute all kinds of different factors from this.

In [5]:
def calulate_growth_factors(cases):
    """
    Calculate day-over-day growth factors based on cumulative confirmed cases.

    The factor for day ``i`` is ``new_cases[i] / (new_cases[i - 1] + 1)``, where
    ``new_cases`` is the difference between consecutive entries of `cases`.
    The ``+ 1`` keeps a day with zero new cases from causing a division by
    zero; a previous-day change of exactly -1 still divides by zero and yields
    inf/nan.

    Parameters
    ----------
    cases : pandas.Series
        Cumulative case counts, one entry per day. Only ``cases.values`` is read.

    Returns
    -------
    numpy.ndarray
        Float array with the same length as `cases`. The first two entries are
        0 because a growth factor needs two preceding daily changes.
    """
    values = np.asarray(cases.values)
    n_days = len(values)
    if n_days < 2:
        # Too short for any daily change; return zeros of matching length
        # instead of a fixed two-element padding.
        return np.zeros(n_days)

    new_cases = np.diff(values)
    ratios = new_cases[1:] / (new_cases[:-1] + 1)

    return np.concatenate((np.zeros(2), ratios))

def calulate_diffs(cases):
    """
    Calculate the number of new cases per day from cumulative confirmed cases.

    Parameters
    ----------
    cases : pandas.Series
        Cumulative case counts, one entry per day. Only ``cases.values`` is read.

    Returns
    -------
    numpy.ndarray
        Array with the same length as `cases`. Entry ``i`` is
        ``cases[i] - cases[i - 1]``; the first entry is 0 because there is no
        previous day to compare against.
    """
    values = np.asarray(cases.values)
    if values.size == 0:
        # Nothing to difference; keep the output aligned with the (empty) input.
        return np.zeros(0, dtype=values.dtype)

    daily_change = np.diff(values)

    return np.concatenate((np.zeros(1, dtype=daily_change.dtype), daily_change))



# Visualizing the Data
Finally, we can create some interactive plots that show the data neatly. Using the dropdown menus, we can select the country we want to investigate. By selecting a date, we can mark a certain day on which, e.g., measures were imposed.

In [6]:
import matplotlib as mpl
# Default figure size for figures that do not pass their own `figsize`
# (`multiplot` below passes its own).
mpl.rcParams['figure.figsize'] = (14, 7)

# Apply seaborn's 'darkgrid' style to the plots created afterwards.
sns.set_style('darkgrid')

def _label_weekly_ticks(ax, labels, step=7):
    """Keep one x tick per `step` entries and label each with its own date."""
    positions = list(range(0, len(labels), step))
    # The x values are strings, so matplotlib places the n-th one at x == n.
    # Fixing the tick positions before assigning labels keeps every label on
    # the date it belongs to.
    ax.set_xticks(positions)
    ax.set_xticklabels([labels[i] for i in positions], rotation=45)


def multiplot(country, date):
    """
    Draw three stacked plots for one country and mark one date in each.

    The plots show, from top to bottom, the total number of infections, the
    growth factors and the new infections per day. Data comes from the
    module-level `new_df`.

    Parameters
    ----------
    country : str
        Column of `new_df` to plot.
    date : str
        Row label of `new_df` to highlight with a blue marker.

    Raises
    ------
    KeyError
        If `country` is not a column or `date` is not a row label of `new_df`.
    """
    fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(14, 21))
    day_labels = list(new_df.index)
    cases = new_df[country]

    # total number of infections
    axes[0].plot(new_df.index, cases, color='#AC454A')
    axes[0].scatter(date, cases.loc[date], color='#4999F2', s=60, marker='o')
    _label_weekly_ticks(axes[0], day_labels)
    axes[0].set_title('Total Number of Infections')

    # growth factors
    growth_factors = calulate_growth_factors(cases)
    axes[1].scatter(new_df.index, growth_factors, s=5, color='#F67941')
    # The marker sits on the baseline; it only indicates the selected date.
    axes[1].scatter(date, 0, color='#4999F2', s=60, marker='o')
    axes[1].set_ylim(-0.5, 5)
    _label_weekly_ticks(axes[1], day_labels)
    axes[1].set_title('Growth Factors')

    # new infections
    diffs = calulate_diffs(cases)
    axes[2].scatter(new_df.index, diffs, s=5, color='#F67941')
    axes[2].scatter(date, 0, color='#4999F2', s=60, marker='o')
    _label_weekly_ticks(axes[2], day_labels)
    axes[2].set_title('New Infections')

    
# Wire one dropdown to each `multiplot` parameter. `multiplot` reads `new_df`
# as a global and has no `df` parameter, so no DataFrame keyword is passed here.
interactive_plot = interactive(multiplot, country=all_countries, date=dates)
# Last child of the widget container (per the ipywidgets docs, the output area).
output = interactive_plot.children[-1]

interactive_plot # show plot inline

interactive(children=(Dropdown(description='country', options=('Afghanistan', 'Albania', 'Algeria', 'Andorra',…