
# Little Epidemiologist
Here are some interactive analyses, in case you fancy being an amateur epidemiologist.

In [28]:
# First some needed imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import datetime
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
# Font settings (point size 16) for the plots in this notebook.
font = dict(size=16)

%matplotlib inline

In [29]:
# Download the newest confirmed-cases time series and keep only the rows
# without a 'Province/State' value (i.e. filter out all province/state entries).
df = pd.read_csv(
    'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/'
    'csse_covid_19_data/csse_covid_19_time_series/'
    'time_series_covid19_confirmed_global.csv',
    sep=',',
)
df = df[df['Province/State'].isnull()]

# Transpose the frame so that each country is a column and each date a row.
dft = df.T
dft.columns = df['Country/Region']
# The first four rows of the transposed frame are the unneeded header fields;
# drop them and name the column axis 'Index'. No inplace mutation, so the cell
# can be re-read top to bottom as one pipeline.
dft = dft.drop(dft.index[:4]).rename_axis('Index', axis='columns')

# The remaining row labels are the CSV's own date headers in American m/d/yy
# form. Convert those to ISO (YYYY-MM-DD) strings instead of counting days from
# a hard-coded start date, so the labels stay correct even if the upstream
# file's date range changes.
dft['Data'] = list(
    pd.to_datetime(dft.index, format='%m/%d/%y').strftime('%Y-%m-%d')
)
dft = dft.reset_index(drop=True)
dft.tail()

Index,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Austria,Azerbaijan,...,Burma,MS Zaandam,Botswana,Burundi,Sierra Leone,Malawi,South Sudan,Western Sahara,Sao Tome and Principe,Data
72,281,304,1171,439,8,15,1265,736,11524,443,...,20,9,4,3,2,3,0,0,0,2020-04-03
73,299,333,1251,466,10,15,1451,770,11781,521,...,21,9,4,3,4,4,0,0,0,2020-04-04
74,349,361,1320,501,14,15,1451,822,12051,584,...,21,9,6,3,6,4,1,4,0,2020-04-05
75,367,377,1423,525,16,15,1554,833,12297,641,...,22,9,6,3,6,5,1,4,4,2020-04-06
76,423,383,1468,545,17,19,1628,853,12639,717,...,22,9,6,3,6,8,2,4,4,2020-04-07


In [38]:
# Patch one data point for Italy: overwrite row 50 with 14648 (described in the
# original notebook as interpolating missing data). Row 50 is the entry
# labelled 2020-03-12 in dft['Data'].
# Assign through .loc rather than writing into the array returned by `.values`:
# that array can be a read-only view (pandas Copy-on-Write), in which case the
# in-place write raises instead of updating the frame.
dft.loc[50, 'Italy'] = 14648
# Keep the `italy` name defined, in case later cells read it.
italy = dft['Italy'].values

In [45]:

@interact_manual
def show_delay(Country_1=['Italy', 'US', 'Poland', 'Sweden'],
               Country_2=['Poland', 'US', 'Italy', 'Sweden'],
               Delay_2=(0,len(dft.index),1),
               Country_3=['US', 'Poland', 'Italy', 'Sweden'],
               Delay_3=(0,len(dft.index),1)):
    """Plot the case curves of three countries, two of them shifted in time.

    The defaults are ipywidgets abbreviations: each list becomes a dropdown of
    country names and each (min, max, step) tuple becomes an integer slider.

    Parameters
    ----------
    Country_1 : str
        Column of ``dft`` plotted in red against the unshifted row index.
    Country_2, Country_3 : str
        Columns of ``dft`` plotted in green and blue respectively.
    Delay_2, Delay_3 : int
        Number of rows (days) subtracted from the x positions of the
        Country_2 / Country_3 curves, i.e. how far each curve is moved left.

    The y axis is logarithmic with its lower limit set to 10; every third row
    gets an x tick labelled with the matching ``dft['Data']`` date string.
    """
    # Use plt.rc: only `matplotlib.pyplot` is imported in this notebook, so the
    # bare name `matplotlib` is undefined on a fresh kernel.
    plt.rc('font', **font)
    plt.figure(figsize=(18,10))

    # Reference curve, drawn at its true position on the time axis.
    plt.plot(dft[Country_1],
             label=Country_1 + ' - Infected',
             color='red',
             marker='x')

    # Comparison curves, each moved left by its own delay.
    shifted_curves = ((Country_2, Delay_2, 'green'),
                      (Country_3, Delay_3, 'blue'))
    for country, delay, colour in shifted_curves:
        plt.plot(dft.index - delay, dft[country],
                 label=country + ' - Infected; '+str(delay)+' Days Delayed',
                 color=colour,
                 marker='x')

    plt.grid(True, which='major', color='gray', linestyle='-')
    plt.grid(True, which='minor', color='gray', linestyle='--')
    plt.xlim(left=0, right=len(dft))
    plt.legend()
    plt.xticks(dft.index[::3], dft['Data'][::3], rotation='vertical')
    plt.ylim(bottom=10)
    plt.yscale('log')
    

interactive(children=(Dropdown(description='Country_1', options=('Italy', 'US', 'Poland', 'Sweden'), value='It…