In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib as mpl
import seaborn as sns
from matplotlib import pyplot as plt
from datetime import datetime

import plotly.graph_objects as go
# Notebook-wide display defaults: large matplotlib figures and long
# data frame previews before pandas truncates the rows.
mpl.rc('figure', figsize=(20, 16))
pd.options.display.max_rows = 500

![CRISP_DM](../reports/figures/CRISP_DM.png)

# Data Load

In [2]:
# Read the processed flat table (semicolon separated); the first column is
# parsed as dates.
df_analyse = pd.read_csv(
    '../data/processed/COVID_small_flat_table.csv',
    sep=';',
    parse_dates=[0],
)

# Preview the latest rows in chronological order (ascending is the default).
df_analyse.sort_values(by='date').tail()

Unnamed: 0,date,Italy,US,Spain,Germany,"Korea, South"
227,2020-09-05,276338,6244970,498989,251058,21177
228,2020-09-06,277634,6276365,498989,251728,21296
229,2020-09-07,278784,6300622,525549,253626,21432
230,2020-09-08,280153,6327009,534513,254957,21588
231,2020-09-09,281583,6361265,543379,256433,21743


# Helper Functions

In [3]:
def quick_plot(x_in, df_input,y_scale='log',slider=False):
    """Draw every column of a data frame as one trace in a single Plotly figure.

    Meant for a quick visual evaluation of time series. To plot only selected
    columns, pass a slice of the frame, e.g. ``df.iloc[:, [0, 6, 7, 8]]``.

    Parameters
    ----------
    x_in : array-like
        Shared x values for all traces (datetime objects or plain numbers).
    df_input : pandas.DataFrame
        Plotting matrix; each column becomes one trace and the column name
        is used as its legend entry.
    y_scale : str, default 'log'
        Y-axis type, 'log' or 'linear'.
    slider : bool, default False
        If true, a range slider is shown for the x-axis.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()

    # items() yields (label, Series) pairs, so a frame with duplicate column
    # labels still contributes one 1-D trace per column.
    for column_name, series in df_input.items():
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=series,
                        name=column_name,
                        opacity=0.8))

    fig.update_layout(autosize=True,
        width=1024,
        height=768,
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(tickangle=-45,
                 nticks=20,
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    # The slider is only switched on explicitly; otherwise the layout is
    # left untouched.
    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [4]:
# Lower bound used when synchronizing the country timelines: only values
# strictly greater than this are kept for each country column.
threshold = 100


In [5]:
# For every country column (column 0 is 'date' and is skipped) keep only the
# values strictly above `threshold`, as a plain NumPy array. The arrays may
# differ in length from country to country.
compare_list = []

for country in df_analyse.columns[1:]:
    compare_list.append(np.array(df_analyse[country][df_analyse[country]>threshold]))

In [6]:
# Build the frame with one row per country, then transpose so that every
# country becomes a column and each row is one step on the shared axis.
pd_sync_timelines = pd.DataFrame(
    compare_list,
    index=df_analyse.columns[1:],
).transpose()


In [7]:
# Despite its name, 'date' holds a 0-based running row index of the
# threshold-filtered timelines, not calendar dates.
pd_sync_timelines['date'] = np.arange(len(pd_sync_timelines))

In [8]:
# Preview the first rows of the synchronized timelines.
pd_sync_timelines.head()

Unnamed: 0,Italy,US,Spain,Germany,"Korea, South",date
0,155.0,103.0,120.0,130.0,104.0,0
1,229.0,172.0,165.0,159.0,204.0,1
2,322.0,215.0,222.0,196.0,433.0,2
3,453.0,337.0,259.0,262.0,602.0,3
4,655.0,450.0,400.0,482.0,833.0,4


In [11]:
# Plot all country columns (everything except the trailing 'date' index)
# against the synchronized index on a logarithmic y-axis.
quick_plot(
    x_in=pd_sync_timelines['date'],
    df_input=pd_sync_timelines.iloc[:, :-1],
    y_scale='log',
    slider=True,
)

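$N(t) = N_0 \cdot 2^{t/T_d}$, where $N_0$ is the value at $t = 0$ and $T_d$ is the doubling time (in the same unit as $t$).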

In [12]:
def doubling_rate(N_0,t,T_d):
    """Evaluate exponential growth with a fixed doubling time.

    Computes ``N_0 * 2 ** (t / T_d)``.

    Parameters
    ----------
    N_0 : number
        Value at ``t = 0``.
    t : number or numpy array
        Elapsed time; an array yields one result per element.
    T_d : number
        Doubling time, in the same unit as ``t``.

    Returns
    -------
    number or numpy array
        Value(s) reached after ``t``.
    """
    doublings_elapsed = t/T_d
    growth_factor = np.power(2, doublings_elapsed)
    return N_0*growth_factor

In [20]:
# Number of steps for which the reference curves are evaluated.
max_days = 20

# Reference growth curves starting at 100, one per doubling period; the
# keys double as legend labels in the plots.
norm_slopes = {
    label: doubling_rate(100, np.arange(max_days), period)
    for label, period in (
        ('doubling every day', 1),
        ('doubling every two days', 2),
        ('doubling every 4 days', 4),
        ('doubling every 10 days', 10),
    )
}

In [21]:
# Place the reference curves to the left of the synchronized timelines;
# rows are aligned on the shared integer index.
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slopes), pd_sync_timelines],
    axis='columns',
)

In [23]:
# Plot the first five columns against the synchronized index.
# NOTE(review): columns 0-4 are the four reference curves plus only the
# first country column; confirm whether all countries (iloc[:, :-1]) were
# intended here.
quick_plot(
    x_in=pd_sync_timelines_w_slope['date'],
    df_input=pd_sync_timelines_w_slope.iloc[:, :5],
    y_scale='log',
    slider=True,
)