In [62]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import scipy

# Render matplotlib figures inside the notebook output.
%matplotlib inline
# Default (width, height) for matplotlib figures created in this notebook.
mpl.rcParams['figure.figsize'] = (16, 10)
# Let pandas display up to 500 rows of a frame.
pd.set_option('display.max_rows', 500)

import plotly.graph_objects as go

In [12]:
# Load the processed table: tab-separated, with column 0 parsed as dates.
df_analyse = pd.read_csv(
    'covid-19/data/processed/COVID_small_flat_table.csv',
    sep='\t',
    parse_dates=[0],
)
# Preview the last rows in chronological order.
df_analyse.sort_values(by='date', ascending=True).tail()

Unnamed: 0,date,Italy,US,Spain,Germany,"Korea,South"
94,2020-04-25,195351,938154,223759,156513,0
95,2020-04-26,197675,965785,226629,157770,0
96,2020-04-27,199414,988197,229422,158758,0
97,2020-04-28,201505,1012582,232128,159912,0
98,2020-04-29,203591,1039909,236899,161539,0


In [13]:
# helper functions
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """ Quick basic plot for quick static evaluation of a time series

        Every column of df_input is drawn as its own line against x_in.
        You can push selective columns of your data frame by .iloc[:,[0,6,7,8]]

        Parameters:
        ----------
        x_in : array
            array of date time object, or array of numbers;
            should have the same length as df_input
        df_input : pandas dataframe
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        y_scale : str
            y-axis scale as 'log' or 'linear'
        slider : bool
            True or False for x-axis slider

        Returns:
        ----------
        None
            the figure is displayed via fig.show() and not returned
    """
    fig = go.Figure()

    # One trace per column; the column name becomes the legend entry.
    for column_name in df_input.columns:
        fig.add_trace(go.Scatter(
                        x=x_in,
                        y=df_input[column_name],
                        name=column_name,
                        opacity=0.8))

    fig.update_layout(autosize=True,
        width=1024,
        height=768,
        font=dict(
            family="PT Sans, monospace",
            size=18,
            color="#7f7f7f"
            )
        )
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(tickangle=-45,
                 nticks=20,
                 tickfont=dict(size=14,color="#7f7f7f")
                )
    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [16]:
# Linear-scale overview of every column after the first, with an x-axis range slider.
quick_plot(
    x_in=df_analyse['date'],
    df_input=df_analyse.iloc[:, 1:],
    y_scale='linear',
    slider=True,
)

In [17]:
# Values must be strictly greater than this to be kept when the per-country
# timelines are aligned to a common starting point.
threshold = 100

In [18]:
# One array per column after the first, holding only the values above
# `threshold`; the arrays may differ in length (or be empty).
compare_list = [
    np.array(df_analyse[country][df_analyse[country] > threshold])
    for country in df_analyse.columns[1:]
]

In [23]:
# Each array in compare_list becomes a row labelled with its source column name;
# the transpose turns those rows into columns, so row i is the i-th value above threshold.
# NOTE(review): arrays of unequal length are expected to be NaN-padded by the
# DataFrame constructor — confirm on the pandas version in use.
pd_sync_timelines = pd.DataFrame(compare_list, index = df_analyse.columns[1:]).T

In [24]:
# Running row counter (0, 1, 2, ...) stored under 'date' to serve as the x-axis.
pd_sync_timelines['date'] = np.arange(len(pd_sync_timelines))

In [26]:
# Plot every synchronized timeline against the row counter on a log axis.
# Columns are selected by name: the previous positional slice 1:-1 removed the
# first data column together with the trailing 'date' column.
quick_plot(pd_sync_timelines['date'],
           pd_sync_timelines.drop(columns='date'),
           y_scale='log',
           slider=True)

In [27]:
def doubling_rate(N_0, t, T_d):
    """ Exponential growth curve that doubles every T_d time units

        Parameters:
        ----------
        N_0 : number
            value of the curve at t = 0
        t : number or array
            time(s) at which the curve is evaluated
        T_d : number
            doubling time, in the same unit as t

        Returns:
        ----------
        N_0 * 2**(t / T_d), element-wise when t is an array
    """
    doublings_elapsed = t / T_d
    growth_factor = np.power(2, doublings_elapsed)
    return N_0 * growth_factor

In [30]:
# Reference growth curves starting at 100 and evaluated for days 0 .. max_days-1.
max_days = 34
norm_slope = {
    label: doubling_rate(100, np.arange(max_days), T_d)
    for label, T_d in [
        ('doubling every day', 1),
        ('doubling every two days', 2),
        ('doubling every four days', 4),
        ('doubling every ten days', 10),
    ]
}


In [32]:
# Reference curves first, then the synchronized timelines, joined side by side on the row index.
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slope), pd_sync_timelines],
    axis='columns',
)

In [33]:
# Log-scale comparison against the reference doubling curves.
# NOTE(review): columns 0:5 cover the four reference curves plus only the first
# column of pd_sync_timelines — confirm the remaining columns are meant to be left out.
quick_plot(
    x_in=pd_sync_timelines_w_slope['date'],
    df_input=pd_sync_timelines_w_slope.iloc[:, :5],
    y_scale='log',
    slider=True,
)

In [34]:
# NOTE(review): this import sits in the middle of the notebook; consider moving
# it to the import cell at the top so a fresh-kernel run shows all dependencies up front.
from sklearn import linear_model
# Linear regression without an intercept term.
reg = linear_model.LinearRegression(fit_intercept=False)

In [40]:
# Regression inputs: y holds the Germany values, X the row positions as a single feature column.
y = np.array(df_analyse['Germany'])
l_vec = len(y)
X = np.arange(l_vec)[:, np.newaxis]

In [41]:
# Fit `reg` on the current X / y; the cell output is the estimator's repr.
reg.fit(X, y)

LinearRegression(copy_X=True, fit_intercept=False, n_jobs=None, normalize=False)

In [63]:
# Predict for every row position 0 .. l_vec-1.
X_hat = np.arange(l_vec)[:, np.newaxis]
y_hat = reg.predict(X_hat)

In [71]:
# Observed Germany values next to the model prediction, keyed by date.
LR_inspect = df_analyse[['date', 'Germany']].assign(prediction=y_hat)

In [73]:
# Observed values versus prediction on a log axis (every column after 'date').
quick_plot(
    x_in=LR_inspect['date'],
    df_input=LR_inspect.iloc[:, 1:],
    y_scale='log',
    slider=True,
)

In [78]:
# Refit with an intercept, using only the rows from FIT_START_ROW onwards.
FIT_START_ROW = 50  # first row position included in the fit

reg = linear_model.LinearRegression(fit_intercept=True)
l_vec = len(df_analyse['Germany'])
# .iloc makes the positional slice explicit; X is sized from y so the two
# can never drift apart when the start row is changed.
y = np.array(df_analyse['Germany'].iloc[FIT_START_ROW:])
X = np.arange(len(y)).reshape(-1, 1)

In [79]:
# Fit the intercept-enabled `reg` on the current X / y; the cell output is the estimator's repr.
reg.fit(X,y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [80]:
# Display the intercept of the fitted model.
reg.intercept_

-3239.458775510211

In [81]:
# Display the fitted coefficient array (a single slope here, since X has one column).
reg.coef_

array([3842.30397959])

In [82]:
def get_rate_via_regression(in_array):
    """ Ratio intercept / slope of a straight line fitted through three samples

        The three values are placed at x = -1, 0, 1 and an ordinary
        least-squares line with intercept is fitted through them. The fit is
        computed in closed form, so the result does not depend on (or refit)
        any estimator object defined elsewhere in the notebook.

        Parameters:
        ----------
        in_array : array-like of length 3
            three consecutive samples (list, numpy array or pandas Series)

        Returns:
        ----------
        float
            intercept / slope as a scalar, suitable for rolling(...).apply;
            inf when the slope is zero, nan when intercept and slope are both zero

        Raises:
        ----------
        AssertionError
            if in_array does not contain exactly three samples
    """
    y = np.asarray(in_array, dtype=float)
    assert len(y) == 3, 'get_rate_via_regression expects exactly 3 samples'

    # For the symmetric abscissa x = (-1, 0, 1) the least-squares solution is
    #   intercept = mean(y)   and   slope = (y[2] - y[0]) / 2
    intercept = y.mean()
    slope = (y[2] - y[0]) / 2.0

    # A flat window has slope 0; return inf/nan quietly instead of warning.
    with np.errstate(divide='ignore', invalid='ignore'):
        return float(intercept / slope)


In [86]:
# Rolling 3-sample estimate for Germany; windows with fewer than 3 samples give NaN.
# raw=True passes each window as a NumPy array, which avoids building a Series
# per window and the implicit-default warning of older pandas versions.
df_analyse['Germany_RT'] = (
    df_analyse['Germany']
    .rolling(window=3, min_periods=3)
    .apply(get_rate_via_regression, raw=True)
)





In [88]:
# Germany_RT from row position 40 onwards on a linear axis.
# The dates are sliced with the same row range as the values: x and y are
# paired by position, so passing the full date column would draw the rows
# from position 40 on against the dates of the first rows.
quick_plot(df_analyse['date'].iloc[40:],
           df_analyse[['Germany_RT']].iloc[40:],
           y_scale='linear')

In [89]:
def doubling_time(in_array):
    """ Doubling time estimated from the first and last sample of a window

        Uses T_d = (t_last - t_first) * ln(2) / ln(y_last / y_first).
        Samples are taken to be one time unit apart, so the elapsed time is
        len(in_array) - 1 (the number of intervals, not the number of samples).

        Parameters:
        ----------
        in_array : array-like
            consecutive samples (list, numpy array or pandas Series)

        Returns:
        ----------
        doubling time in sample intervals; inf when first and last sample are
        equal, nan for a single sample
    """
    y = np.asarray(in_array, dtype=float)
    elapsed_intervals = len(y) - 1
    # Flat or zero-valued windows divide by zero; return inf/nan quietly.
    with np.errstate(divide='ignore', invalid='ignore'):
        return elapsed_intervals * np.log(2) / np.log(y[-1] / y[0])

In [90]:
# Rolling 3-sample doubling time for Germany; windows with fewer than 3 samples give NaN.
# raw=True passes each window as a NumPy array, which avoids building a Series
# per window and the implicit-default warning of older pandas versions.
df_analyse['Germany_wiki'] = (
    df_analyse['Germany']
    .rolling(window=3, min_periods=3)
    .apply(doubling_time, raw=True)
)





In [91]:
# Both Germany estimates from row position 40 onwards on a linear axis.
# The dates are sliced with the same row range as the values: x and y are
# paired by position, so passing the full date column would draw the rows
# from position 40 on against the dates of the first rows.
quick_plot(df_analyse['date'].iloc[40:],
           df_analyse[['Germany_RT', 'Germany_wiki']].iloc[40:],
           y_scale='linear')