In [44]:
import pandas as pd
import requests
import os
import json
import numpy as np
from datetime import datetime

%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import plotly
import plotly.graph_objs as go

import dash
from dash import dcc as dcc
from dash import html as html
pd.set_option('display.max_rows',500) 

![CRISP_DM](crisp_dm2.png)

# Modeling Spread

# Data Load

In [45]:
#Parsing the dates right at the beginning

df_analyse = pd.read_csv('C:/Users/patka/enterprise_data_science-covid-19/data/processed/COVID_small_flat_table.csv', sep=';',
                        parse_dates=[0])

df_analyse.sort_values('date', ascending=True).tail()

Unnamed: 0.1,Unnamed: 0,date,Italy,US,Spain,Germany,"Korea, South"
594,594,9/7/21,4579502,40440640,4892640,4017116,265423
230,230,9/8/20,280153,6342340,534513,252298,21588
595,595,9/8/21,4585423,40617567,4898258,4030681,267470
231,231,9/9/20,281583,6375978,543379,253474,21743
596,596,9/9/21,4590941,40783985,4903021,4046112,269362


In [46]:
df_plot = pd.read_csv('C:/Users/patka/enterprise_data_science-covid-19/data/processed/COVID_small_flat_table.csv', sep=';')
df_plot.head()

Unnamed: 0.1,Unnamed: 0,date,Italy,US,Spain,Germany,"Korea, South"
0,0,1/22/20,0,1,0,0,1
1,1,1/23/20,0,1,0,0,1
2,2,1/24/20,0,2,0,0,2
3,3,1/25/20,0,2,0,0,2
4,4,1/26/20,0,5,0,0,3


# Helper Functions

In [47]:
def quick_plot(x_in, df_input, y_scale='log', slider = False):
    fig = go.Figure()
# Defines how to plot individual trace
    country_list=['Italy', 'US', 'Spain', 'Germany', 'Korea, South']
    for each in country_list:
        fig.add_trace(go.Scatter(x=x_in,
                                 y=df_input[each],
                                 mode = 'markers + lines',
                                 marker_size = 2.5,
                                 opacity = 0.8,
                                 line_width = 1.5,
                                 name = each))

# Defines overall layout
    fig.update_layout(autosize=True,
        width=1024,
        height=768,
        font=dict(
            family="PT Sans, monospace",
            size = 18,
            color="#7f7f7f")
    #xaxis_title="Time",
    #yaxis_title="Confirmed Cases(Data from JHU, log scale)"
)

    fig.update_yaxes(type="linear"),
    fig.update_xaxes(tickangle=-45,
                      nticks=20,
                      tickfont=dict(size=14, color="#7f7f7f")
                 )
    if slider == True:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [48]:
quick_plot(df_analyse.date,
           df_analyse.iloc[:,1:],
           y_scale='log',
           slider=True)

In [49]:
threshold = 100


In [50]:
compare_list=[]
for pos,country in enumerate(df_analyse.columns[1:]):
    compare_list.append(np.array(df_analyse[country][df_analyse[country]>threshold]))

TypeError: '>' not supported between instances of 'str' and 'int'

In [None]:
pd_sync_timelines = pd.DataFrame(compare_list, index = df_analyse.columns[2:]).T

In [None]:
pd_sync_timelines['date'] = np.arange(pd_sync_timelines.shape[0])

In [None]:
pd_sync_timelines.head()

$N(t) = N_0^2(t/T)$

In [None]:
def doubling_rate(N_0, t, T_d):
    return N_0*np.power(2,t/T_d)

In [None]:
max_days = 34

norm_slopes = {
    'doubling after every 1 day' : doubling_rate(100, np.arange(20), 1),
    'doubling after every 2 days' : doubling_rate(100, np.arange(20), 2),
    'doubling after every 4 days' : doubling_rate(100, np.arange(20), 4),
    'doubling after every 10 days' : doubling_rate(100, np.arange(20), 10),
}

In [None]:
pd_sync_timelines_w_slopes = pd.concat([pd.DataFrame(norm_slopes), pd_sync_timelines], axis = 1)

In [None]:
#pd_sync_timelines_w_slopes

In [None]:
quick_plot(pd_sync_timelines_w_slopes.date,
           pd_sync_timelines_w_slopes.iloc[:,0:5],
           y_scale='log',
           slider=True) 

# Linear Regression

In [None]:
from sklearn import linear_model

reg = linear_model.LinearRegression()

In [None]:
l_vec = len(df_analyse['Germany'])
X = np.arange(l_vec).reshape(-1,1)
y = np.array(df_analyse['Germany'])

In [None]:
reg.fit(X,y)

In [None]:
X_hat = np.arange(l_vec).reshape(-1,1)
Y_hat = reg.predict(X_hat)

In [None]:
LR_inspect = df_analyse[['date', 'Germany']].copy()

In [None]:
LR_inspect['prediction'] = Y_hat

In [None]:
quick_plot(LR_inspect.date,
           LR_inspect.iloc[:,1:],
           y_scale = 'linear',
           slider=True)