In [2]:
import pandas as pd
import datetime as datetime
import matplotlib as mpl
import plotly.graph_objects as go
import dash
import requests
import json

import numpy as np
from sklearn import linear_model
from scipy import signal
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [4]:
%matplotlib inline
# Notebook-wide display defaults.
# Let pandas render up to 500 rows before it truncates a displayed frame.
pd.options.display.max_rows = 500
# Default matplotlib figure size as (width, height).
mpl.rcParams['figure.figsize'] = (16, 9)

# Modelling Spread

In [5]:
# Path to the processed cases-per-population CSV that is loaded below.
# Note: despite the name, this is the path of the file itself, not a directory.
data_dir='../data/processed/Cases_pop.csv'

In [6]:
# Load the processed table; the file uses ';' as its field separator.
df_list = pd.read_csv(
    filepath_or_buffer=data_dir,
    sep=';',
)

In [7]:
# Show the five most recent rows (sorted by 'date', oldest first) as a sanity check.
df_list.sort_values(by='date').tail(5)

Unnamed: 0,date,Cases_per_pop_AUS,Cases_per_pop_USA,Cases_per_pop_ESP,Cases_per_pop_IND,Cases_per_pop_CHN,Cases_per_pop_DEU,Cases_per_pop_AFG
929,2022-07-18,0.341932,0.26621,0.275665,0.031106,0.000629,0.357921,0.004581
930,2022-07-19,0.343818,0.266561,0.276543,0.031119,0.000629,0.359612,0.004586
931,2022-07-20,0.34594,0.267201,0.276543,0.031135,0.00063,0.36125,0.00459
932,2022-07-21,0.347939,0.267659,0.276543,0.031151,0.00063,0.362542,0.004594
933,2022-07-22,0.349726,0.268153,0.278074,0.031166,0.000631,0.363645,0.004598


# Helper Function

In [8]:
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """Plot every column of a data frame as one line trace for quick evaluation.

    A subset of columns can be plotted by slicing the frame before passing it,
    e.g. ``df.iloc[:, [0, 6, 7, 8]]``.

    Parameters
    ----------
    x_in : array-like
        Shared x values for all traces (date-time objects or plain numbers).
    df_input : pandas.DataFrame
        Plotting matrix: each column becomes one trace and the column name
        is used as its legend entry.
    y_scale : str, default 'log'
        Y-axis type, 'log' or 'linear'.
    slider : bool, default False
        If truthy, a range slider is shown for the x-axis.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()

    # One trace per column, all sharing the same x values.
    for column in df_input.columns:
        fig.add_trace(
            go.Scatter(x=x_in, y=df_input[column], name=column, opacity=0.8))

    fig.update_layout(autosize=True,
                      width=1024,
                      height=768,
                      font=dict(family="PT Sans, monospace",
                                size=18,
                                color="#7f7f7f"))
    fig.update_yaxes(type=y_scale)
    fig.update_xaxes(tickangle=-45,
                     nticks=20,
                     tickfont=dict(size=14, color="#7f7f7f"))
    if slider:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [9]:
# Plot every column after 'date' against the date column on a log y-axis.
quick_plot(
    x_in=df_list['date'],
    df_input=df_list.iloc[:, 1:],
    y_scale='log',
    slider=True,
)

In [10]:
# Cut-off applied to every country column when the timelines are aligned:
# only values strictly greater than this are kept.
threshold = 0.01

In [11]:
# For each country column (everything after 'date'), collect the values that
# exceed `threshold`, in their original row order, as a plain NumPy array.
# The arrays generally differ in length from country to country.
compare_list = [
    df_list.loc[df_list[country] > threshold, country].to_numpy()
    for country in df_list.columns[1:]
]

In [12]:
# Build one row per country from the ragged arrays (shorter ones are padded
# with NaN), then transpose so that each country becomes a column.
pd_sync_timelines = pd.DataFrame(
    data=compare_list,
    index=df_list.columns[1:],
).transpose()

In [13]:
# 'date' is a running counter 0, 1, 2, ... (one per row), not a calendar date.
pd_sync_timelines['date'] = np.arange(len(pd_sync_timelines))

In [14]:
# Preview the first five rows of the aligned timelines.
pd_sync_timelines.head(n=5)

Unnamed: 0,Cases_per_pop_AUS,Cases_per_pop_USA,Cases_per_pop_ESP,Cases_per_pop_IND,Cases_per_pop_CHN,Cases_per_pop_DEU,Cases_per_pop_AFG,date
0,0.010212,0.010193,0.010099,0.010154,,0.010262,,0
1,0.010534,0.010389,0.010287,0.01032,,0.010545,,1
2,0.010851,0.010599,0.010508,0.010506,,0.010821,,2
3,0.011232,0.010797,0.010508,0.010701,,0.011009,,3
4,0.011597,0.010988,0.010508,0.010885,,0.01114,,4


In [15]:
# Plot all columns except the last one (the 'date' counter) against that counter.
quick_plot(
    x_in=pd_sync_timelines['date'],
    df_input=pd_sync_timelines.iloc[:, :-1],
    y_scale='log',
    slider=True,
)

# Doubling Rate

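$N(t) = N_0 \cdot 2^{t / T_d}$

$N_0$ = base value at $t = 0$

$t$ = time in days

$T_d$ = doubling time: the number of days it takes for the value to double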

In [16]:

def doubling_rate(N_0, t, T_d):
    """Evaluate exponential growth that doubles once every ``T_d`` time units.

    Computes ``N_0 * 2 ** (t / T_d)``.

    Parameters
    ----------
    N_0 : number
        Base value at ``t = 0``.
    t : number or numpy array
        Elapsed time, in the same unit as ``T_d``.
    T_d : number
        Doubling period.

    Returns
    -------
    number or numpy array
        The grown value for each entry of ``t``.
    """
    elapsed_periods = t / T_d
    growth_factor = np.power(2, elapsed_periods)
    return N_0 * growth_factor
# NOTE(review): max_days is not referenced anywhere in this cell (the reference
# curves below span 790 days); it is kept unchanged in case other cells read it.
max_days = 800

# Reference curves: exponential growth starting at 0.01 (the same value as
# `threshold`), one curve per doubling time in days. All curves share one
# 790-point day axis, defined once instead of being repeated per curve.
slope_base = 0.01
slope_days = np.arange(790)
norm_slopes = {
    f'doubling every {period} days': doubling_rate(slope_base, slope_days, period)
    for period in (30, 180, 365, 720)
}

# Put the reference curves in front of the aligned country timelines.
# concat aligns on the row index, so if the two frames differ in length the
# shorter one is padded with NaN (this includes the 'date' counter column).
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slopes), pd_sync_timelines], axis=1)

# Columns 0-6 are the four reference curves followed by the first three
# country columns.
# NOTE(review): the remaining country columns are not plotted here - confirm
# that this selection is intended.
quick_plot(pd_sync_timelines_w_slope.date,
           pd_sync_timelines_w_slope.iloc[:, 0:7],
           y_scale='log',
           slider=True)

# Optional export of the combined table (left disabled). Per the original
# note, the processed CSV has to be saved under data/processed/ for this to work.
# pd_sync_timelines_w_slope.to_csv('../data/processed/COVID_small_sync_timeline_table.csv',sep=';',index=False)