In [11]:
import pandas as pd
import datetime as datetime
import matplotlib as mpl
import plotly.graph_objects as go
import dash
import requests
import json

import numpy as np
from sklearn import linear_model
from dash import dcc
from dash import html
from dash.dependencies import Input, Output

In [35]:
%matplotlib inline
mpl.rcParams['figure.figsize'] = (16, 9)
pd.set_option('display.max_rows', 500)

# Data Set Loading for Visualization
Our World In Data look for the link with .org

In [36]:
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"

In [37]:
# Requesting the Covid Cases data from our world in data website as a json object.
data_cases = requests.get(
    'https://covid.ourworldindata.org/data/owid-covid-data.json')
# load json object for the total number of COVID cases
json_object_cases = json.loads(data_cases.content)

In [38]:
df_country_info = pd.read_csv(url, sep=',')

In [39]:
df_country_info.columns
df_country_info.head()
#df_country_info.to_csv('covid_full_data.csv', ';')

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,,37.746,0.5,64.83,0.511,,,,


In [40]:
country_list = ['AUS', 'USA', 'ESP', 'IND', 'CHN', 'DEU', 'AFG']

In [41]:
df_country = pd.DataFrame()
for each in country_list:
    df_country_info['iso_code'].unique()
    df_country_info['iso_code'] == each
    df_country = pd.concat(
        [df_country, df_country_info[df_country_info['iso_code'] == each]],
        sort=False)
    df_country = df_country.reset_index(drop=True)

df_country.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AUS,Oceania,Australia,2020-01-26,4.0,4.0,,,,,...,13.0,16.5,,3.84,83.44,0.944,-47.0,-0.4,2.8,-1.813195
1,AUS,Oceania,Australia,2020-01-27,5.0,1.0,,,,,...,13.0,16.5,,3.84,83.44,0.944,,,,
2,AUS,Oceania,Australia,2020-01-28,5.0,0.0,,,,,...,13.0,16.5,,3.84,83.44,0.944,,,,
3,AUS,Oceania,Australia,2020-01-29,6.0,1.0,,,,,...,13.0,16.5,,3.84,83.44,0.944,,,,
4,AUS,Oceania,Australia,2020-01-30,9.0,3.0,,,,,...,13.0,16.5,,3.84,83.44,0.944,,,,


In [82]:
location_list = df_country_info['location'].unique()
dict_country = {}
for each in location_list:
    dict_country.update({
        each:
        len(df_country_info[df_country_info['location'] == each]['date'])
    })

country_name_date = max(dict_country, key=lambda x: dict_country[x])
df_list = df_country_info[df_country_info['location'] ==
                          country_name_date].copy()
df_list.reset_index(drop=True)
df_list['date'] = pd.to_datetime(df_list['date'], format='%Y-%m-%d')
df_list = df_list.drop(df_list.iloc[:, :3], axis=1).drop(df_list.iloc[:, 4:],
                                                         axis=1)
df_list.head()

Unnamed: 0,date
6926,2020-01-01
6927,2020-01-02
6928,2020-01-03
6929,2020-01-04
6930,2020-01-05


In [86]:
for each in country_list:
    df_country_info['iso_code'].unique()
    df_info = df_country_info[df_country_info['iso_code'] == each]
    df_data = df_info.drop(df_info.iloc[:, :3], axis=1).drop(
        df_info.iloc[:, 5:],
        axis=1).rename(columns={'total_cases': 'Cases_per_pop_' + each})
    pop = json_object_cases[each]['population']
    df_data.iloc[:, 1] = df_data.iloc[:, 1].div(pop, axis=0)
    df_data['date'] = pd.to_datetime(df_data['date'], format='%Y-%m-%d')
    df_list = df_list.join(df_data.set_index('date'), on='date')
    df_list = df_list.reset_index(drop=True)

0           5.0
1           5.0
2           5.0
3           5.0
4           5.0
         ...   
870    183285.0
871    183358.0
872    183407.0
873    183445.0
874    183572.0
Name: Cases_per_pop_AFG, Length: 875, dtype: float64

In [11]:
# Defining the size for plotting
fig = go.Figure()
for each in country_list:
    fig.add_trace(
        go.Scatter(x=df_list.date,
                   y=df_list['Cases_per_pop_' + each],
                   mode='markers+lines',
                   opacity=0.9,
                   line_width=2,
                   marker_size=1,
                   name=each))

fig.update_layout(
    width=1600,
    height=1200,
    xaxis_title='Date',
    yaxis_title='Relative COVID Cases (Absolute Cases/Total Population)')

fig.update_yaxes(type='log')
fig.show()

# Dashboard Creation

In [12]:
app = dash.Dash()
app.layout = html.Div([
    html.Label('Multi-Select Country'),
    dcc.Dropdown(id='country_drop_down',
                 options=[{
                     'label': 'Australia',
                     'value': 'AUS'
                 }, {
                     'label': 'USA',
                     'value': 'USA'
                 }, {
                     'label': 'Spain',
                     'value': 'ESP'
                 }, {
                     'label': 'India',
                     'value': 'IND'
                 }, {
                     'label': 'China',
                     'value': 'CHN'
                 }, {
                     'label': 'Germany',
                     'value': 'DEU'
                 }, {
                     'label': 'Afghanistan',
                     'value': 'AFG'
                 }],
                 value=['USA', 'IND'],
                 multi=True),
    dcc.Graph(figure=fig, id='main_window_slope')
])

In [13]:
@app.callback(Output('main_window_slope', 'figure'),
              [Input('country_drop_down', 'value')])
def update_figure(country_list):

    traces = []
    for each in country_list:
        traces.append(
            dict(x=df_list.date,
                 y=df_list['Cases_per_pop_' + each],
                 mode='markers+lines',
                 opacity=0.9,
                 line_width=2,
                 marker_size=1,
                 name=each))

    return {
        'data':
        traces,
        'layout':
        dict(width=1280,
             height=720,
             xaxis_title='Date',
             yaxis_title=
             'Relative COVID Cases (Absolute Cases/Total Population)',
             xaxis={
                 'tickangle': -45,
                 'nticks': 20,
                 'tickfont': dict(size=14, color='#7f7f7f'),
             },
             yaxis={
                 'type': 'log',
                 'range': '[1.1, 5.5]'
             })
    }

In [14]:
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on


# Modelling Spread

In [15]:
df_list.sort_values('date', ascending=True).tail()

Unnamed: 0,date,Cases_per_pop_AUS,Cases_per_pop_USA,Cases_per_pop_ESP,Cases_per_pop_IND,Cases_per_pop_CHN,Cases_per_pop_DEU,Cases_per_pop_AFG
924,2022-07-13,0.333824,0.264539,0.274451,0.031039,0.000627,0.353204,0.004571
925,2022-07-14,0.33532,0.26497,0.274451,0.031054,0.000627,0.354519,0.004573
926,2022-07-15,0.336968,0.265441,0.275665,0.031068,0.000627,0.355995,0.004574
927,2022-07-16,0.338469,0.265643,0.275665,0.031082,0.000628,0.355995,0.004575
928,2022-07-17,0.339927,0.265705,0.275665,0.031095,0.000628,0.355995,0.004578


# Helper Function

In [16]:
def quick_plot(x_in, df_input, y_scale='log', slider=False):
    """ Quick basic plot for quick static evaluation of a time series

        you can push selective columns of your data frame by .iloc[:,[0,6,7,8]]

        Parameters:
        ----------
        x_in : array 
            array of date time object, or array of numbers
        df_input : pandas dataframe 
            the plotting matrix where each column is plotted
            the name of the column will be used for the legend
        scale: str
            y-axis scale as 'log' or 'linear'
        slider: bool
            True or False for x-axis slider


        Returns:
        ----------

    """
    fig = go.Figure()

    for each in df_input.columns:
        fig.add_trace(
            go.Scatter(x=x_in, y=df_input[each], name=each, opacity=0.8))

    fig.update_layout(autosize=True,
                      width=1024,
                      height=768,
                      font=dict(family="PT Sans, monospace",
                                size=18,
                                color="#7f7f7f"))
    fig.update_yaxes(type=y_scale),
    fig.update_xaxes(tickangle=-45,
                     nticks=20,
                     tickfont=dict(size=14, color="#7f7f7f"))
    if slider == True:
        fig.update_layout(xaxis_rangeslider_visible=True)
    fig.show()

In [17]:
quick_plot(df_list.date, df_list.iloc[:, 1:], y_scale='log', slider=True)

In [18]:
threshold = 0.01

In [19]:
import numpy as np
compare_list = []
for pos, country in enumerate(df_list.columns[1:]):
    compare_list.append(
        np.array(df_list[country][df_list[country] > threshold]))

In [1]:
pd_sync_timelines = pd.DataFrame(compare_list, index=df_list.columns[1:]).T

NameError: name 'pd' is not defined

In [21]:
pd_sync_timelines['date'] = np.arange(pd_sync_timelines.shape[0])

In [22]:
pd_sync_timelines.head()

Unnamed: 0,Cases_per_pop_AUS,Cases_per_pop_USA,Cases_per_pop_ESP,Cases_per_pop_IND,Cases_per_pop_CHN,Cases_per_pop_DEU,Cases_per_pop_AFG,date
0,0.010212,0.010193,0.010099,0.010154,,0.010262,,0
1,0.010534,0.010389,0.010287,0.01032,,0.010545,,1
2,0.010851,0.010599,0.010508,0.010506,,0.010821,,2
3,0.011232,0.010797,0.010508,0.010701,,0.011009,,3
4,0.011597,0.010988,0.010508,0.010885,,0.01114,,4


In [23]:
quick_plot(pd_sync_timelines.date,
           pd_sync_timelines.iloc[:, :-1],
           y_scale='log',
           slider=True)

# Doubling Rate

In [24]:
def doubling_rate(N_0, t, T_d):
    return N_0 * np.power(2, t / T_d)

In [25]:
max_days = 30

norm_slopes = {
    # 'doubling every day': doubling_rate(100, np.arange(10), 1),
    'doubling every 2 days': doubling_rate(100, np.arange(20), 2),
    'doubling every 4 days': doubling_rate(100, np.arange(20), 4),
    'doubling every 10 days': doubling_rate(100, np.arange(20), 10),
    'doubling every 25 days': doubling_rate(100, np.arange(20), 25)
}
pd_sync_timelines_w_slope = pd.concat(
    [pd.DataFrame(norm_slopes), pd_sync_timelines], axis=1)
quick_plot(pd_sync_timelines_w_slope.date,
           pd_sync_timelines_w_slope.iloc[:, 0:5],
           y_scale='log',
           slider=True)
# pd_sync_timelines_w_slope.to_csv('../data/processed/COVID_small_sync_timeline_table.csv',sep=';',index=False) # Needs for us to save the processed csv file in data/processed/sudonamehere.csv for this to work

# Understanding Linear Regression

In [None]:
reg = linear_model.LinearRegression(fit_intercept=True)
l_vec = len(df_list['total_cases_DEU'])
X = np.arange(l_vec - 5).reshape(-1, 1)
y = np.log(np.array(df_list['total_cases_DEU'][5:]))
reg.fit(X, y)
X_hat = np.arange(l_vec).reshape(-1, 1)
Y_hat = reg.predict(X_hat)
LR_inspect = df_list[['date', 'total_cases_DEU']].copy()
LR_inspect['prediction'] = np.exp(Y_hat)
quick_plot(LR_inspect.date, LR_inspect.iloc[:, 1:], y_scale='log', slider=True)

# Doubling Rate - Piecewise Linear Regression

In [None]:
from scipy import signal
reg = linear_model.LinearRegression(fit_intercept=True)
l_vec = len(df_list['total_cases_DEU'])
X = np.arange(l_vec - 50).reshape(-1, 1)
y = np.array(df_list['total_cases_DEU'][50:])
country_list = df_list.columns[1:]
for each in country_list:
    df_list[each + '_filter'] = signal.savgol_filter(
        df_list[each],
        19,  # window size used for filtering
        3)  # order of fitted polynomial
filter_cols = [
    'cases_per_pop_AUS_filter', 'total_cases_USA_filter',
    'total_cases_ESP_filter', 'total_cases_DEU_filter',
    'total_cases_IND_filter', 'total_cases_CHN_filter'
]
start_pos = 5
quick_plot(df_list.date[start_pos:],
           df_list[filter_cols].iloc[start_pos:, :],
           y_scale='log',
           slider=True)

In [None]:
def get_rate_via_regression(in_array):
    ''' Use a linear regression to approximate the doubling rate'''

    y = np.array(in_array)
    X = np.arange(-1, 2).reshape(-1, 1)

    assert len(in_array) == 3
    reg.fit(X, y)
    intercept = reg.intercept_
    slope = reg.coef_

    return intercept / slope

In [None]:
def doubling_time(in_array):
    ''' Use a classical doubling time formular, 
     see https://en.wikipedia.org/wiki/Doubling_time '''
    y = np.array(in_array)
    return len(y) * np.log(2) / np.log(y[-1] / y[0])


days_back = 3
for pos, country in enumerate(country_list):
    df_list[country + '_DR'] = df_list[country].rolling(
        window=days_back, min_periods=days_back).apply(get_rate_via_regression,
                                                       raw=False)
days_back = 3
for pos, country in enumerate(filter_cols):
    df_list[country + '_DR'] = df_list[country].rolling(
        window=days_back, min_periods=days_back).apply(get_rate_via_regression,
                                                       raw=False)
df_list['Germany_DR_math'] = df_list['total_cases_DEU'].rolling(
    window=days_back, min_periods=days_back).apply(doubling_time, raw=False)

In [None]:
days_back = 3
for pos, country in enumerate(filter_cols):
    df_list[country + '_DR'] = df_list[country].rolling(
        window=days_back, min_periods=days_back).apply(get_rate_via_regression,
                                                       raw=False)
df_list.columns
start_pos = 40
quick_plot(df_list.date,
           df_list.iloc[start_pos:, [11, 12, 13, 14]],
           y_scale='linear',
           slider=True)

In [None]:
start_pos = 40
quick_plot(df_list.date,
           df_list.iloc[start_pos:, [16, 17, 18, 19]],
           y_scale='linear',
           slider=True)

# Dashboard for Vaccination

In [48]:
url_vaccination = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'

In [49]:
df_vaccination_info = pd.read_csv(url_vaccination, sep=',')

In [50]:
df_vaccination_info.columns
df_vaccination_info.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
0,Afghanistan,AFG,2021-02-22,0.0,0.0,,,,,0.0,0.0,,,,,
1,Afghanistan,AFG,2021-02-23,,,,,,1367.0,,,,,34.0,1367.0,0.003
2,Afghanistan,AFG,2021-02-24,,,,,,1367.0,,,,,34.0,1367.0,0.003
3,Afghanistan,AFG,2021-02-25,,,,,,1367.0,,,,,34.0,1367.0,0.003
4,Afghanistan,AFG,2021-02-26,,,,,,1367.0,,,,,34.0,1367.0,0.003


In [51]:
df_vaccination = pd.DataFrame()
for each in country_list:
    df_vaccination_info['iso_code'].unique()
    df_vaccination_info['iso_code'] == each
    df_vaccination = pd.concat([
        df_vaccination,
        df_vaccination_info[df_vaccination_info['iso_code'] == each]
    ],
                               sort=False)
    df_vaccination = df_vaccination.reset_index(drop=True)

df_vaccination.head()

Unnamed: 0,location,iso_code,date,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,daily_vaccinations_raw,daily_vaccinations,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,daily_vaccinations_per_million,daily_people_vaccinated,daily_people_vaccinated_per_hundred
0,Australia,AUS,2021-02-21,20.0,20.0,,,,,0.0,0.0,,,,,
1,Australia,AUS,2021-02-22,2789.0,2789.0,,,2769.0,2769.0,0.01,0.01,,,107.0,2769.0,0.011
2,Australia,AUS,2021-02-23,6914.0,6911.0,3.0,,4125.0,3447.0,0.03,0.03,0.0,,133.0,3446.0,0.013
3,Australia,AUS,2021-02-24,16629.0,16626.0,3.0,,9715.0,5536.0,0.06,0.06,0.0,,214.0,5535.0,0.021
4,Australia,AUS,2021-02-25,23510.0,23501.0,9.0,,6881.0,5872.0,0.09,0.09,0.0,,227.0,5870.0,0.023


In [97]:
location_vacc_list = df_vaccination_info['location'].unique()
dict_vacc_country = {}
for each in location_vacc_list:
    dict_vacc_country.update({
        each:
        len(df_vaccination_info[df_vaccination_info['location'] == each]
            ['date'])
    })

country_vacc_name_date = max(dict_vacc_country,
                             key=lambda x: dict_vacc_country[x])
df_vacc_list = df_vaccination_info[df_vaccination_info['location'] ==
                                   country_vacc_name_date].copy()
df_vacc_list.reset_index(drop=True)
df_vacc_list['date'] = pd.to_datetime(df_vacc_list['date'], format='%Y-%m-%d')
df_vacc_list = df_vacc_list.drop(df_vacc_list.iloc[:, :2],
                                 axis=1).drop(df_vacc_list.iloc[:, 3:], axis=1)
df_vacc_list.head()

Unnamed: 0,date
33367,2020-12-02
33368,2020-12-03
33369,2020-12-04
33370,2020-12-05
33371,2020-12-06


In [108]:
for each in country_list:
    df_vaccination_info['iso_code'].unique()
    df_vacc_info = df_vaccination_info[df_vaccination_info['iso_code'] == each]
    df_vacc_data = df_vacc_info.drop(df_vacc_info.iloc[:, :2], axis=1).drop(
        df_vacc_info.iloc[:, 4:],
        axis=1).rename(columns={'total_vaccinations': 'Vacc_per_pop_' + each})
    pop = json_object_cases[each]['population']
    df_vacc_data.iloc[:, 1] = df_vacc_data.iloc[:, 1].div(pop, axis=0)
    df_vacc_data['date'] = pd.to_datetime(df_vacc_data['date'],
                                          format='%Y-%m-%d')
    df_vacc_list = df_vacc_list.join(df_vacc_data.set_index('date'), on='date')
    df_vacc_list = df_vacc_list.reset_index(drop=True)

df_vacc_list

Unnamed: 0,date,Vacc_per_pop_AUS,Vacc_per_pop_USA,Vacc_per_pop_ESP,Vacc_per_pop_IND,Vacc_per_pop_CHN,Vacc_per_pop_DEU,Vacc_per_pop_AFG
0,2020-12-02,,,,,,,
1,2020-12-03,,,,,,,
2,2020-12-04,,,,,,,
3,2020-12-05,,,,,,,
4,2020-12-06,,,,,,,
...,...,...,...,...,...,...,...,...
588,2022-07-13,2.238955,,2.007726,1.414671,2.391773,2.197502,
589,2022-07-14,2.239516,,,1.416753,2.392280,2.198255,
590,2022-07-15,2.240092,,,1.418739,2.392818,2.198834,
591,2022-07-16,2.240367,,,,2.393206,2.198996,


In [109]:
# Defining the size for plotting
fig = go.Figure()
for each in country_list:
    fig.add_trace(
        go.Scatter(x=df_vacc_list.date,
                   y=df_vacc_list['Vacc_per_pop_' + each],
                   mode='markers+lines',
                   opacity=0.9,
                   line_width=2,
                   marker_size=1,
                   name=each))

fig.update_layout(
    width=1600,
    height=1200,
    xaxis_title='Date',
    yaxis_title='Relative Vaccination(Total Vaccination/Total Population)')

fig.update_yaxes(type='log')
fig.show()

In [110]:
app = dash.Dash()
app.layout = html.Div([
    html.Label('Multi-Select Country'),
    dcc.Dropdown(id='country_drop_down',
                 options=[{
                     'label': 'Australia',
                     'value': 'AUS'
                 }, {
                     'label': 'USA',
                     'value': 'USA'
                 }, {
                     'label': 'Spain',
                     'value': 'ESP'
                 }, {
                     'label': 'India',
                     'value': 'IND'
                 }, {
                     'label': 'China',
                     'value': 'CHN'
                 }, {
                     'label': 'Germany',
                     'value': 'DEU'
                 }, {
                     'label': 'Afghanistan',
                     'value': 'AFG'
                 }],
                 value=['USA', 'IND'],
                 multi=True),
    dcc.Graph(figure=fig, id='main_window_slope')
])

In [111]:
@app.callback(Output('main_window_slope', 'figure'),
              [Input('country_drop_down', 'value')])
def update_figure(country_list):

    traces = []
    for each in country_list:
        traces.append(
            dict(x=df_vacc_list.date,
                 y=df_vacc_list['Vacc_per_pop_' + each],
                 mode='markers+lines',
                 opacity=0.9,
                 line_width=2,
                 marker_size=1,
                 name=each))

    return {
        'data':
        traces,
        'layout':
        dict(width=1280,
             height=720,
             xaxis_title='Date',
             yaxis_title=
             'Relative Vaccination(Total Vaccination/Total Population)',
             xaxis={
                 'tickangle': -45,
                 'nticks': 20,
                 'tickfont': dict(size=14, color='#7f7f7f'),
             },
             yaxis={
                 'type': 'log',
                 'range': '[1.1, 5.5]'
             })
    }

In [112]:
app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
