# Full Walkthrough Dashboard - SIR Modelling


The following part is included and works for any country that can be selected in the dashboard.
* SIR model of the COVID-19 infection - SIR fit of the infection rate for 100+ countries (any country can be selected)

In [6]:
## Sanity check of the working directory
## All data paths below are relative, so they only resolve when the working
## directory is the project base path. Start the notebook server from the base
## path; when the notebook is opened directly, the working directory is
## typically ../ads_covid-19/notebooks and has to be moved one level up.
import os

current_dir_name = os.path.basename(os.getcwd())
if current_dir_name == 'notebooks':
    # step out of the notebooks folder so relative data paths resolve
    os.chdir("../")

# last expression of the cell: shown as the cell output in the notebook
'Your base path is at: ' + os.path.basename(os.getcwd())

'Your base path is at: ads_covid-19'

## Imports 

In [7]:
import subprocess
import os

import pandas as pd
import numpy as np

from datetime import datetime

import requests
import json

## Automatic dataset retrieval

### Source: 
* Johns Hopkins dataset from https://github.com/CSSEGISandData/COVID-19.git


In [8]:
def get_johns_hopkins():
    ''' Update the local copy of the Johns Hopkins COVID-19 repository.

        If 'data/raw/COVID-19/' already exists, a "git pull" is executed inside
        it; otherwise the repository is cloned into 'data/raw/' (the directory
        is created when missing).

        The git command is passed as an argument list (no shell involved).
        Failures are reported on stdout instead of being silently dropped;
        no exception is raised, the function stays best effort.

        Returns
        -------
        None
    '''

    repo_url = 'https://github.com/CSSEGISandData/COVID-19.git'
    raw_dir = 'data/raw/'
    repo_dir = 'data/raw/COVID-19/'

    if os.path.exists(repo_dir):
        print('Repository exists. No fetch action required.')
        # the local clone is still refreshed with the latest upstream data
        command = ['git', 'pull']
        work_dir = os.path.dirname(repo_dir)
    else:
        print('Repository does not exist. Fetch action required.')
        # git would fail on a missing working directory
        os.makedirs(raw_dir, exist_ok=True)
        command = ['git', 'clone', repo_url]
        work_dir = os.path.dirname(raw_dir)

    try:
        git_process = subprocess.Popen(command,
                                       cwd=work_dir,
                                       stdout=subprocess.PIPE,
                                       stderr=subprocess.PIPE)
    except OSError as exc:
        # e.g. git is not installed or not on the PATH
        print('Error : could not start git: ' + str(exc))
        return

    (out, error) = git_process.communicate()

    if git_process.returncode != 0:
        print('Error : ' + error.decode(errors='replace'))

# Refresh the raw data only when this code runs as the main module
# (script or notebook cell), not when it is imported.
if __name__ == '__main__':
    get_johns_hopkins()

Repository exists. No fetch action required.


## Initial data preparation

In [9]:
import pandas as pd
import numpy as np

from datetime import datetime


def store_relational_data():
    ''' Build the flat per-country table that is used for the SIR fit.

        Reads the global confirmed-cases time series of the Johns Hopkins
        repository, sums all rows belonging to the same 'Country/Region' and
        writes one column per country plus a leading 'date' column to
        'data/processed/COVID_SIR_flat_table_multiplecountries.csv'
        (separator ';').

        The 'date' column keeps the raw column labels of the source file
        (format '%m/%d/%y'); the dashboard code parses exactly this format.

        Returns
        -------
        None
    '''
    data_path='data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    output_path='data/processed/COVID_SIR_flat_table_multiplecountries.csv'

    # Countries that are left out of the SIR table.
    # NOTE(review): the reason for this selection is not visible here
    # (presumably countries for which the SIR fit is not wanted) - confirm.
    excluded_countries = [
        'Taiwan*', 'South Sudan', 'Guyana', 'Haiti', 'Holy See', 'Honduras',
        'Hungary', 'Iceland', 'Iraq', 'Ireland', 'Israel', 'Italy',
        'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya',
        'Korea, South', 'Kosovo', 'Belgium', 'Belize', 'Benin', 'Bhutan',
        'Bolivia', 'Bosnia and Herzegovina', 'Botswana',
        'Brunei', 'Bulgaria', 'Burkina Faso', 'Burma', 'Burundi',
        'Cabo Verde', 'Cambodia', 'Cameroon', 'Canada',
        'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia',
        'Comoros', 'Congo (Brazzaville)', 'Congo (Kinshasa)', 'Costa Rica',
        "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark',
        'Diamond Princess', 'Djibouti', 'Luxembourg', 'MS Zaandam',
        'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta',
        'Mauritania', 'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia',
        'Montenegro', 'Morocco', 'Mozambique', 'Namibia', 'Nepal',
        'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Panama',
        'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Bahamas',
        'Egypt',
    ]

    pd_raw=pd.read_csv(data_path)

    # SIR model data preparation:
    # columns from position 4 onwards hold the counts, one column per date
    date_columns = list(pd_raw.columns[4:])

    # one aggregation instead of inserting the countries column by column;
    # sort=False keeps the countries in order of first appearance
    country_totals = pd_raw.groupby('Country/Region', sort=False)[date_columns].sum()

    # dates become rows, countries become columns
    sir_plot = country_totals.T
    sir_plot.index.name = 'date'
    sir_plot.columns.name = None
    sir_plot = sir_plot.reset_index()

    # errors='ignore': a country missing in the source file must not abort the export
    sir_plot = sir_plot.drop(columns=excluded_countries, errors='ignore')

    # Storing the processed data file, sep=';' is the separator [German std]
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    sir_plot.to_csv(output_path, sep=';', index=False)
    


# Rebuild the processed table only when this code runs as the main module
# (script or notebook cell), not when it is imported.
if __name__ == '__main__':
    store_relational_data()



## Calculations 

* Part 1: Doubling rate, doubling rate via regression, savgol filter, Rolling regression, Filtered data. 
* Part 2: SIR modelling, SIR fit.

In [10]:
from scipy import optimize
from scipy import integrate

# Load the flat per-country table and skip its first 80 rows.
# NOTE(review): presumably the first 80 dates are skipped because they carry
# too few cases for a meaningful fit - confirm.
# .copy() makes the slice an independent frame, so columns can be added to it
# later without a SettingWithCopyWarning.
df_input_large = pd.read_csv('data/processed/COVID_SIR_flat_table_multiplecountries.csv', sep=';').iloc[80:].copy()

# column labels of the table: 'date' first, followed by one column per country
df_all = list(df_input_large.columns)

def SIR_model(SIR,beta,gamma,N=None):
    ''' Simple SIR model

        Parameters
        ----------
        SIR: sequence of three numbers (S, I, R)
            S: susceptible population
            I: infected people
            R: recovered people
        beta: infection rate
        gamma: recovery rate
        N: size of the population, optional
            when omitted, the module-level variable N0 is used
            (behaviour of the original implementation)

        Returns
        -------
        list [dS_dt, dI_dt, dR_dt]

        overall condition is that the sum of changes (differences) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)

    '''

    if N is None:
        # backward compatible fallback on the global population size
        N = N0

    S,I,R=SIR
    new_infections = beta*S*I/N     # flow from S to I
    new_recoveries = gamma*I        # flow from I to R
    return([-new_infections, new_infections-new_recoveries, new_recoveries])

# Functions for SIR model with time step
def SIR_model_t(SIR,t,beta,gamma,N=None):
    ''' Simple SIR model with an explicit time argument

        Parameters
        ----------
        SIR: sequence of three numbers (S, I, R)
            S: susceptible population
            I: infected people
            R: recovered people
        t: time step, mandatory for integrate.odeint (not used in the equations)
        beta: infection rate
        gamma: recovery rate
        N: size of the population, optional
            when omitted, the module-level variable N0 is used
            (behaviour of the original implementation)

        Returns
        -------
        tuple (dS_dt, dI_dt, dR_dt)

        overall condition is that the sum of changes (differences) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)

    '''

    if N is None:
        # backward compatible fallback on the global population size
        N = N0

    S,I,R=SIR
    new_infections = beta*S*I/N     # flow from S to I
    new_recoveries = gamma*I        # flow from I to R
    return -new_infections, new_infections-new_recoveries, new_recoveries

#Function defined for optimize curve fit
def fit_odeint(x, beta, gamma):
    '''
    helper function for the integration, used as model function of the curve fit

    x: time points at which the SIR model is evaluated
    beta: infection rate
    gamma: recovery rate

    The initial state is taken from the module-level variables S0, I0 and R0.
    Returns the infected curve I evaluated at the time points x.
    '''
    # integrate over the time points handed in by the caller (previously the
    # global t was used and x was ignored); column 1 of the result is I
    return integrate.odeint(SIR_model_t, (S0, I0, R0), x, args=(beta, gamma))[:,1]

#Fitting parameter for SIR model
# Fit beta and gamma of the SIR model for every country column and store the
# fitted infection curve next to the measured one as '<country>_fitted'.
# N0, S0, I0, R0 and t are module-level names because the model functions
# above read them.
N0 = 6000000 #max susceptible population (same value is used for every country)
R0 = 0

fitted_columns = {}
for each in df_all[1:]:
    ydata = np.array(df_input_large[each])
    t = np.arange(len(ydata))

    # ensure re-initialization of the start values for every country
    I0 = ydata[0]
    S0 = N0-I0

    popt, pcov = optimize.curve_fit(fit_odeint, t, ydata, maxfev = 20000)
    # standard deviation of the fitted parameters (not used further in this cell)
    perr = np.sqrt(np.diag(pcov))

    # get the final fitted curve (already one-dimensional)
    fitted_columns[each +'_fitted'] = fit_odeint(t, *popt)

# add all fitted curves in one step instead of inserting column by column
df_input_large = pd.concat(
    [df_input_large, pd.DataFrame(fitted_columns, index=df_input_large.index)],
    axis=1)

df_input_large.to_csv('data/processed/COVID_fitted_SIR_flat_table.csv', sep=';',index=False)



## Dashboard Implementation

In [11]:
import pandas as pd
import numpy as np

import dash
dash.__version__
import dash_bootstrap_components as dbc
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output,State


import plotly.graph_objects as go
from plotly import tools

import os
print(os.getcwd())

# Measured and fitted curves: a 'date' column, one column per country and one
# '<country>_fitted' column per country.
df_input_sir = pd.read_csv('data/processed/COVID_fitted_SIR_flat_table.csv',sep=';')

# convert to datetime format
df_input_sir['date'] = pd.to_datetime(df_input_sir['date'], format='%m/%d/%y')

# df_all feeds the country dropdown via df_all[1:]: keep the first column
# ('date') in front and list only real countries. The '<country>_fitted'
# helper columns must not be selectable, the callback would look for
# '<country>_fitted_fitted' and fail.
_sir_columns = list(df_input_sir.columns)
_sir_column_set = set(_sir_columns)
df_all = _sir_columns[:1] + [each for each in _sir_columns[1:]
                             if each + '_fitted' in _sir_column_set]


# Dashboard set-up.
# The page is one container with three elements from top to bottom: a Markdown
# introduction, the country dropdown and the graph showing the SIR model.
# The BOOTSTRAP external stylesheet (rows / columns grid) is currently
# disabled, see the commented-out constructor call below.

#app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app = dash.Dash()
app.title = 'COVID-19 Dashboard'

# introduction text shown at the top of the page
_intro_text = '''
                            # Enterprise Data Science: COVID-19 Data Analytics
                            Goals of the project:
                            * To trace the confirmed cases for all the countries
                            * To calculate the doubling rate.
                            * To simulate the spread of COVID-19 using SIR model for 100+ countries.
                            * To create a user friendly dashboard, which shows the current count of confirmed cases, doubling rate and SIR model.
                            '''

# Dropdown for SIR model: every entry of df_all except the first one
_country_options = [{'label': country, 'value': country} for country in df_all[1:]]

_country_dropdown = dcc.Dropdown(
    id='country_dropdown_sir',
    options=_country_options,
    value='Albania',  # which are pre-selected
    multi=False,
)

# target of the SIR_fig callback
_sir_graph = dcc.Graph(id='SIR_model')

app.layout = html.Div([
    dcc.Markdown(_intro_text),
    _country_dropdown,
    _sir_graph,
])

@app.callback(
    Output('SIR_model', 'figure'),
    [Input('country_dropdown_sir', 'value')])
def SIR_fig(con_input):
    ''' Callback: build the figure of the SIR graph for the selected country.

        con_input: value of the country dropdown, a column name of
                   df_input_sir (None when the dropdown has been cleared)

        Returns a figure dictionary with two traces, the column con_input and
        its companion column con_input + '_fitted', both plotted over the
        date on a logarithmic y axis. When the selection is empty or one of
        the two columns does not exist, a figure without traces is returned
        instead of raising an exception.
    '''
    df = df_input_sir

    data = []
    # the traces depend on the selection only, so they are built exactly once
    if (con_input is not None
            and con_input in df.columns
            and (con_input + '_fitted') in df.columns):
        data.append(go.Scatter(x=df.date,
                               y=df[con_input],
                               mode='lines+markers',
                               name=con_input))

        data.append(go.Scatter(x=df.date,
                               y=df[con_input + '_fitted'],
                               mode='lines+markers',
                               name=con_input + '_fitted'))

    return {'data': data,
            'layout': dict(
                height=900,
                title='SIR virus spread model',
                xaxis={'tickangle': -45,
                       'nticks': 20,
                       'tickfont': dict(size=14, color="#7f7f7f"),
                       },
                # The y range used to be given as the string '[1.1,5.5]',
                # which is not a valid range value (a list is expected), so
                # the axis was autoscaled anyway. The entry is left out to
                # keep that behaviour; a fixed range would have to be a list
                # of log10 values, e.g. [1.1, 5.5].
                yaxis={'type': "log"}
            )
            }

# Start the Dash development server only when this code runs as the main
# module (script or notebook cell), not when it is imported.
if __name__ == '__main__':

    # use_reloader is switched off explicitly while debug mode stays on
    app.run_server(debug=True, use_reloader=False)   

D:\CVT\EDS_SS2022_Salim\ads_covid-19
Dash is running on http://127.0.0.1:8050/



The dash_core_components package is deprecated. Please replace
`import dash_core_components as dcc` with `from dash import dcc`
  import sys
The dash_html_components package is deprecated. Please replace
`import dash_html_components as html` with `from dash import html`
  
[33m * Tip: There are .env or .flaskenv files present. Do "pip install python-dotenv" to use them.[0m


 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
