In [6]:
# %load ./src/import_data.py
import subprocess
import os
import requests
import json
import pandas as pd
from datetime import datetime

def import_json():
    data_cases = requests.get('https://covid.ourworldindata.org/data/owid-covid-data.json')
    json_object_cases=json.loads(data_cases.content)

    return json_object_cases


def import_cases_data():
    ''' Get data by a git pull request, the source code has to be pulled first
        Result is stored in the predifined csv structure. If there is no Repository 
        not present then clone the data from GitHub.
    '''
    
    if os.path.exists('../data/raw/COVID-19/'):
        print('Repository Exists: Fetch the latest data from repository')
        git_pull = subprocess.Popen( "git pull" ,
                             cwd = os.path.dirname('../data/raw/COVID-19/' ),
                             shell = True,
                             stdout = subprocess.PIPE,
                             stderr = subprocess.PIPE )
        (out, error) = git_pull.communicate()
    else:
        print('Repository not present. Fetch the entire repository')
        git_clone = subprocess.Popen( "git clone https://github.com/CSSEGISandData/COVID-19.git" ,
                             cwd = os.path.dirname('../data/raw/' ),
                             shell = True,
                             stdout = subprocess.PIPE,
                             stderr = subprocess.PIPE )
        (out, error) = git_clone.communicate()


    url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
    df_country_info = pd.read_csv(url, sep=',')


    
    # load json object for the total number of COVID cases
    json_object_cases=import_json()

    countries_list = list(json_object_cases.keys())
    country_remove=['OWID_INT','OWID_CYN']
    list_cases_country=list(set(countries_list) - set(country_remove))

    return list_cases_country, df_country_info

def import_vacc_data():
    ''' Get data by a git pull request, the source code has to be pulled first
        Result is stored in the predifined csv structure. If there is no Repository 
        not present then clone the data from GitHub.
    '''
    
    # Requesting the Vacination data from our world in data:
    url_vaccination = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/vaccinations.csv'

    #Dumping all data from json into a variable:
    df_vaccination_info = pd.read_csv(url_vaccination, sep=',')

    return df_vaccination_info


if __name__ == '__main__':
    import_json()
    import_cases_data()
    import_vacc_data()
    

Repository Exists: Fetch the latest data from repository


In [7]:
# %load ./src/store_relational_data.py
import pandas as pd
import numpy as np
import os
import sys

from datetime import datetime
path=(os.getcwd()+'\\src\\')
sys.path.append(path)

import import_data


def store_relational_data():
    ''' Transformes the COVID data in a relational data set

    '''
    data_path='../data/raw/COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
    pd_raw=pd.read_csv(data_path)

    time_idx = pd_raw.columns[4:]
    df_plot = pd.DataFrame({
        'date':time_idx})
    df_input_large= pd_raw['Country/Region'].unique()
    
    for each in df_input_large:
        df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
    df = df_plot.drop('date', axis=1)
    
    #Merging the data set over COUNTRY for CODE column for worldmap
    df_code = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
    world_raw =  pd.DataFrame({"COUNTRY" : df_input_large, "Confirm cases" :df.iloc[-1]})
    world_con = pd.merge(world_raw, df_code, on = "COUNTRY").drop('GDP (BILLIONS)', axis=1)
    world_con.to_csv('../data/processed/COVID_CRD.csv',sep=';',index=False)
    
    #Continuation of data preparation 
    pd_data_base=pd_raw.rename(columns={'Country/Region':'COUNTRY',
                      'Province/State':'state'})

    pd_data_base['state']=pd_data_base['state'].fillna('no')

    pd_data_base=pd_data_base.drop(['Lat','Long'],axis=1)


    pd_relational_model_1=pd_data_base.set_index(['state','COUNTRY']) \
                                .T                              \
                                .stack(level=[0,1])             \
                                .reset_index()                  \
                                .rename(columns={'level_0':'date',
                                                   0:'confirmed'},
                                                  )
    pd_relational_model = pd.merge(pd_relational_model_1, df_code, on = "COUNTRY").drop('GDP (BILLIONS)', axis=1)
    pd_relational_model['date']=pd_relational_model.date.astype('datetime64[ns]')
    
    pd_relational_model.to_csv('../data/processed/20200823_COVID_relational_confirmed.csv',sep=';',index=False)
    
    
    #SIR model data preparation
    sir_plot = pd.DataFrame({
    'date':time_idx})
    #sir_plot.head()
    sir_arr= pd_raw['Country/Region'].unique()
    sir_list = sir_arr.tolist()
    for each in sir_list:
        sir_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
    #sir_plot.head()
    
    time_idx = [datetime.strptime(each, "%m/%d/%y") for each in sir_plot.date] #to convert all the dates into datetime 
    time_str= [each.strftime('%Y-%m-%d') for each in time_idx] #to convert datetime function to string
    #time_str[0:5]
    
    #Storing the processed data file and sep';' is a seperator [German std]
    sir_plot.to_csv('../data/processed/COVID_sir_flat_table.csv', sep=';',index=False)
    
    
    print(' Number of rows stored: '+str(pd_relational_model.shape[0]))
    print(' Latest date is: '+str(max(pd_relational_model.date)))

    #Processing Data for Cases per pop:
    list_cases_country,df_country_info=import_data.import_cases_data()
    json_object_cases=import_data.import_json()

    df_country = pd.DataFrame()
    for each in list_cases_country:
        df_country_info['iso_code'].unique()
        df_country_info['iso_code'] == each
        df_country = pd.concat(
            [df_country, df_country_info[df_country_info['iso_code'] == each]],
            sort=False)
        df_country = df_country.reset_index(drop=True)

    location_list = df_country_info['location'].unique()
    dict_country = {}
    for each in location_list:
        dict_country.update({
            each:
            len(df_country_info[df_country_info['location'] == each]['date'])
        })

    country_name_date = max(dict_country, key=lambda x: dict_country[x])
    df_list = df_country_info[df_country_info['location'] ==
                            country_name_date].copy()
    df_list.reset_index(drop=True)
    df_list['date'] = pd.to_datetime(df_list['date'], format='%Y-%m-%d')
    df_list = df_list.drop(df_list.iloc[:, :3], axis=1).drop(df_list.iloc[:, 4:],
                                                            axis=1)


    for each in list_cases_country:
        df_country_info['iso_code'].unique()
        df_info = df_country_info[df_country_info['iso_code'] == each]
        df_data = df_info.drop(df_info.iloc[:, :3], axis=1).drop(
            df_info.iloc[:, 5:],
            axis=1).rename(columns={'total_cases': 'Cases_per_pop_' + each})
        pop = json_object_cases[each]['population']
        df_data.iloc[:, 1] = df_data.iloc[:, 1].div(pop, axis=0)
        df_data['date'] = pd.to_datetime(df_data['date'], format='%Y-%m-%d')
        df_list = df_list.join(df_data.set_index('date'), on='date')
        df_list = df_list.reset_index(drop=True)

    df_list.to_csv('../data/processed/Cases_pop_NoNaN.csv', sep=';', index=False)

    #Procesing Vaccination Data:
    
    df_vaccination_info=import_data.import_vacc_data()
    df_vaccination = pd.DataFrame()
    for each in list_cases_country:
        df_vaccination_info['iso_code'].unique()
        df_vaccination_info['iso_code'] == each
        df_vaccination = pd.concat([
            df_vaccination,
            df_vaccination_info[df_vaccination_info['iso_code'] == each]
        ],
                                sort=False)
        df_vaccination = df_vaccination.reset_index(drop=True)


    location_vacc_list = df_vaccination_info['location'].unique()
    dict_vacc_country = {}
    for each in location_vacc_list:
        dict_vacc_country.update({
            each:
            len(df_vaccination_info[df_vaccination_info['location'] == each]
                ['date'])
        })

    country_vacc_name_date = max(dict_vacc_country,
                                key=lambda x: dict_vacc_country[x])
    df_vacc_list = df_vaccination_info[df_vaccination_info['location'] ==
                                    country_vacc_name_date].copy()
    df_vacc_list.reset_index(drop=True)
    df_vacc_list['date'] = pd.to_datetime(df_vacc_list['date'], format='%Y-%m-%d')
    df_vacc_list = df_vacc_list.drop(df_vacc_list.iloc[:, :2],
                                    axis=1).drop(df_vacc_list.iloc[:, 3:], axis=1)

    # # load json object for the total number of COVID cases
    # data_cases = requests.get('https://covid.ourworldindata.org/data/owid-covid-data.json')
    # json_object_cases=json.loads(data_cases.content)

    for each in list_cases_country:
        df_vaccination_info['iso_code'].unique()
        df_vacc_info = df_vaccination_info[df_vaccination_info['iso_code'] == each]
        df_vacc_data = df_vacc_info.drop(df_vacc_info.iloc[:, :2], axis=1).drop(
            df_vacc_info.iloc[:, 4:],
            axis=1).rename(columns={'total_vaccinations': 'Vacc_per_pop_' + each})
        pop = json_object_cases[each]['population']
        df_vacc_data.iloc[:, 1] = df_vacc_data.iloc[:, 1].div(pop, axis=0)
        df_vacc_data['date'] = pd.to_datetime(df_vacc_data['date'],
                                            format='%Y-%m-%d')
        df_vacc_list = df_vacc_list.join(df_vacc_data.set_index('date'), on='date')
        df_vacc_list = df_vacc_list.reset_index(drop=True)


    df_vacc_list.to_csv('../data/processed/Vax_per_pop.csv', sep=';', index=False)



if __name__ == '__main__':

    store_relational_data()

  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[each] =np.array(pd_raw[pd_raw['Country/Region']==each].iloc[:,4::].sum(axis=0))
  df_plot[

 Number of rows stored: 245488
 Latest date is: 2022-07-25 00:00:00
Repository Exists: Fetch the latest data from repository


Filter and Doubling Rate Calculation (Modelling)

In [8]:
# %load ./src/get_features.py
import numpy as np
from sklearn import linear_model
reg = linear_model.LinearRegression(fit_intercept=True)
import pandas as pd

from scipy import signal


def calc_doubling_rate(df_input,filter_on='confirmed'):
    ''' Calculate approximated doubling rate and return merged data frame

        Parameters:
        ----------
        df_input: pd.DataFrame
        filter_on: str
            defines the used column
        Returns:
        ----------
        df_output: pd.DataFrame
            the result will be joined as a new column on the input data frame
    '''

    must_contain=set(['state','COUNTRY',filter_on])
    assert must_contain.issubset(set(df_input.columns)), ' Erro in calc_filtered_data not all columns in data frame'


    pd_DR_result= df_input.groupby(['state','COUNTRY']).apply(rolling_reg,filter_on).reset_index()

    pd_DR_result=pd_DR_result.rename(columns={filter_on:filter_on+'_DR',
                             'level_2':'index'})

    #we do the merge on the index of our big table and on the index column after groupby
    df_output=pd.merge(df_input,pd_DR_result[['index',str(filter_on+'_DR')]],left_index=True,right_on=['index'],how='left')
    df_output=df_output.drop(columns=['index'])


    return df_output


def get_doubling_time_via_regression(in_array):
    ''' Use a linear regression to approximate the doubling rate

        Parameters:
        ----------
        in_array : pandas.series

        Returns:
        ----------
        Doubling rate: double
    '''

    y = np.array(in_array)
    X = np.arange(-1,2).reshape(-1, 1)

    assert len(in_array)==3
    reg.fit(X,y)
    intercept=reg.intercept_
    slope=reg.coef_

    return intercept/slope


def savgol_filter(df_input,column='confirmed',window=5):
    ''' Savgol Filter is a digital filter that can be applied to a set of digital data points for the purpose of 
        smoothing the data, that is, to increase the precision of the data without distorting the signal tendency.
        
        Parameters:
        ----------
        df_input : pandas.series
        column : str
        window : int
            used data points to calculate the filter result

        Returns:
        ----------
        df_result: pd.DataFrame
            the index of the df_input has to be preserved in result
    '''

    degree=1
    df_result=df_input

    filter_in=df_input[column].fillna(0) # attention with the neutral element here

    result=signal.savgol_filter(np.array(filter_in),
                           window, # window size used for filtering
                           1)
    df_result[str(column+'_filtered')]=result
    return df_result

def rolling_reg(df_input,col='confirmed'):
    ''' Rolling Regression is used to approximate the doubling time'

        Parameters:
        ----------
        df_input: pd.DataFrame
        col: str
            defines the used column
        Returns:
        ----------
        result: pd.DataFrame
    '''
    days_back=3
    result=df_input[col].rolling(
                window=days_back,
                min_periods=days_back).apply(get_doubling_time_via_regression,raw=False)



    return result




def calc_filtered_data(df_input,filter_on='confirmed'):
    '''  Calculate savgol filter and return merged data frame

        Parameters:
        ----------
        df_input: pd.DataFrame
        filter_on: str
            defines the used column
        Returns:
        ----------
        df_output: pd.DataFrame
            the result will be joined as a new column on the input data frame
    '''

    must_contain=set(['state','COUNTRY',filter_on])
    assert must_contain.issubset(set(df_input.columns)), ' Erro in calc_filtered_data not all columns in data frame'

    df_output=df_input.copy() # we need a copy here otherwise the filter_on column will be overwritten

    pd_filtered_result=df_output[['state','COUNTRY',filter_on]].groupby(['state','COUNTRY']).apply(savgol_filter)#.reset_index()

    #print('--+++ after group by apply')
    #print(pd_filtered_result[pd_filtered_result['country']=='Germany'].tail())

    #df_output=pd.merge(df_output,pd_filtered_result[['index',str(filter_on+'_filtered')]],on=['index'],how='left')
    df_output=pd.merge(df_output,pd_filtered_result[[str(filter_on+'_filtered')]],left_index=True,right_index=True,how='left')
    #print(df_output[df_output['country']=='Germany'].tail())
    return df_output.copy()



if __name__ == '__main__':
    test_data_reg=np.array([2,4,6])
    result=get_doubling_time_via_regression(test_data_reg)
    print('the test slope is: '+str(result))

    pd_JH_data=pd.read_csv('../data/processed/20200823_COVID_relational_confirmed.csv',sep=';',parse_dates=[0])
    pd_JH_data=pd_JH_data.sort_values('date',ascending=True).copy()

    #test_structure=pd_JH_data[((pd_JH_data['country']=='US')|
    #                  (pd_JH_data['country']=='Germany'))]

    pd_result_larg=calc_filtered_data(pd_JH_data)
    pd_result_larg=calc_doubling_rate(pd_result_larg)
    pd_result_larg=calc_doubling_rate(pd_result_larg,'confirmed_filtered')


    mask=pd_result_larg['confirmed']>100
    pd_result_larg['confirmed_filtered_DR']=pd_result_larg['confirmed_filtered_DR'].where(mask, other=np.NaN)
    pd_result_larg.to_csv('../data/processed/COVID_final_set.csv',sep=';',index=False)
    print(pd_result_larg[pd_result_larg['COUNTRY']=='Germany'].tail())

the test slope is: [2.]
             date state  COUNTRY   confirmed CODE  confirmed_filtered  \
133731 2022-07-21    no  Germany  30239122.0  DEU          30205473.6   
133732 2022-07-22    no  Germany  30331131.0  DEU          30272764.4   
133733 2022-07-23    no  Germany  30331133.0  DEU          30341824.8   
133734 2022-07-24    no  Germany  30331133.0  DEU          30389321.6   
133735 2022-07-25    no  Germany  30476605.0  DEU          30436818.4   

        confirmed_DR  confirmed_filtered_DR  
133731  2.464517e+02             269.804637  
133732  3.025988e+02             371.001523  
133733  6.586269e+02             444.049693  
133734  3.033113e+07             520.510735  
133735  4.176697e+02             639.818295  


SIR Modelling

In [9]:
# %load ./src/sir_modeling.py
import numpy as np
import pandas as pd 
from scipy import optimize
from scipy import integrate


df_input_large=pd.read_csv('../data/processed/COVID_sir_flat_table.csv',sep=';').iloc[80:]
pop = pd.read_csv('../data/processed/population.csv',sep=';')

df_all = df_input_large.columns
df_all = list(df_all)

def SIR_model(SIR,beta,gamma):
    ''' Simple SIR model
        S: susceptible population
        I: infected people
        R: recovered people
        beta: 
        
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)
    
    '''
    
    S,I,R=SIR
    dS_dt=-beta*S*I/N0          #S*I is the 
    dI_dt=beta*S*I/N0-gamma*I
    dR_dt=gamma*I
    return([dS_dt,dI_dt,dR_dt])


# Functions for SIR model with time step
def SIR_model_t(SIR,t,beta,gamma):
    ''' Simple SIR model
        S: susceptible population
        t: time step, mandatory for integral.odeint
        I: infected people
        R: recovered people
        beta: 
        
        overall condition is that the sum of changes (differnces) sum up to 0
        dS+dI+dR=0
        S+I+R= N (constant size of population)
    
    '''
    
    S,I,R=SIR
    dS_dt=-beta*S*I/N0          #S*I is the 
    dI_dt=beta*S*I/N0-gamma*I
    dR_dt=gamma*I
    return dS_dt,dI_dt,dR_dt


#Function defined for optimize curve fit
def fit_odeint(x, beta, gamma):
    '''
    helper function for the integration
    '''
    return integrate.odeint(SIR_model_t, (S0, I0, R0), t, args=(beta, gamma))[:,1] # we only would like to get dI

#Fitting parameter for SIR model
for each in df_all[1:]:
    ydata = np.array(df_input_large[each])
    t=np.arange(len(ydata))
    N0 = 6000000 #max susceptible population

    # ensure re-initialization 
    I0=ydata[0]
    S0=N0-I0
    R0=0

    popt, pcov = optimize.curve_fit(fit_odeint, t, ydata, maxfev = 1600000)
    perr = np.sqrt(np.diag(pcov))

    # get the final fitted curve
    fitted=fit_odeint(t, *popt).reshape(-1,1)
    df_input_large[each +'_fitted'] = fitted 
    
df_input_large.to_csv('../data/processed/COVID_sir_fitted_table.csv', sep=';')
df_input_large.head()

  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dI_dt=beta*S*I/N0-gamma*I
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dI_dt=beta*S*I/N0-gamma*I
  dR_dt=gamma*I
  dS_dt=-beta*S*I/N0          #S*I is the
  dI_dt=beta*S*I/N0-gamma*I
  dI_dt=beta*S*I/N

Unnamed: 0,date,Afghanistan,Albania,Algeria,Andorra,Angola,Antarctica,Antigua and Barbuda,Argentina,Armenia,...,Uruguay_fitted,Uzbekistan_fitted,Vanuatu_fitted,Venezuela_fitted,Vietnam_fitted,West Bank and Gaza_fitted,Winter Olympics 2022_fitted,Yemen_fitted,Zambia_fitted,Zimbabwe_fitted
80,4/11/20,521,433,1825,601,19,0,21,1975,967,...,501.0,767.0,0.0,175.0,258.0,268.0,0.0,1.0,40.0,14.0
81,4/12/20,555,446,1914,638,19,0,21,2142,1013,...,505.083685,775.223011,0.0,177.526843,259.098289,271.658072,0.0,1.01599,40.425164,14.135654
82,4/13/20,607,467,1983,646,19,0,23,2208,1039,...,509.158326,783.534033,0.0,180.090165,260.189975,275.366064,0.0,1.032236,40.85457,14.272591
83,4/14/20,665,475,2070,659,19,0,23,2277,1067,...,513.222829,791.934004,0.0,182.690493,261.274887,279.124657,0.0,1.048742,41.288254,14.410822
84,4/15/20,770,494,2160,673,19,0,23,2443,1111,...,517.276087,800.423875,0.0,185.328359,262.352857,282.934539,0.0,1.065512,41.726252,14.55036


Visual Board

In [10]:
# %load ./src/dashboard.py
import pandas as pd
import numpy as np
import dash_bootstrap_components as dbc
import plotly.graph_objects as go
import dash
import os
import sys

from dash import dcc
from dash import html
from dash.dependencies import Input, Output,State

path=(os.getcwd()+'\\src\\')
sys.path.append(path)
import import_data 

list_cases_country,df_country_info=import_data.import_cases_data()

df_input_large = pd.read_csv('../data/processed/COVID_final_set.csv',sep=';')
df = pd.read_csv('../data/processed/COVID_CRD.csv',sep=';')
df_input_sir = pd.read_csv('../data/processed/COVID_sir_fitted_table.csv',sep=';')
df_all = df_input_sir.columns
df_all = list(df_all[:109])


df_list=pd.read_csv('../data/processed/Cases_pop_NoNaN.csv',sep=';')
df_vacc_list=pd.read_csv('../data/processed/Vax_per_pop.csv', sep=';')


country_name=df_country_info['location'].unique()
country_iso_code=df_country_info['iso_code'].unique()



'''Dashboard is created by using an external stylesheet named BOOTSTRAP. 
BOOTSTRAP allows us to divide the dashboard into Rows and columns.
COVID-19 dashbord has 5 Rows and 2 columns'''

app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = 'COVID-19 Dashboard'

app.layout = html.Div([
        # First Row: Information regarding dashboard page
        dbc.Row(dbc.Col(html.Div(dcc.Markdown('''
                            # Enterprise Data Science: COVID-19 Data Analytics
                            Goals of the project:
                            * To trace the confirmed cases for all the countries
                            * To calculate the doubling rate.
                            * To simulate the spread of COVID-19 in Brazil using SIR model and also for 100+ countries.
                            * To create a user friendly dashboard, which shows the current count of confirmed cases, doubling rate and SIR model.
                            ''')),
                        width={'size': 15, 'offset': 1},
                        )
                ),
        # Second Row: Dropdowns for first two graphs
        dbc.Row(
            [   #Dropdown for Timeline Confirmed and Doubling rate
                dbc.Col(dcc.Dropdown(
                            id='country_dropdown',
                            options=[ {'label': each,'value':each} for each in df_input_large['COUNTRY'].unique()],
                            value=['United Kingdom', 'Germany','India'], # which are pre-selected
                            multi= True),
                        
                        width={'size': 5, "offset": 0, 'order': 'first'}
                        ),

                dbc.Col(dcc.Dropdown(
                            id='country_drop_down',
                            options=[{'label':country_name[each],'value':country_iso_code[each]} for each in range(len(country_name))],
                            value=['USA', 'IND'],
                            multi= True),
                          
                        width={'size': 5, "offset": 6, 'order': 'first'}                         
                        ),

                dbc.Col(
                        dcc.Dropdown(
                            id='doubling_time',
                            options=[
                                {'label': 'Timeline Confirmed ', 'value': 'confirmed'},
                                {'label': 'Timeline Confirmed Filtered', 'value': 'confirmed_filtered'},
                                {'label': 'Timeline Doubling Rate', 'value': 'confirmed_DR'},
                                {'label': 'Timeline Doubling Rate Filtered', 'value': 'confirmed_filtered_DR'}
                            ],
                            value='confirmed',
                            multi=False
                            ),
                        width={'size': 3, "offset": 0, 'order': 'second'}
                        ),

                ], className="g-0",
                #style=dict(display='flex')
        ),
    
        # Third Row: Graphs for cases/confirmed cases/Doubling rate and graph for cases per population:                        
        dbc.Row(
            [
                dbc.Col(dcc.Graph( 
                            id='main_window_slope'
                            ),
                        width=6, md={'size': 5,  "offset": 0, 'order': 'first'}
                        ),

                dbc.Col(dcc.Graph(
                            id='cases_per_pop'
                            ),
                        width=6, md={'size': 5,  "offset": 1, 'order': 'first'}
                        ),                
            ],
                ),

        dbc.Row(
            [
                dbc.Col(dcc.Dropdown(
                        id='country_vacc_data',
                        options=[{'label':country_name[each],'value':country_iso_code[each]} for each in range(len(country_name))],
                        value=['USA', 'IND'],
                        multi= True),
                                
                    width={'size': 5, "offset": 0, 'order': 'second'}                         
                   ),

                #Dropdown for SIR model
                dbc.Col(dcc.Dropdown(
                            id='country_dropdown_sir',
                            options=[ {'label': each,'value':each} for each in df_all[1:]],
                            value='Brazil', # which are pre-selected
                            multi= False
                            ),
                           
                        width={'size': 5, "offset": 6, 'order': 'second'}
                        ),
            ], className="g-0",
                ),
        dbc.Row(
            [
                dbc.Col(dcc.Graph(
                            id='vacc_data'
                            ),
                        width=6, md={'size': 5,  "offset": 0, 'order': 'first'}
                        ),   

                dbc.Col(dcc.Graph(
                            id='SIR_model'
                            ),
                        width=6, md={'size': 5,  "offset": 1, 'order': 'first'}
                        ),
            ],
                ),
        
        dbc.Row(
                dbc.Col(dcc.Graph(id = "World_map",
                              figure = go.Figure(data = [go.Choropleth(
                                        locations = df['CODE'],
                                        z = df['Confirm cases'],
                                        text = df['COUNTRY'],
                                        colorscale = 'Blues',
                                        autocolorscale=False,
                                        reversescale=False,
                                        marker_line_color='darkgray',
                                        marker_line_width=0.5,
                                        colorbar_title = 'Confirmed cases'
                                        )],
                                        layout = go.Layout(
                                        title_text='COVID 19 WORLD MAP',
                                        height=1300,
                                        autosize = True,
                                        geo=dict(
                                            showframe=False,
                                            showcoastlines=False,
                                            projection_type='equirectangular'
                                        ))
                                     ),
                              
                              ),
                        width=12, md={'size': 12,  "offset": 0, 'order': 'first'}
                        ),
         )


])



@app.callback(Output('cases_per_pop', 'figure'),
              [Input('country_drop_down', 'value')])
def Cases_fig(list_cases_country):

    traces = []
    for each in list_cases_country:
        traces.append(
            dict(x=df_list.date,
                 y=df_list['Cases_per_pop_' + each],
                 mode='markers+lines',
                 opacity=0.9,
                 line_width=2,
                 marker_size=1,
                 name=each))

    return {
        'data':
        traces,
        'layout':
        dict(width=1280,
             height=900,
             title='Plot for Cases per Population',
             xaxis={'title':'Date',
                 'tickangle': -45,
                 'nticks': 20,
                 'tickfont': dict(size=14, color='#7f7f7f'),
             },
             yaxis={'title':'Relative COVID Cases (Absolute Cases/Total Population)',
                 'type': 'log',
                 'range': '[1.1, 5.5]'
             })
    }






@app.callback(Output('vacc_data', 'figure'),
              [Input('country_vacc_data', 'value')])
def Vacc_fig(list_cases_country):

    traces = []
    for each in list_cases_country:
        traces.append(
            dict(x=df_vacc_list.date,
                 y=df_vacc_list['Vacc_per_pop_' + each],
                 mode='markers+lines',
                 opacity=0.9,
                 line_width=2,
                 marker_size=1,
                 name=each))

    return {
        'data':
        traces,
        'layout':
        dict(width=1280,
             height=900,
             title='Plot for Vaccination Data',
             xaxis={'title':'Date',
                 'tickangle': -45,
                 'nticks': 20,
                 'tickfont': dict(size=14, color='#7f7f7f'),
             },
             yaxis={'title':'Relative Vaccination(Total Vaccination/Total Population)',
                 'type': 'log',
                 'range': '[1.1, 5.5]'
             })
    }




@app.callback(
    Output('main_window_slope', 'figure'),
    [Input('country_dropdown', 'value'),
    Input('doubling_time', 'value')])

def update_figure(country_list,show_doubling):


    if 'DR' in show_doubling:
        my_yaxis={'type':"log",
               'title':'Approximated doubling rate over 3 days (larger numbers are better #stayathome)'
              }
    else:
        my_yaxis={'type':"log",
                  'title':'Confirmed infected people (source johns hopkins csse, log-scale)'
              }


    traces = []
    for each in country_list:

        df_plot=df_input_large[df_input_large['COUNTRY']==each]

        if show_doubling=='doubling_rate_filtered':
            df_plot=df_plot[['state','COUNTRY','confirmed','confirmed_filtered','confirmed_DR','confirmed_filtered_DR','date']].groupby(['COUNTRY','date']).agg(np.mean).reset_index()
        else:
            df_plot=df_plot[['state','COUNTRY','confirmed','confirmed_filtered','confirmed_DR','confirmed_filtered_DR','date']].groupby(['COUNTRY','date']).agg(np.sum).reset_index()
       #print(show_doubling)


        traces.append(dict(x=df_plot.date,
                                y=df_plot[show_doubling],
                                mode='markers+lines',
                                opacity=0.9,
                                name=each
                        )
                )

    return {
            'data': traces,
            'layout': dict (
                width=1280,
                height=900,
                xaxis={'title':'Timeline',
                        'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },

                yaxis=my_yaxis
        ) 
    }

@app.callback(
    Output('SIR_model', 'figure'),
    [Input('country_dropdown_sir', 'value')])

def SIR_fig(con_input):
    df= df_input_sir
   
    
    for i in df[1:]:
        data = []
        trace = go.Scatter(x=df.date,
                        y=df[con_input],
                        mode='lines+markers',
                        name = con_input)
        data.append(trace)
        
        trace_fitted = go.Scatter(x=df.date,
                        y=df[con_input +'_fitted'], 
                        mode='lines+markers',
                        name=con_input+'_fitted')
        data.append(trace_fitted)
        
        
            
    return {'data': data,
            'layout' : dict(
                width=1280,
                height=900,
                title= 'SIR model',
                xaxis={'tickangle':-45,
                        'nticks':20,
                        'tickfont':dict(size=14,color="#7f7f7f"),
                      },
                yaxis={'type':"log",
                       'range':'[1.1,5.5]'
                      }
                
            )
        }

if __name__ == '__main__':

    app.run_server(debug=True, port= 8051, use_reloader=False)

Repository Exists: Fetch the latest data from repository
Dash is running on http://127.0.0.1:8051/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
