# KA SEIRSPlus
> backtesting on KA data

- toc: true 
- badges: false
- comments: true
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2

In [1]:
# hide
# !pip install seirsplus
# !pip install -U plotly
# !pip install fuzzywuzzy
import contextlib
import io
import json
import random
import sys
import warnings
from pathlib import Path
from typing import List, Union
from urllib.request import urlopen

import pandas as pd
import plotly.graph_objects as go
from branca.colormap import linear
from fuzzywuzzy import process
from IPython.display import Latex, Markdown, display
from IPython.utils import io
from ipywidgets import (
    HTML,
    FloatLogSlider,
    FloatSlider,
    GridBox,
    HBox,
    IntSlider,
    Label,
    Layout,
    Output,
    SelectionSlider,
    VBox,
    interactive,
)
from seirsplus.models import *
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import ParameterGrid
from sklearn.model_selection import ParameterSampler
from tqdm import tqdm

warnings.filterwarnings("ignore")





In [0]:
# hide
ka_df = pd.read_csv("KarnatakaDistrictInfo.csv")
ka_df["Population"] = pd.to_numeric(
    ka_df["Population"].apply(lambda x: x.replace(",", ""))
)

In [0]:
# hide
with urlopen(
    "https://raw.githubusercontent.com/covid19india/api/master/state_district_wise.json"
) as response:
    district = json.load(response)

In [4]:
district_maps = {}
for dis in list(district["Karnataka"]["districtData"].keys()):
    sel = process.extractOne(dis, ka_df["District"])
    if dis == "Kalaburagi":
        district_maps["Gulbarga"] = dis
    else:
        district_maps[sel[0]] = dis

district_maps

{'Bagalkot': 'Bagalkote',
 'Bangalore': 'Bengaluru',
 'Bangalore Rural': 'Bengaluru Rural',
 'Belgaum': 'Belagavi',
 'Bellary': 'Ballari',
 'Bidar': 'Bidar',
 'Chikkaballapura': 'Chikkaballapura',
 'Chitradurga': 'Chitradurga',
 'Dakshina Kannada': 'Dakshina Kannada',
 'Davanagere': 'Davanagere',
 'Dharwad': 'Dharwad',
 'Gadag': 'Gadag',
 'Gulbarga': 'Kalaburagi',
 'Kodagu': 'Kodagu',
 'Mandya': 'Mandya',
 'Mysore': 'Mysuru',
 'Tumkur': 'Tumakuru',
 'Udupi': 'Udupi',
 'Uttara Kannada': 'Uttara Kannada'}

In [0]:
ka_df["CaseCounts"] = ka_df["District"].apply(
    lambda x: district["Karnataka"]["districtData"][district_maps[x]]["confirmed"]
    if x in district_maps.keys()
    else 0
)

In [0]:
# hide
def get_infections(
    initI: int = 100,
    initN: int = 10 ** 5,
    days_N: int = 21,
    beta: float = 2.4,
    sigma=1 / 5.2,
    gamma=1 / 12.39,
    beta_D=0.000,
    mu_D=0.02,
    theta_E=0.0002,
    theta_I=0.002,
    psi_E=0.2,
    psi_I=1.0,
) -> List[int]:
    model = SEIRSModel(
        beta=beta,
        sigma=sigma,
        gamma=gamma,
        initN=initN,
        initI=initI,
        beta_D=beta_D,
        mu_D=mu_D,
        theta_E=theta_E,
        theta_I=theta_I,
        psi_E=psi_E,
        psi_I=psi_I,
    )
    with io.capture_output() as captured:
        model.run(T=days_N)
    S = model.numS  # time series of S counts
    E = model.numE  # time series of E counts
    I = model.numI  # time series of I counts
    D_E = model.numD_E  # time series of D_E counts
    D_I = model.numD_I  # time series of D_I counts
    R = model.numR  # time series of R counts
    F = model.numF  # time series of F counts
    t = model.tseries  # time values corresponding to the above time series
    return {"detected_exposed": D_E, "detected_infected": D_I, "model": model, "t": t}

In [0]:
# hide
def get_risk_estimates(
    state_estimated_df: pd.DataFrame,
    beta: float,
    sigma=1 / 5.2,
    gamma=1 / 12.39,
    beta_D=0.000,
    mu_D=0.02,
    theta_E=0.0002,
    theta_I=0.002,
    psi_E=0.2,
    psi_I=1.0,
) -> Union[List, List]:
    days_N = 21
    atrisk_day14, atrisk_day21 = [], []
    for row in state_estimated_df[["initI", "initN", "District"]].iterrows():
        initI, initN, district = row[1][0], int(row[1][1]), row[1][2]
        #     print(type(initI), type(initN))
        infection_results = get_infections(
            initI=initI,
            initN=initN,
            days_N=days_N,
            beta=beta,
            sigma=sigma,
            gamma=gamma,
            beta_D=beta_D,
            mu_D=mu_D,
            theta_E=theta_E,
            theta_I=theta_I,
            psi_E=psi_E,
            psi_I=psi_I,
        )
        detected_infected = infection_results["detected_infected"]
        day14 = int(14 * len(detected_infected) / days_N)
        case_count_day14 = int(infection_results["detected_infected"][day14])
        case_count_day21 = int(infection_results["detected_infected"][-1])
        atrisk_day14.append(case_count_day14)
        atrisk_day21.append(case_count_day21)
    return atrisk_day14, atrisk_day21, infection_results

In [0]:
# hide_input
# @title Assumptions
percent_travellers_infected = 0.039  # @param {type:"slider", min:0, max:1e-1, step:0.001}
unknown_to_known_travelers = 23  # @param {type:"slider", min:5, max:50, step:1}
beta = 0.6  # @param {type:"slider", min:0.5, max:5.0, step:0.1}

# display(Markdown(f"#Assumptions \npercent_travellers_infected :{percent_travellers_infected}\nunknown_to_known_travelers={unknown_to_known_travelers}\nbeta:{beta} "))

In [0]:
# hide
def estimate(
    percent_travellers_infected: float,
    state_df: pd.DataFrame,
    beta: float,
    bus_to_passenger=0.01, 
    public_bus_to_private_bus=0.01,
    percent_qtn_infected=0.01,
    sigma=1 / 5.2,
    gamma=1 / 12.39,
    beta_D=0.000,
    mu_D=0.02,
    theta_E=0.0002,
    theta_I=0.002,
    psi_E=0.2,
    psi_I=1.0,
):

    state_estimated_df = state_df.copy()
    passengers_infected = public_bus_to_private_bus * bus_to_passenger * percent_travellers_infected
    state_estimated_df["initI"] = (
        passengers_infected + percent_qtn_infected * state_df["Inflow"]
    ).astype(int)
    state_estimated_df["initN"] = state_estimated_df["Population"].astype(int)

    (
        state_estimated_df[f"day14"],
        state_estimated_df[f"day21"],
        infection_results,
    ) = get_risk_estimates(
        state_estimated_df,
        beta=beta,
        sigma=sigma,
        gamma=gamma,
        beta_D=beta_D,
        mu_D=mu_D,
        theta_E=theta_E,
        theta_I=theta_I,
        psi_E=psi_E,
        psi_I=psi_I,
    )
    return state_estimated_df

In [0]:
# hide
grid = {
    
    "beta_D": [i / 1000 for i in range(1, 6)],
    "percent_qtn_infected":[i / 1000 for i in range(1, 100, 10)],
    "public_bus_to_private_bus":[i for i in range(30, 50)],
    "bus_to_passenger":[i for i in range(50, 80, 5)],
    "percent_travellers_infected":[i / 1000 for i in range(1, 100, 10)],
    "mu_D": [i / 100 for i in range(1, 6)],
    "theta_E": [i / 10000 for i in range(1, 6)],
    "beta": [i / 10 for i in range(5, 9)],
    "theta_I": [i / 1000 for i in range(1, 6)],
    "psi_E": [i / 10 for i in range(5)],
    "psi_I": [i / 10 for i in range(5)],
    "sigma": [1 / (i / 10) for i in range(50, 140, 10)],
    "gamma": [1 / (i / 10) for i in range(100, 200, 10)],
}

In [11]:
# hide
list(ParameterSampler(grid,  n_iter=2))

[{'beta': 0.5,
  'beta_D': 0.001,
  'bus_to_passenger': 70,
  'gamma': 0.07142857142857142,
  'mu_D': 0.01,
  'percent_qtn_infected': 0.071,
  'percent_travellers_infected': 0.061,
  'psi_E': 0.0,
  'psi_I': 0.3,
  'public_bus_to_private_bus': 30,
  'sigma': 0.08333333333333333,
  'theta_E': 0.0001,
  'theta_I': 0.003},
 {'beta': 0.6,
  'beta_D': 0.002,
  'bus_to_passenger': 55,
  'gamma': 0.05555555555555555,
  'mu_D': 0.03,
  'percent_qtn_infected': 0.001,
  'percent_travellers_infected': 0.031,
  'psi_E': 0.4,
  'psi_I': 0.2,
  'public_bus_to_private_bus': 46,
  'sigma': 0.14285714285714285,
  'theta_E': 0.0001,
  'theta_I': 0.005}]

In [12]:
df_without_bangalore = ka_df[ka_df.Inflow != 0][ka_df.District !="Bangalore"][ka_df.District !="Bangalore Rural"]
df_more_than_30 = ka_df[ka_df.Inflow != 0][ka_df.CaseCounts >=30]
df_10_to_30 = ka_df[ka_df.Inflow != 0][ka_df.CaseCounts >=10][ka_df.CaseCounts <30]
df_less_than_10 = ka_df[ka_df.Inflow != 0][ka_df.CaseCounts <10]
len(df_without_bangalore) , len(df_more_than_30) , len(df_10_to_30) , len(df_less_than_10)

(25, 2, 2, 23)

In [0]:
# hide
def megs_shitty_grid_search(grid, df, config):
    prev = 1000000
    for combination in tqdm(list(ParameterSampler(grid, n_iter=400))):
        ka_estimated_df = estimate(
                percent_travellers_infected= combination["percent_travellers_infected"],
                state_df= df,
                beta = combination["beta"],
                bus_to_passenger=combination["bus_to_passenger"], 
                public_bus_to_private_bus=combination["public_bus_to_private_bus"],
                percent_qtn_infected=combination["percent_qtn_infected"],
                mu_D=combination["mu_D"], 
                theta_E=combination["theta_E"],
                theta_I=combination["theta_I"],
                psi_E=combination["psi_E"],
                psi_I=combination["psi_I"],
                sigma=combination["sigma"],
                gamma=combination["gamma"]
        )
        error = mean_absolute_error(
            ka_estimated_df["day14"], ka_estimated_df["CaseCounts"]
        )
        if error < prev:
            prev = error
            config = combination
            print(f"error : {error}")
    return config

In [14]:
print(f"Predictions for df_more_than_30")
config_for_more_than_30 = {}
config_for_more_than_30 = megs_shitty_grid_search(grid, df_more_than_30, config_for_more_than_30)
print(config_for_more_than_30) 
ka_estimated_df_more_than_30 = estimate(
                percent_travellers_infected= config_for_more_than_30["percent_travellers_infected"],
                state_df= ka_df,
                beta = config_for_more_than_30["beta"],
                bus_to_passenger=config_for_more_than_30["bus_to_passenger"], 
                public_bus_to_private_bus=config_for_more_than_30["public_bus_to_private_bus"],
                percent_qtn_infected=config_for_more_than_30["percent_qtn_infected"],
                mu_D=config_for_more_than_30["mu_D"], 
                theta_E=config_for_more_than_30["theta_E"],
                theta_I=config_for_more_than_30["theta_I"],
                psi_E=config_for_more_than_30["psi_E"],
                psi_I=config_for_more_than_30["psi_I"],
                sigma=config_for_more_than_30["sigma"],
                gamma=config_for_more_than_30["gamma"]
        )

  2%|▏         | 8/400 [00:00<00:05, 72.48it/s]

Predictions for df_more_than_30
error : 46.5
error : 46.0
error : 40.0
error : 38.5
error : 33.0


  8%|▊         | 30/400 [00:00<00:05, 68.90it/s]

error : 32.0


 12%|█▏        | 46/400 [00:00<00:04, 71.37it/s]

error : 29.5
error : 27.5
error : 20.0


 41%|████▏     | 165/400 [00:02<00:03, 72.95it/s]

error : 17.5


100%|██████████| 400/400 [00:05<00:00, 73.59it/s]


error : 10.5
{'theta_I': 0.005, 'theta_E': 0.0002, 'sigma': 0.1111111111111111, 'public_bus_to_private_bus': 36, 'psi_I': 0.4, 'psi_E': 0.0, 'percent_travellers_infected': 0.091, 'percent_qtn_infected': 0.091, 'mu_D': 0.01, 'gamma': 0.06666666666666667, 'bus_to_passenger': 65, 'beta_D': 0.005, 'beta': 0.8}


In [15]:
ka_estimated_df_more_than_30[["District", "CaseCounts", "day14"]]

Unnamed: 0,District,CaseCounts,day14
0,Bagalkot,5,14
1,Bangalore,58,59
2,Belgaum,7,14
3,Bellary,6,14
4,Bidar,10,14
5,Bijapur,0,14
6,Chamarajanagar,0,14
7,Chikkaballapura,10,14
8,Chikmagalur,0,14
9,Chitradurga,1,14


In [16]:
ka_estimated_df_more_than_30[["District", "CaseCounts", "day14"]][ka_estimated_df_more_than_30.CaseCounts >=30]

Unnamed: 0,District,CaseCounts,day14
1,Bangalore,58,59
21,Mysore,35,15


In [17]:
print(f"Predictions for df_10_to_30")
config_for_10_to_30 = {}
config_for_10_to_30 = megs_shitty_grid_search(grid, df_10_to_30, config_for_10_to_30)
print(config_for_10_to_30) 
ka_estimated_df_10_to_30 = estimate(
                percent_travellers_infected= config_for_10_to_30["percent_travellers_infected"],
                state_df= ka_df,
                beta = config_for_10_to_30["beta"],
                bus_to_passenger=config_for_10_to_30["bus_to_passenger"], 
                public_bus_to_private_bus=config_for_10_to_30["public_bus_to_private_bus"],
                percent_qtn_infected=config_for_10_to_30["percent_qtn_infected"],
                mu_D=config_for_10_to_30["mu_D"], 
                theta_E=config_for_10_to_30["theta_E"]
        )

  2%|▏         | 7/400 [00:00<00:06, 64.84it/s]

Predictions for df_10_to_30
error : 8.5
error : 7.0
error : 4.0


 18%|█▊        | 71/400 [00:00<00:04, 74.40it/s]

error : 2.5


 35%|███▌      | 140/400 [00:02<00:03, 70.43it/s]

error : 1.5


 65%|██████▌   | 261/400 [00:03<00:01, 76.04it/s]

error : 1.0


100%|██████████| 400/400 [00:05<00:00, 73.08it/s]


{'theta_I': 0.004, 'theta_E': 0.0002, 'sigma': 0.125, 'public_bus_to_private_bus': 42, 'psi_I': 0.4, 'psi_E': 0.3, 'percent_travellers_infected': 0.081, 'percent_qtn_infected': 0.001, 'mu_D': 0.01, 'gamma': 0.08333333333333333, 'bus_to_passenger': 50, 'beta_D': 0.002, 'beta': 0.8}


In [18]:
print(config_for_more_than_30)
ka_estimated_df_10_to_30[["District", "CaseCounts", "day14"]]

{'theta_I': 0.005, 'theta_E': 0.0002, 'sigma': 0.1111111111111111, 'public_bus_to_private_bus': 36, 'psi_I': 0.4, 'psi_E': 0.0, 'percent_travellers_infected': 0.091, 'percent_qtn_infected': 0.091, 'mu_D': 0.01, 'gamma': 0.06666666666666667, 'bus_to_passenger': 65, 'beta_D': 0.005, 'beta': 0.8}


Unnamed: 0,District,CaseCounts,day14
0,Bagalkot,5,20
1,Bangalore,58,21
2,Belgaum,7,20
3,Bellary,6,20
4,Bidar,10,20
5,Bijapur,0,20
6,Chamarajanagar,0,20
7,Chikkaballapura,10,20
8,Chikmagalur,0,20
9,Chitradurga,1,20


In [19]:
print(config_for_10_to_30)
ka_estimated_df_10_to_30[["District", "CaseCounts", "day14"]][ka_estimated_df_10_to_30.CaseCounts >=10][ka_estimated_df_10_to_30.CaseCounts <30]

{'theta_I': 0.004, 'theta_E': 0.0002, 'sigma': 0.125, 'public_bus_to_private_bus': 42, 'psi_I': 0.4, 'psi_E': 0.3, 'percent_travellers_infected': 0.081, 'percent_qtn_infected': 0.001, 'mu_D': 0.01, 'gamma': 0.08333333333333333, 'bus_to_passenger': 50, 'beta_D': 0.002, 'beta': 0.8}


Unnamed: 0,District,CaseCounts,day14
4,Bidar,10,20
7,Chikkaballapura,10,20
10,Dakshina Kannada,10,20


In [20]:
print(f"Predictions for df_less_than_10")
config_for_less_than_10 = {}
config_for_less_than_10 = megs_shitty_grid_search(grid, df_less_than_10, config_for_less_than_10)
print(config_for_less_than_10) 
ka_estimated_df_less_than_10 = estimate(
                percent_travellers_infected= config_for_less_than_10["percent_travellers_infected"],
                state_df= ka_df,
                beta = config_for_less_than_10["beta"],
                bus_to_passenger=config_for_less_than_10["bus_to_passenger"], 
                public_bus_to_private_bus=config_for_less_than_10["public_bus_to_private_bus"],
                percent_qtn_infected=config_for_less_than_10["percent_qtn_infected"],
                mu_D=config_for_less_than_10["mu_D"], 
                theta_E=config_for_less_than_10["theta_E"]
        )

Predictions for df_less_than_10


  0%|          | 1/400 [00:00<00:40,  9.87it/s]

error : 2.0869565217391304


 10%|█         | 40/400 [00:03<00:35, 10.20it/s]

error : 2.0434782608695654


 92%|█████████▏| 368/400 [00:35<00:02, 10.92it/s]

error : 1.9130434782608696


100%|██████████| 400/400 [00:38<00:00, 10.26it/s]

{'theta_I': 0.005, 'theta_E': 0.0002, 'sigma': 0.14285714285714285, 'public_bus_to_private_bus': 44, 'psi_I': 0.1, 'psi_E': 0.0, 'percent_travellers_infected': 0.021, 'percent_qtn_infected': 0.051, 'mu_D': 0.01, 'gamma': 0.0625, 'bus_to_passenger': 70, 'beta_D': 0.002, 'beta': 0.6}





In [21]:
ka_estimated_df_less_than_10[["District", "CaseCounts", "day14"]]

Unnamed: 0,District,CaseCounts,day14
0,Bagalkot,5,4
1,Bangalore,58,30
2,Belgaum,7,4
3,Bellary,6,4
4,Bidar,10,4
5,Bijapur,0,4
6,Chamarajanagar,0,4
7,Chikkaballapura,10,4
8,Chikmagalur,0,4
9,Chitradurga,1,4


In [22]:
print(config_for_less_than_10)
ka_estimated_df_less_than_10[["District", "CaseCounts", "day14"]][ka_estimated_df_less_than_10.CaseCounts <10]

{'theta_I': 0.005, 'theta_E': 0.0002, 'sigma': 0.14285714285714285, 'public_bus_to_private_bus': 44, 'psi_I': 0.1, 'psi_E': 0.0, 'percent_travellers_infected': 0.021, 'percent_qtn_infected': 0.051, 'mu_D': 0.01, 'gamma': 0.0625, 'bus_to_passenger': 70, 'beta_D': 0.002, 'beta': 0.6}


Unnamed: 0,District,CaseCounts,day14
0,Bagalkot,5,4
2,Belgaum,7,4
3,Bellary,6,4
5,Bijapur,0,4
6,Chamarajanagar,0,4
8,Chikmagalur,0,4
9,Chitradurga,1,4
11,Davanagere,2,4
12,Dharwad,1,4
13,Gadag,1,4


In [29]:
print(f"Predictions for df_without_bangalore")
config_for_without_bangalore = {}
config_for_without_bangalore = megs_shitty_grid_search(grid, df_without_bangalore, config_for_without_bangalore)
print(config_for_without_bangalore) 
ka_estimated_df_without_bangalore = estimate(
                percent_travellers_infected= config_for_without_bangalore["percent_travellers_infected"],
                state_df= ka_df,
                beta = config_for_without_bangalore["beta"],
                bus_to_passenger=config_for_without_bangalore["bus_to_passenger"], 
                public_bus_to_private_bus=config_for_without_bangalore["public_bus_to_private_bus"],
                percent_qtn_infected=config_for_without_bangalore["percent_qtn_infected"],
                mu_D=config_for_without_bangalore["mu_D"], 
                theta_E=config_for_without_bangalore["theta_E"]
        )

  0%|          | 1/400 [00:00<00:41,  9.66it/s]

Predictions for df_without_bangalore
error : 4.12


  1%|▏         | 5/400 [00:00<00:43,  9.02it/s]

error : 3.92


  7%|▋         | 27/400 [00:02<00:39,  9.52it/s]

error : 3.84


 10%|█         | 41/400 [00:04<00:38,  9.39it/s]

error : 3.8


 43%|████▎     | 172/400 [00:18<00:24,  9.21it/s]

error : 3.76


100%|██████████| 400/400 [00:42<00:00,  9.37it/s]


{'theta_I': 0.004, 'theta_E': 0.0001, 'sigma': 0.08333333333333333, 'public_bus_to_private_bus': 33, 'psi_I': 0.2, 'psi_E': 0.4, 'percent_travellers_infected': 0.051, 'percent_qtn_infected': 0.081, 'mu_D': 0.04, 'gamma': 0.08333333333333333, 'bus_to_passenger': 65, 'beta_D': 0.001, 'beta': 0.5}


In [30]:
ka_estimated_df_without_bangalore[["District", "CaseCounts", "day14"]]

Unnamed: 0,District,CaseCounts,day14
0,Bagalkot,5,5
1,Bangalore,58,32
2,Belgaum,7,5
3,Bellary,6,5
4,Bidar,10,5
5,Bijapur,0,5
6,Chamarajanagar,0,5
7,Chikkaballapura,10,5
8,Chikmagalur,0,5
9,Chitradurga,1,5
