# Transforming files to parameters.
## Should run after "Contact Patterns" notebook!!

### In order to run this notebook One should make sure the above files exist:
* Data/raw
    * Data/raw/asymptomatic_proportions.csv
    * Data/raw/population_size.csv                    
    * Data/raw/init_i_italy.csv                                
    * Data/raw/IL_initial_eps.csv                     
    * Data/raw/vent_proba.csv 
    * Data/raw/hospitalizations.csv
    * Data/raw/risk_dist.csv
    
* Data/base_contact_mtx
    * Data/base_contact_mtx/full_home_mtx.csv
    * Data/base_contact_mtx/full_leisure_mtx_no_school.csv
    * Data/base_contact_mtx/full_leisure_mtx_no_work.csv
    * Data/base_contact_mtx/full_leisure_mtx_routine.csv
    * Data/base_contact_mtx/full_work_mtx_no_school.csv
    * Data/base_contact_mtx/full_work_mtx_no_work.csv
    * Data/base_contact_mtx/full_work_mtx_routine.csv
    
* Data/demograph
    * Data/demograph/age_dist_area.csv
    * Data/demograph/religion_dis.csv
    * Data/demograph/sick_prop.csv

### This Notebook generates the above files:
* Data/parameters
    - Data/parameters/f0_full.pickle
    - Data/parameters/eps_dict.pickle
    - Data/parameters/eps_by_region.pickle
    - Data/parameters/hospitalization.pickle
    - Data/parameters/vents_proba.pickle
    - Data/parameters/C_calibration.pickle
    - Data/parameters/orthodox_dist.pickle
    - Data/parameters/init_pop.pickle

# Imports

In [2]:
import numpy as np
import pandas as pd
from matplotlib.patches import Patch
import itertools
import pickle
from matplotlib import pyplot as plt
import datetime
import scipy
from scipy import optimize
from scipy.sparse import csr_matrix
import sys
import os
sys.path.append('../SEIR_full/')
sys.path.append('..')
from SEIR_full.indices import *
%matplotlib inline

In [3]:
with (open('../Data/parameters/indices.pickle', 'rb')) as openfile:
    ind = pickle.load(openfile)

# Asymptomatic

In [4]:
asymp = pd.read_csv('../Data/raw/asymptomatic_proportions.csv', index_col=0)
asymp.head()

Unnamed: 0_level_0,Scenario 1,Scenario 2,Scenario 3,Unnamed: 4
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0-4,0.957467,0.765973,0.612779,
5-9,0.957467,0.765973,0.612779,
10-19,0.972167,0.777734,0.622187,
20-29,0.921424,0.737139,0.589711,
30-39,0.899804,0.719843,0.575874,


In [5]:
f0_full = {} # dict that contains the possible scenarios

# asymptomatic with risk group, high risk with 0
f_init = np.zeros(len(list(itertools.product(ind.R.values(), ind.A.values()))))
for i in [1,2,3]:
    f_tmp = f_init.copy()
    f_tmp[9:] = asymp['Scenario '+ str(i)].values[:-1]
    f0_full['Scenario'+ str(i)] = expand_partial_array(ind.risk_age_dict, f_tmp, len(ind.N))

In [6]:
# Save
try:
    os.mkdir('../Data/parameters')
except:
    pass
with open('../Data/parameters/f0_full.pickle', 'wb') as handle:
    pickle.dump(f0_full, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Initial illness

In [7]:
# Age dist. positive specimens
age_dist = {'0-4':0.02, '5-9':0.02, '10-19':0.11, '20-29':0.23, '30-39':0.15, '40-49':0.14, '50-59':0.14, '60-69':0.11,
           '70+':0.08}

In [8]:
age_dist_area = pd.read_csv('../Data/demograph/age_dist_area.csv')

In [9]:
age_dist_area.drop(['Unnamed: 0'],axis=1, inplace=True)

In [10]:
age_dist_area.set_index('cell_id', inplace=True)

In [11]:
age_dist_area

Unnamed: 0_level_0,0-4,5-9,10-19,20-29,30-39,40-49,50-59,60-69,70+
cell_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2000001,0.000667,0.000689,0.001106,0.000712,0.000686,0.000605,0.000331,0.000226,0.000140
2000002,0.000997,0.001007,0.001857,0.001312,0.000979,0.000809,0.000527,0.000280,0.000198
2000003,0.001051,0.001049,0.001906,0.001402,0.000940,0.000761,0.000507,0.000249,0.000154
2000004,0.000251,0.000242,0.000445,0.000418,0.000284,0.000231,0.000205,0.000125,0.000114
2000005,0.000384,0.000401,0.000760,0.000698,0.000489,0.000423,0.000323,0.000232,0.000206
...,...,...,...,...,...,...,...,...,...
2000246,0.000750,0.000725,0.001054,0.000800,0.000673,0.000422,0.000327,0.000238,0.000135
2000247,0.000724,0.000647,0.000742,0.000653,0.000539,0.000297,0.000201,0.000154,0.000066
2000248,0.000269,0.000276,0.000434,0.000325,0.000306,0.000271,0.000199,0.000203,0.000092
2000249,0.000253,0.000276,0.000414,0.000299,0.000306,0.000313,0.000223,0.000179,0.000107


In [12]:
age_dist_area = age_dist_area.stack()

In [13]:
age_dist_area

cell_id       
2000001  0-4      0.000667
         5-9      0.000689
         10-19    0.001106
         20-29    0.000712
         30-39    0.000686
                    ...   
2000250  30-39    0.000285
         40-49    0.000189
         50-59    0.000179
         60-69    0.000167
         70+      0.000092
Length: 2169, dtype: float64

In [14]:
init_pop = expand_partial_array(ind.region_age_dict, age_dist_area.values, len(ind.N))
init_pop.shape

(8676,)

In [15]:
init_pop[ind.inter_dict['Intervention']]=0
init_pop

array([0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,
       1.78939857e-04, 1.67125238e-04, 9.18787343e-05])

In [16]:
risk_pop = pd.read_csv('../Data/raw/risk_dist.csv')
risk_pop.set_index('Age', inplace=True)
risk_pop['High'] = risk_pop['risk']
risk_pop['Low'] = 1 - risk_pop['risk']
risk_pop.drop(['risk'], axis=1, inplace=True)
risk_pop = risk_pop.stack()
risk_pop.index = risk_pop.index.swaplevel(0, 1)
risk_pop = risk_pop.unstack().stack()
risk_pop

      Age  
High  0-4      0.050
      5-9      0.106
      10-19    0.106
      20-29    0.149
      30-39    0.149
      40-49    0.149
      50-59    0.330
      60-69    0.421
      70+      0.512
Low   0-4      0.950
      5-9      0.894
      10-19    0.894
      20-29    0.851
      30-39    0.851
      40-49    0.851
      50-59    0.670
      60-69    0.579
      70+      0.488
dtype: float64

In [17]:
for (r, a), g_idx in zip(ind.risk_age_dict.keys(), ind.risk_age_dict.values()):
    init_pop[g_idx] = init_pop[g_idx] * risk_pop[r,a]
    
# Age distribution:
pop_dist = init_pop

In [18]:
# Save
with open('../Data/parameters/init_pop.pickle', 'wb') as handle:
    pickle.dump(pop_dist, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [19]:
len(ind.R)*len(ind.A)

18

In [20]:
# risk distribution by age:
risk_dist = pd.read_csv('../Data/raw/population_size.csv')

init_I_dis_italy = pd.read_csv('../Data/raw/init_i_italy.csv')['proportion'].values[:-1]

f_init = pd.read_pickle('../Data/parameters/f0_full.pickle')

eps_t = {}

In [21]:
init_I_dis_italy.sum()

0.9999999999999999

In [22]:
init_I_IL = {}
init_I_dis = {}
for i in [1,2,3]:
    scen = 'Scenario'+str(i)
    f_init_i = f_init[scen][:(len(ind.R)*len(ind.A))]
    init_I_IL[scen] = (491./(1-(f_init_i * risk_dist['pop size'].values).sum())) / 9136000.
    init_I_dis[scen] = init_I_dis_italy * init_I_IL[scen]

In [23]:
for i in [1,2,3]:
    scen = 'Scenario'+str(i)
    eps_t[scen] = []
    for i in range(1000):
        eps_t[scen].append(init_I_dis[scen][i] * pop_dist if i < len(init_I_dis[scen]) else 0)

In [24]:
# Save
with open('../Data/parameters/eps_dict.pickle', 'wb') as handle:
    pickle.dump(eps_t, handle, protocol=pickle.HIGHEST_PROTOCOL)

## eps by region proportion

In [25]:
# Loading data
region_prop = pd.read_csv('../Data/demograph/sick_prop.csv', index_col=0)['cases_prop'].copy()
region_prop.index = region_prop.index.astype(str)
risk_prop = pd.read_csv('../Data/raw/risk_dist.csv', index_col=0)['risk'].copy()
print(region_prop.head())
print()
print(risk_prop.head())

cell_id
2000001    0.137496
2000002    0.137496
2000003    0.137496
2000004    0.137496
2000005    0.137496
Name: cases_prop, dtype: float64

Age
0-4      0.050
5-9      0.106
10-19    0.106
20-29    0.149
30-39    0.149
Name: risk, dtype: float64


In [26]:
eps_t_region = {}

In [27]:
for sc, init_I in zip(init_I_dis.keys(), init_I_dis.values()):
    eps_temp = []
    for t in range(1000):
        if t < len(init_I):
            # empty array for day t
            day_vec = np.zeros(len(ind.N))
            # fill in the array, zero for intervention groups
            for key in ind.N.keys():
                if ind.N[key][0] == 'Intervention':
                    day_vec[key] = 0
                else:
                    day_vec[key] = init_I[t] * region_prop[ind.N[key][1]] * age_dist[ind.N[key][3]] * \
                    (risk_prop[ind.N[key][3]]**(1 - (ind.N[key][2] == 'Low'))) * \
                    ((1 - risk_prop[ind.N[key][3]])**(ind.N[key][2] == 'Low'))
            eps_temp.append(day_vec)
        else:
             eps_temp.append(0.0)
        
        eps_t_region[sc] = eps_temp


In [28]:
# save eps:
with open('../Data/parameters/eps_by_region.pickle', 'wb') as handle:
    pickle.dump(eps_t_region, handle, protocol=pickle.HIGHEST_PROTOCOL)  

## Smoothed eps by region -------------- UNFINISHED -------------------

In [29]:
########## UNFINISHED ############
init_eps = pd.read_csv('../Data/raw/IL_initial_eps.csv', header=None).values[:,0].copy()
init_eps

array([  0.78180071,   2.94105921,   5.84488904,   8.5253491 ,
        12.02483762,  17.497441  ,  24.38473153,  33.09622265,
        41.99385742,  39.83459892,  75.79742365,  93.14595037,
       114.9991361 , 143.6279275 , 172.5917768 , 219.0158361 ,
       274.5609032 , 341.4606921 ])

In [135]:
init_eps_asymp = {}
for i in [1,2,3]:
    scen = 'Scenario'+str(i)
    f_init_i = f_init[scen][:(len(R)*len(A))]
    init_eps_asymp[scen] = (init_eps/(1-(f_init_i * risk_dist['pop size'].values).sum())) / 9136000.

In [136]:
f_init['Scenario3'][:(len(R)*len(A))]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.6127786 ,
       0.6127786 , 0.62218712, 0.58971125, 0.57587446, 0.59021178,
       0.56415614, 0.54046208, 0.14933333])

In [137]:
init_eps_asymp['Scenario3'].sum()*9136000

3003.15972721764

In [138]:
def fill_eps_day(init_eps, dic=mdl.N, age_)

SyntaxError: invalid syntax (<ipython-input-138-958ad179e108>, line 1)

In [None]:
eps_reg = []


# Hospitalizations

In [30]:
hosp_init = pd.read_csv('../Data/raw/hospitalizations.csv')
hosp_init

Unnamed: 0,Age,risk,pr_hosp
0,0-4,High,0.2
1,05-Sep,High,0.0
2,Oct-19,High,0.0
3,20-29,High,0.195652
4,30-39,High,0.259259
5,40-49,High,0.27619
6,50-59,High,0.300926
7,60-69,High,0.328125
8,70+,High,0.530892
9,0-4,Low,0.061776


In [31]:
hosp = expand_partial_array(ind.risk_age_dict, hosp_init['pr_hosp'].values, len(ind.N))

In [32]:
hosp

array([0.2       , 0.        , 0.        , ..., 0.17814371, 0.26388889,
       0.52361673])

In [33]:
# Save
with open('../Data/parameters/hospitalization.pickle', 'wb') as handle:
    pickle.dump(hosp, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Ventilators

In [34]:
vents_init = pd.read_csv('../Data/raw/vent_proba.csv')
vents_init

Unnamed: 0,Age,risk,pr_vents
0,0-4,High,0.0
1,05-Sep,High,0.0
2,Oct-19,High,0.0
3,20-29,High,0.0
4,30-39,High,0.014159
5,40-49,High,0.041329
6,50-59,High,0.04063
7,60-69,High,0.044061
8,70+,High,0.157187
9,0-4,Low,0.0


In [35]:
vent = expand_partial_array(ind.risk_age_dict, vents_init['pr_vents'].values, len(ind.N))

In [36]:
vent

array([0.        , 0.        , 0.        , ..., 0.00748931, 0.02713968,
       0.12922653])

In [37]:
# Save
with open('../Data/parameters/vents_proba.pickle', 'wb') as handle:
    pickle.dump(vent, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Calibration contact matrix

Important : here no intervention mean the course of interventions made by government

In [49]:
full_mtx_home = scipy.sparse.load_npz('../Data/base_contact_mtx/full_home.npz')

full_mtx_work = {
    'routine': scipy.sparse.load_npz('../Data/base_contact_mtx/full_work_routine.npz'),
    'no_school': scipy.sparse.load_npz('../Data/base_contact_mtx/full_work_no_school.npz'),
    'no_work': scipy.sparse.load_npz('../Data/base_contact_mtx/full_work_no_work.npz'),
    'no_100_meters': scipy.sparse.load_npz('../Data/base_contact_mtx/full_work_no_100_meters.npz'),
    'no_bb': scipy.sparse.load_npz('../Data/base_contact_mtx/full_work_no_bb.npz'),
}

full_mtx_leisure = {
    'routine': scipy.sparse.load_npz('../Data/base_contact_mtx/full_leisure_routine.npz'),
    'no_school': scipy.sparse.load_npz('../Data/base_contact_mtx/full_leisure_no_school.npz'),
    'no_work': scipy.sparse.load_npz('../Data/base_contact_mtx/full_leisure_no_work.npz'),
    'no_100_meters': scipy.sparse.load_npz('../Data/base_contact_mtx/full_leisure_no_100_meters.npz'),
    'no_bb': scipy.sparse.load_npz('../Data/base_contact_mtx/full_leisure_no_bb.npz'),
}

In [50]:
C_calibration = {}
d_tot = 500

In [51]:
# no intervation are null groups
home_no_inter = []
work_no_inter = []
leis_no_inter = []

for i in range(d_tot):
    home_no_inter.append(csr_matrix((full_mtx_home.shape[0], full_mtx_home.shape[1])))
    work_no_inter.append(csr_matrix((full_mtx_work['routine'].shape[0], full_mtx_work['routine'].shape[1])))
    leis_no_inter.append(csr_matrix((full_mtx_leisure['routine'].shape[0], full_mtx_leisure['routine'].shape[1])))

In [52]:
# Intervantion
home_inter = []
work_inter = []
leis_inter = []

# first days of routine from Feb 21st - March 13th
d_rout = 9+13
for i in range(d_rout):
    home_inter.append(full_mtx_home)
    work_inter.append(full_mtx_work['routine'])
    leis_inter.append(full_mtx_leisure['routine'])

# first days of no school from March 14th - March 16th
d_no_school = 3
for i in range(d_no_school):
    home_inter.append(full_mtx_home)
    work_inter.append(full_mtx_work['no_school'])
    leis_inter.append(full_mtx_leisure['no_school'])

# without school and work from March 17th - March 25th
d_no_work = 9
for i in range(d_no_work):
    home_inter.append(full_mtx_home)
    work_inter.append(full_mtx_work['no_work'])
    leis_inter.append(full_mtx_leisure['no_work'])

# 100 meters constrain from March 26th - April 2nd
d_no_100_meters = 8
for i in range(d_no_100_meters):
    home_inter.append(full_mtx_home)
    work_inter.append(full_mtx_work['no_100_meters'])
    leis_inter.append(full_mtx_leisure['no_100_meters'])
    
# Bnei Brak quaranrine from April 3rd
for i in range(d_tot-d_no_school-d_rout-d_no_work-d_no_100_meters):
    home_inter.append(full_mtx_home)
    work_inter.append(full_mtx_work['no_bb'])
    leis_inter.append(full_mtx_leisure['no_bb'])

In [53]:
C_calibration['home_inter'] = home_no_inter
C_calibration['work_inter'] = work_no_inter
C_calibration['leisure_inter'] = leis_no_inter
C_calibration['home_non'] = home_inter
C_calibration['work_non'] = work_inter
C_calibration['leisure_non'] = leis_inter

In [54]:
# Save
with open('../Data/parameters/C_calibration.pickle', 'wb') as handle:
    pickle.dump(C_calibration, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Haredi vector

In [52]:
# Loading raw data
hared_dis = pd.read_csv('../Data/demograph/religion_dis.csv', index_col=0)[['cell_id','Orthodox']].copy()
hared_dis.set_index('cell_id', inplace=True)
hared_dis.index = hared_dis.index.astype(str)
hared_dis.head()

Unnamed: 0_level_0,Orthodox
cell_id,Unnamed: 1_level_1
2000001,0.203647
2000002,0.283772
2000003,0.281337
2000004,0.286254
2000005,0.284137


In [53]:
# Creating model orthodox dist. and save it as pickle
model_orthodox_dis = np.zeros(len(ind.GA))
for i in ind.GA.keys():
    model_orthodox_dis[i] = hared_dis.loc[str(ind.GA[i][0])]
    
with open('../Data/parameters/orthodox_dist.pickle', 'wb') as handle:
    pickle.dump(model_orthodox_dis, handle, protocol=pickle.HIGHEST_PROTOCOL)  

In [54]:
model_orthodox_dis

array([0.20364652, 0.20364652, 0.20364652, ..., 0.08073126, 0.08073126,
       0.08073126])