# Age-structured ICU demand sampler

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
from scipy.optimize import minimize 
from datetime import timedelta
import datetime as dt
from IPython.display import Image
import requests

In [11]:
# --- Location being analysed -------------------------------------------------
# `state_or_city` picks both the `place_type` value and the column that `name`
# is matched against when the case table is filtered.
state_or_city = 'city'
name = 'Blumenau'
state = 'SC'              # used to build the age-structure file name
name_file = 'Blumenau'    # used to build the output file name

# --- Numeric parameters ------------------------------------------------------
pop0 = 357199             # NOTE(review): presumably the population of the location - confirm
p_SUS = 0.7948            # probability passed to `daily_av` for the SUS sampling step

## Data

### Brazil

#### COVID-19 DATA

Data source: [Brasil.IO](https://brasil.io/dataset/covid19/caso). Note that this database is updated every day.



In [12]:
#url = "https://brasil.io/dataset/covid19/caso/?format=csv"
#filename = 'data/brazil_' + url.split("/")[-3] + '.csv'
#with open(filename, "wb") as f:
#    r = requests.get(url)
#    f.write(r.content)

To keep the analysis reproducible, we use the snapshot of the database from June 11.

In [13]:
# Local snapshot of the case table (see the data-source note above).
filename = 'data/brazil_covid19_06_11.csv'

# Read the whole table; the rows are filtered down to one place in the next cell.
df = pd.read_csv(filename)
# Preview the first rows.
df.head()

Unnamed: 0,date,state,city,place_type,confirmed,deaths,is_last,estimated_population_2019,city_ibge_code,confirmed_per_100k_inhabitants,death_rate
0,2020-06-10,AC,Acrelândia,city,167,2,True,15256.0,1200013.0,1094.65128,0.012
1,2020-06-10,AC,Assis Brasil,city,79,4,True,7417.0,1200054.0,1065.12067,0.0506
2,2020-06-10,AC,Brasiléia,city,176,5,True,26278.0,1200104.0,669.76178,0.0284
3,2020-06-10,AC,Bujari,city,90,1,True,10266.0,1200138.0,876.6803,0.0111
4,2020-06-10,AC,Capixaba,city,73,2,True,11733.0,1200179.0,622.17677,0.0274


Select a particular state or city:

In [14]:
# Keep only the rows of the requested place: `place_type` must equal
# `state_or_city`, and the column named by `state_or_city` must equal `name`.
df = df[(df['place_type'] == state_or_city) & (df[state_or_city] == name)]

# Fail early with a clear message; otherwise an empty selection only shows up
# as an opaque IndexError when `df_I.index[-1]` is evaluated below.
if df.empty:
    raise ValueError(
        f"No rows with place_type == {state_or_city!r} and {state_or_city} == {name!r}"
    )

# Sum of `confirmed` per date, indexed by timestamp (groupby sorts the dates).
df_I = df.groupby('date')[['confirmed']].sum()
df_I.index = pd.to_datetime(df_I.index)

# Day following the last date present in the data.
today = df_I.index[-1] + timedelta(days=1)

Last day of data used to run this notebook:

In [15]:
# Month-day label of the most recent date in `df_I`; it is embedded in the
# name of the results file written at the end of the notebook.
last_date = df_I.index[-1]
fit_until = last_date.strftime('%m-%d')
fit_until

'06-10'

#### Population data combined with ICU admission probability by age

Data source: [IBGE](https://www.ibge.gov.br/apps/populacao/projecao/).

In [16]:
# Age-structure table of the state whose abbreviation is stored in `state`.
file = 'data/pop_age_str_IBGE_2020_' + state + '.csv'

# Alternative (disabled): select the file by `name` instead of `state`.
# file = 'data/pop_age_str_IBGE_2020_' + name + '.csv'

df_age = pd.read_csv(file)
# Overwrite the first two age labels with zero-padded strings.
# NOTE(review): presumably so that the labels sort in age order - confirm.
df_age.loc[0, 'Age'] = '00-04'
df_age.loc[1, 'Age'] = '05-09'
# Share of the total population that falls in each age band.
df_age['AGE_prob'] = df_age['Total'] / df_age['Total'].sum()

# Alternative (disabled): take the population from this table instead of the
# hard-coded `pop0`.
#pop0 = df_age['Total'].sum().item()

In [17]:
# ICU admission rate per broad age group. The values are 100x the fractions
# used in the per-band `ICU_prob` list of the next cell (i.e. percentages).
# NOTE(review): the source of these figures is not stated in the notebook.
df_age_ICU = pd.DataFrame({
    'Age':      ['0-19', '20-44', '45-54', '55-64', '65-74', '75-84', '85+'],
    'ICU_prob': [0, 4.2, 10.4, 11.2, 18.8, 31, 29],
})

In [18]:
# ICU admission probability (as a fraction) for each row of `df_age`, obtained
# by repeating each broad-group value once per row it covers.
# NOTE(review): assumes `df_age` has 19 rows of five-year bands - confirm
# against the CSV; the assignment below fails if the lengths differ.
ICU_prob = (
    [0.]    * 4     # 0-19
    + [0.042] * 5   # 20-44
    + [0.104] * 2   # 45-54
    + [0.112] * 2   # 55-64
    + [0.188] * 2   # 65-74
    + [0.31]  * 2   # 75-84
    + [0.29]  * 2   # 85+
)

df_age['ICU_prob'] = ICU_prob

## Sampling from age-structured population probability and ICU admission probability

Function used to perform both sampling steps (age band, then ICU admission) for a given number of cases.

In [19]:
def ICU_samp(df, n, n_samp_AGE_max= 1000, n_samp_AGE_min= 100, n_samp_ICU_max= 1000, n_samp_ICU_min= 100):
    """Monte Carlo estimate of cases and ICU admissions per age band.

    Two sampling steps are performed:

    1. `n` cases are distributed over the age bands according to `AGE_prob`,
       `n_samp_AGE_max` times; the per-band mean and (sample) standard
       deviation of the counts are stored in `n_mean` and `n_std`.
    2. For every age band, `int(n_mean)` cases are each admitted to ICU with
       probability `ICU_prob`, `n_samp_ICU_max` times; the mean count is
       stored in `n_mean_ICU`.

    `n_std_ICU` is the quadrature sum of `n_std` and the (population)
    standard deviation of the step-2 counts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Age', 'AGE_prob' and 'ICU_prob'.
        It is not modified.
    n : int
        Number of cases to distribute (0 is allowed).
    n_samp_AGE_max, n_samp_ICU_max : int
        Number of Monte Carlo repetitions for step 1 and step 2.
    n_samp_AGE_min, n_samp_ICU_min : int
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` indexed by 'Age' with the extra columns
        'n_mean', 'n_std', 'n_mean_ICU' and 'n_std_ICU'.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    n = int(n)

    # Step 1: drawing n ages with probabilities p and counting them per band
    # is a multinomial draw; one row per repetition, one column per band.
    age_probs = df['AGE_prob'].to_numpy(dtype=float)
    age_counts = np.random.multinomial(n, age_probs, size=n_samp_AGE_max)

    df['n_mean'] = age_counts.mean(axis=0)
    df['n_std'] = age_counts.std(axis=0, ddof=1)   # sample std (ddof=1), as pandas' .std()

    df = df.set_index('Age')

    # Step 2: counting uniform draws below ICU_prob out of int(n_mean) trials
    # is a binomial draw. Clipping to [0, 1] reproduces "uniform < p" for
    # out-of-range p (never / always admitted).
    n_cases = df['n_mean'].to_numpy().astype(int)
    icu_probs = np.clip(df['ICU_prob'].to_numpy(dtype=float), 0.0, 1.0)
    icu_counts = np.random.binomial(n_cases, icu_probs, size=(n_samp_ICU_max, len(df)))

    df['n_mean_ICU'] = icu_counts.mean(axis=0)
    icu_sampling_std = icu_counts.std(axis=0)      # population std (ddof=0), as np.std

    # Combine the uncertainty of both sampling steps in quadrature.
    df['n_std_ICU'] = np.sqrt(df['n_std']**2 + icu_sampling_std**2)

    return df

Running the function for every date in the time series:

In [20]:
# One `ICU_samp` result per date of `df_I`, in date order.
df1_ = []

# Read the case count from the `confirmed` column by name: the former
# `df_I.iloc[j][0]` relied on positional integer lookup on a label-indexed
# Series, which pandas has deprecated.
for j, n_confirmed in enumerate(df_I['confirmed']):

    df1 = ICU_samp(df= df_age.reset_index(),
                   n= int(n_confirmed),
                   n_samp_AGE_max= 100, n_samp_AGE_min= 100,
                   n_samp_ICU_max= 100, n_samp_ICU_min= 100)

    df1_.append(df1)

### Taking into account removal from ICU after `T_ICU` days

In [21]:
def correction(x, df_, T_ICU= 14):
    """Add lag-corrected ICU columns to the frame at position `x` of `df_`.

    The frame `df_[x]` is modified in place: it gains the columns
    'n_mean_ICU_cor' and 'n_std_ICU_cor'. Nothing is returned.

    * For `x <= T_ICU` the corrected columns are copies of 'n_mean_ICU'
      and 'n_std_ICU'.
    * Otherwise the corrected mean is the difference between the frame at
      `x` and the frame at `x - T_ICU`, with negative differences set to
      zero, and the corrected standard deviation is the quadrature sum of
      the two frames' 'n_std_ICU'.

    NOTE(review): position `x == T_ICU` is left uncorrected although
    `df_[0]` exists - confirm that `<=` (rather than `<`) is intended.
    """
    current = df_[x]

    # Create both columns up front, in this order.
    for column in ('n_mean_ICU_cor', 'n_std_ICU_cor'):
        current[column] = 0.

    if x <= T_ICU:
        # No frame T_ICU positions back to subtract from: keep the raw values.
        current['n_mean_ICU_cor'] = current['n_mean_ICU']
        current['n_std_ICU_cor'] = current['n_std_ICU']
        return

    lagged = df_[x - T_ICU]
    delta = current['n_mean_ICU'] - lagged['n_mean_ICU']

    # heaviside(delta, 0) is 1 for delta > 0 and 0 otherwise, so the product
    # keeps positive differences and zeroes the rest.
    current['n_mean_ICU_cor'] = np.heaviside(delta, 0) * delta
    current['n_std_ICU_cor'] = np.sqrt(current['n_std_ICU']**2 + lagged['n_std_ICU']**2)

In [22]:
# Lag used by `correction`, counted in positions of `df1_` (one per date of `df_I`).
# NOTE(review): this equals a number of days only if the dates are consecutive - confirm.
T_ICU = 14

# Add the corrected columns to every frame in place.
for x in range(len(df1_)):
    
    correction(x, df_= df1_, T_ICU= T_ICU)

## Collecting daily averages

Given a list `df_` of dataframes, the function `daily_av` collects the aggregated column values for each date in `timeseries_data`. It returns a dataframe called `df_ICU`.

In [23]:
def daily_av(df_, timeseries_data, SUS= True, p_SUS= 0.6278, n_samp_max= 1000, n_samp_min= 100):
    """Aggregate per-age-band frames into one row per date.

    Parameters
    ----------
    df_ : list of pandas.DataFrame
        One frame per date, each with the columns 'n_mean', 'n_std',
        'n_mean_ICU_cor' and 'n_std_ICU_cor'.
    timeseries_data : index-like
        Dates used as the index of the result; must have one entry per
        frame in `df_`.
    SUS : bool
        When true, also sample how many ICU cases fall under `p_SUS`.
    p_SUS : float
        Probability that a single ICU case is counted in the SUS columns.
    n_samp_max : int
        Number of Monte Carlo repetitions of the SUS sampling.
    n_samp_min : int
        Accepted for interface compatibility; not used.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'date' with the columns 'n_mean', 'n_std', 'n_mean_ICU',
        'n_std_ICU' and, when `SUS` is true, 'n_mean_ICU_SUS' and
        'n_std_ICU_SUS'. Means are summed over age bands; standard
        deviations are combined in quadrature.
    """
    # Use the dates passed by the caller; the function previously ignored this
    # argument and read the notebook-global `df_I.index` instead.
    df_ICU = pd.DataFrame(
        {
            'n_mean':     [frame['n_mean'].sum() for frame in df_],
            'n_std':      [np.sqrt((frame['n_std']**2).sum()) for frame in df_],
            'n_mean_ICU': [frame['n_mean_ICU_cor'].sum() for frame in df_],
            'n_std_ICU':  [np.sqrt((frame['n_std_ICU_cor']**2).sum()) for frame in df_],
        },
        index=pd.Index(timeseries_data, name='date'),
    )

    if SUS:
        # Counting uniform draws <= p_SUS out of int(n_mean_ICU) trials is a
        # binomial draw; clipping p to [0, 1] keeps the same never/always
        # behaviour for out-of-range values.
        n_icu = df_ICU['n_mean_ICU'].to_numpy().astype(int)
        p = float(np.clip(p_SUS, 0.0, 1.0))
        sus_counts = np.random.binomial(n_icu, p, size=(n_samp_max, len(df_ICU)))

        df_ICU['n_mean_ICU_SUS'] = sus_counts.mean(axis=0)
        # Sampling spread (ddof=0) combined in quadrature with the ICU std.
        df_ICU['n_std_ICU_SUS'] = np.sqrt(df_ICU['n_std_ICU']**2 + sus_counts.std(axis=0)**2)

    return df_ICU

In [24]:
# Per-date summary of the sampled frames, including the SUS split.
df1_ICU = daily_av(
    df1_,
    timeseries_data= df_I.index,
    SUS= True,
    p_SUS= p_SUS,
    n_samp_max= 100,
    n_samp_min= 100,
)

In [25]:
# Display the per-date summary table.
df1_ICU

Unnamed: 0_level_0,n_mean,n_std,n_mean_ICU,n_std_ICU,n_mean_ICU_SUS,n_std_ICU_SUS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-03-22,6.0,2.400168,0.00,2.400168,0.00,2.400168
2020-03-23,7.0,2.535326,0.00,2.535326,0.00,2.535326
2020-03-24,7.0,2.553033,0.00,2.553033,0.00,2.553033
2020-03-25,9.0,2.923295,0.00,2.923295,0.00,2.923295
2020-03-26,12.0,3.368601,0.09,3.381327,0.00,3.381327
...,...,...,...,...,...,...
2020-06-06,699.0,25.360066,12.35,35.015436,9.67,35.047994
2020-06-07,699.0,25.260653,14.56,34.408960,11.02,34.443811
2020-06-08,698.0,26.264851,12.49,34.757783,9.60,34.785967
2020-06-09,735.0,26.157924,14.01,34.898556,11.22,34.931086


## Private ICU beds

In [26]:
# Private ICU demand: total ICU demand minus the SUS part.
df1_ICU['n_mean_ICU_PRIVATE'] = df1_ICU['n_mean_ICU'] - df1_ICU['n_mean_ICU_SUS']
# Standard deviations combine in quadrature, so BOTH terms must be squared
# (the square on `n_std_ICU_SUS` was previously missing).
df1_ICU['n_std_ICU_PRIVATE'] = np.sqrt(df1_ICU['n_std_ICU']**2 + df1_ICU['n_std_ICU_SUS']**2)

## Saving the results. 

In [27]:
# Output path encodes the place type, the place name and the last fitted date.
# NOTE(review): the `results/dfs` directory must already exist.
file1 = f'results/dfs/df_ICU_{state_or_city}_{name_file}_fit_until_{fit_until}.pkl'
df1_ICU.to_pickle(file1)