In [64]:
import pandas as pd
import numpy as np

In [65]:
##############################################################################
# Notebook configuration: relative locations of inputs/outputs and the value
# range applied to every delay variable when the observed data are loaded.
DATA_PATH = '../../data/'  # observed delay CSVs (<name>_bog.csv)
OUT_PATH = '../figures/'  # not referenced by the cells shown in this notebook section
SAMPLES_PATH = '../fitting_outputs/'  # per-distribution sample CSVs
RESULTS_PATH = '../results/'  # best_models.csv is read from here, best_fit_summary.csv is written here
MAX_VAL = 133  # inclusive upper bound: rows are kept when value <= MAX_VAL
MIN_VAL = 1  # exclusive lower bound: rows are kept when value > MIN_VAL

# Stratification tag used in the sample file names; the selection cell below
# recognises 'wave', 'age' and 'sex'.
strat = 'wave'

In [66]:
##############################################################################
# Read the model-comparison table and transpose it, so that the values of the
# file's first column become the column labels of df_best_models.
best_models_raw = pd.read_csv(RESULTS_PATH + 'best_models.csv')
first_column = best_models_raw.columns[0]
df_best_models = best_models_raw.set_index(first_column).transpose()

In [67]:
# Display the transposed comparison table (one row per candidate distribution,
# one column per delay variable, as shown in the output below).
df_best_models

Epidemilogical distribution,icu_stay,hosp_stay,onset_icu,onset_hosp,onset_death
Gamma,1784.604885,13705.279845,3735.841745,6284.611918,460.013954
Lognormal,160.504407,177.901618,928.318456,1156.085625,7.447201
Weibull,3551.564872,19157.024686,5718.721788,15042.612995,2339.510146
Exponential,7568.008197,23439.784061,15372.773116,47793.381182,8631.483925
Gamma 3p,179759.907133,219187.681072,99057.537214,133422.419425,212046.575538
Gen Lognormal,0.0,0.0,0.0,0.0,0.0


In [68]:
##############################################################################
# load the observed data
drop_columns = ['start_date', 'end_date']  # not used by the cells shown here; kept in case later cells need it


def _load_delay(name):
    """Read ``<name>_bog.csv`` and keep the rows whose ``name`` column is in range.

    Parameters
    ----------
    name : str
        Delay variable name; it is both the file-name prefix and the column
        that is filtered.

    Returns
    -------
    pandas.DataFrame
        Independent copy of the rows with ``MIN_VAL < value <= MAX_VAL``.
    """
    observed = pd.read_csv(DATA_PATH + name + '_bog.csv')
    # Same bounds for every variable. Comparing the raw value (instead of its
    # absolute value) against MAX_VAL is equivalent whenever MIN_VAL >= 0,
    # because the lower bound already excludes non-positive values.
    in_range = (observed[name] > MIN_VAL) & (observed[name] <= MAX_VAL)
    # .copy() so that later in-place dropna / column assignments act on a
    # standalone frame instead of a slice of ``observed``.
    return observed.loc[in_range].copy()


df_icu_stay = _load_delay('icu_stay')
df_hosp_stay = _load_delay('hosp_stay')
df_onset_icu = _load_delay('onset_icu')
df_onset_hosp = _load_delay('onset_hosp')
df_onset_death = _load_delay('onset_death')

all_dfs = [df_icu_stay, df_hosp_stay, df_onset_icu, df_onset_hosp, df_onset_death]

In [69]:
# clean and preparation of data
# Incomplete rows are dropped in place on purpose: the named frames
# (df_onset_icu, ...) and the entries of all_dfs are the same objects, so an
# in-place drop keeps both views in sync.
for df in all_dfs:
    df.dropna(inplace=True)


def _level_ids(levels):
    """Return ``(mapping, ids)`` assigning 1-based integer ids to sorted levels.

    ``mapping`` is a dict from each level (in ascending order) to its id and
    ``ids`` is the list ``[1, ..., n_levels]``.
    """
    ordered = np.sort(levels)
    ids = list(range(1, len(ordered) + 1))
    return dict(zip(ordered, ids)), ids


# The stratification levels are read from the onset-to-ICU data set only.
strat_sex_map, strat_sex = _level_ids(df_onset_icu['sex'].unique())
strat_ages_map, strat_ages = _level_ids(df_onset_icu['age_group'].unique())
strat_wave_map, strat_wave = _level_ids(df_onset_icu['wave'].unique())

# Pick the id list matching the configured stratification. An unrecognised
# value leaves strat_ undefined, exactly as before.
if strat == 'wave':
    strat_ = strat_wave
elif strat == 'age':
    strat_ = strat_ages  # was `strat_age`, an undefined name (NameError)
elif strat == 'sex':
    strat_ = strat_sex

columns = []
for df in all_dfs:
    # Rows with NaN were already removed in place at the top of this cell.
    # NOTE(review): assumes the delay variable is the fifth column of every
    # input file - confirm against the CSV layout.
    col = str(df.columns[4])
    columns.append(col)
    df['age_group_id'] = df['age_group'].map(strat_ages_map)
    df['sex_id'] = df['sex'].map(strat_sex_map)
    # wave is cast directly to int rather than mapped through strat_wave_map.
    df['wave_id'] = df['wave'].astype(int)

In [70]:
def q025(x):
    """Return the 2.5th percentile of ``x`` (anything exposing ``.quantile``)."""
    return x.quantile(0.025)


def q975(x):
    """Return the 97.5th percentile of ``x`` (anything exposing ``.quantile``).

    Together with ``q025`` this brackets a central 95% interval.
    """
    return x.quantile(0.975)

In [71]:
# Summary statistics computed for every column of each sample file.
# q975 / q025 are the percentile helpers defined in the previous cell; the
# names used here before (q95, q05) are not defined anywhere in this notebook.
stat = ['mean', q975, q025]
##############################################################################
# print n samples and range of data
for df in all_dfs:
    col = str(df.columns[4])
    print(col, len(df[col].index), df[col].min(), '-', df[col].max())
##############################################################################
# load the samples (models fits for every epidemiological distribution)

# Distribution label (as used in df_best_models) -> tag used in the sample
# file name. Insertion order matches the original loading order.
sample_file_tags = {
    'Gamma': 'gamma',
    'Lognormal': 'logn',
    'Weibull': 'weibull',
    'Exponential': 'exponential',
    'Gen Lognormal': 'gln',
    'Gamma 3p': 'gamma3p',
}

dist_posteriors  = {'icu_stay':{},
                    'hosp_stay':{},
                    'onset_icu':{},
                    'onset_hosp':{},
                    'onset_death':{}
                   }

for col in columns:
    for dist_name, tag in sample_file_tags.items():
        # File name pattern: <delay>-samples-<tag>_<strat>.csv
        samples_file = SAMPLES_PATH + col + '-samples-' + tag + '_' + strat + '.csv'
        # Only the aggregated summary (one row per entry of `stat`) is kept.
        dist_posteriors[col][dist_name] = pd.read_csv(samples_file).agg(stat)

icu_stay 28040 2.0 - 133.0
hosp_stay 66652 2.0 - 133.0
onset_icu 29248 2.0 - 133.0
onset_hosp 77012 2.0 - 131.0
onset_death 26469 2.0 - 133.0


In [72]:
# For every delay variable, take the summary of its best-fitting distribution
# and keep the identifying columns plus every column whose name contains 'mu'.
summary_frames = []
for dist in dist_posteriors:
    # The best model is the row whose entry in df_best_models is exactly 0.
    flagged = df_best_models[df_best_models[dist] == 0].index
    if len(flagged) == 0:
        raise ValueError("no model with value 0 in df_best_models for '" + str(dist) + "'")
    best = flagged[0]

    summary = dist_posteriors[dist][best].reset_index()
    summary = summary.rename(columns={'index': 'stat'})
    mu_cols = [name for name in summary.columns.tolist() if 'mu' in name]
    summary['t'] = dist
    summary['best'] = best
    summary_frames.append(summary[['t', 'best', 'stat'] + mu_cols])

# Concatenate once instead of growing a frame inside the loop; an empty input
# still yields an empty DataFrame.
df_res = pd.concat(summary_frames) if summary_frames else pd.DataFrame({})

In [73]:
# Replace every 'mu' column of df_res by its exponential, rounded to one
# decimal (presumably mu is on the log scale - TODO confirm).
# Caution: df_res is overwritten, so running this cell twice applies exp twice.
mu_columns = [name for name in df_res.columns if 'mu' in name]
for name in mu_columns:
    exponentiated = np.exp(df_res[name])
    df_res[name] = exponentiated.round(1)

In [74]:
# Write the summary table next to the other results, without the row index.
summary_path = RESULTS_PATH + 'best_fit_summary.csv'
df_res.to_csv(summary_path, index=False)