In [15]:
import os
import yaml

import numpy as np
import pandas as pd

# Let rendered tables show up to 100 rows before pandas truncates them.
pd.set_option("display.max_rows", 100)

In [16]:
# Root directory of the baseline experiment; both paths below live beneath it.
experiment_fpath = "/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/experiments/baseline"
artifacts_fpath = f"{experiment_fpath}/artifacts"
hparams_fpath = f"{experiment_fpath}/hyperparams/nn.yml"

# Task identifier (used as a directory name under `artifacts_fpath`) -> display label.
tasks = {
    'hospital_mortality':'In-Hospital Mortality',
    'LOS_7':'LOS > 7 Days',
    'readmission_30':'Readmission in 30 Days',
    'icu_admission':'ICU Admission'
}

# Training-year group key (suffix of the `nn_{year}` model directory) -> short
# display label. Each key appears exactly once: '2009_2010_2011_2012' belongs to
# all three families below, and repeating it in the literal would only overwrite
# the same value.
years = {
    # baseline group followed by single-year groups
    '2009_2010_2011_2012':'09-12',
    '2013':'2013',
    '2014':'2014',
    '2015':'2015',
    '2016':'2016',
    '2017':'2017',
    '2018':'2018',
    '2019':'2019',
    '2020':'2020',
    '2021':'2021',
    # four-year sliding windows (the 09-12 window is already listed above)
    '2010_2011_2012_2013':'10-13',
    '2011_2012_2013_2014':'11-14',
    '2012_2013_2014_2015':'12-15',
    '2013_2014_2015_2016':'13-16',
    '2014_2015_2016_2017':'14-17',
    '2015_2016_2017_2018':'15-18',
    # expanding windows starting in 2009 (the 09-12 window is already listed above)
    '2009_2010_2011_2012_2013':'09-13',
    '2009_2010_2011_2012_2013_2014':'09-14',
    '2009_2010_2011_2012_2013_2014_2015':'09-15',
    '2009_2010_2011_2012_2013_2014_2015_2016':'09-16',
    '2009_2010_2011_2012_2013_2014_2015_2016_2017':'09-17',
    '2009_2010_2011_2012_2013_2014_2015_2016_2017_2018':'09-18',
}

#### Hparam Grid

In [17]:
# Load the hyperparameter search grid from YAML. The `with` block closes the
# file handle as soon as parsing finishes instead of leaving it open.
with open(hparams_fpath) as f:
    # NOTE(review): FullLoader is kept to preserve existing parsing behavior;
    # consider yaml.safe_load if this file only holds plain scalars and lists.
    hparams = yaml.load(f, Loader=yaml.FullLoader)

# One column per hyperparameter holding its candidate values joined by commas.
# Values that are not a list/tuple are wrapped first, so a scalar entry is shown
# as-is rather than raising (non-iterable) or being split into characters (str).
df_hparams = pd.DataFrame({
    name: [','.join(
        str(x) for x in (values if isinstance(values, (list, tuple)) else [values])
    )]
    for name, values in hparams.items()
})

# Transpose to one row per hyperparameter with a single 'Values' column.
df_hparams = df_hparams.T
df_hparams.columns = ['Values']

In [18]:
# Display the hyperparameter grid: one row per hyperparameter, candidate values in 'Values'.
df_hparams

Unnamed: 0,Values
drop_prob,"0.0,0.25,0.5,0.75"
early_stopping,True
early_stopping_patience,10
hidden_dim,"128,256"
lr,"0.0001,1e-05"
num_epochs,150
num_hidden,"1,3"
verbose,False
sparse_mode,list


#### Selected hparams

In [19]:
# For every (task, training group) pair, read the hyperparameters stored with
# the entry whose name contains "best_model" and collect them as one-row frames.
selected_frames = []

for task, task_label in tasks.items():
    for year, year_label in years.items():

        fpath = os.path.join(
            artifacts_fpath,
            task,
            f"models/nn_{year}"
        )

        # os.listdir returns entries in arbitrary order; sorting makes the pick
        # deterministic, and the explicit check replaces a bare IndexError with
        # a message that names the directory.
        best_model_names = sorted(x for x in os.listdir(fpath) if "best_model" in x)
        if not best_model_names:
            raise FileNotFoundError(f"no 'best_model' entry found in {fpath}")
        best_model_name = best_model_names[0]

        # Close each hparams file right after parsing (88 files are opened in total).
        with open(f"{fpath}/{best_model_name}/hparams.yml") as f:
            hparams = yaml.load(f, Loader=yaml.FullLoader)

        selected_frames.append(
            pd.DataFrame({k:[v] for k,v in hparams.items()}).assign(
                Task=task_label,
                Train_Group=year_label
            )
        )

# Concatenate once instead of re-copying a growing frame on every iteration.
# Each one-row frame keeps its own index label 0, so the result is indexed
# exactly as it was when built incrementally.
df_hparams = pd.concat(selected_frames)
df_params = df_hparams[['Task','Train_Group','hidden_dim','num_hidden','lr','drop_prob']]

In [20]:
# Selected hyperparameters for the rows whose Train_Group label is '09-12' (one per task).
df_params.query("Train_Group=='09-12'")

Unnamed: 0,Task,Train_Group,hidden_dim,num_hidden,lr,drop_prob
0,In-Hospital Mortality,09-12,128,3,0.0001,0.75
0,LOS > 7 Days,09-12,128,1,0.0001,0.75
0,Readmission in 30 Days,09-12,256,3,1e-05,0.5
0,ICU Admission,09-12,256,1,1e-05,0.0


In [21]:
# Display the selected hyperparameters for every task / training-group combination.
df_params

Unnamed: 0,Task,Train_Group,hidden_dim,num_hidden,lr,drop_prob
0,In-Hospital Mortality,09-12,128,3,0.0001,0.75
0,In-Hospital Mortality,2013,256,3,0.0001,0.75
0,In-Hospital Mortality,2014,256,3,0.0001,0.75
0,In-Hospital Mortality,2015,256,3,0.0001,0.75
0,In-Hospital Mortality,2016,256,3,0.0001,0.75
0,In-Hospital Mortality,2017,256,3,0.0001,0.75
0,In-Hospital Mortality,2018,256,3,0.0001,0.75
0,In-Hospital Mortality,2019,256,3,0.0001,0.75
0,In-Hospital Mortality,2020,256,3,0.0001,0.75
0,In-Hospital Mortality,2021,256,3,0.0001,0.75
