In [1]:
import os
import yaml

import numpy as np
import pandas as pd

# Raise the pandas display limit so frames of up to 100 rows are shown in full.
pd.set_option("display.max_rows", 100)

In [2]:
# Root directory shared by the two experiment locations below.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
experiment_fpath = "/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/clmbr/experiments/baseline"

# Per-task model artifacts are read from "<artifacts_fpath>/<task>/models/...".
artifacts_fpath = f"{experiment_fpath}/artifacts"
# Trailing slash is intentional: "<model>.yml" is appended to this string directly.
hparams_fpath = f"{experiment_fpath}/hyperparams/"

# Task identifier (directory name under the artifacts folder) -> display label.
tasks = {
    'hospital_mortality':'In-Hospital Mortality',
    'LOS_7':'LOS > 7 Days',
    'readmission_30':'Readmission in 30 Days',
    'icu_admission':'ICU Admission'
}

# Model identifier (used in file and directory names) -> display label.
models = {
    'lr':'Logistic Regression',
    'gbm':'LightGBM'
}

# Training-year group identifier (suffix of the model directory name) ->
# display label. Each key appears exactly once; insertion order is the
# iteration order used when collecting results.
years = {
    '2009_2010_2011_2012':'09-12',
    '2013':'2013',
    '2014':'2014',
    '2015':'2015',
    '2016':'2016',
    '2017':'2017',
    '2018':'2018',
    '2019':'2019',
    '2020':'2020',
    '2021':'2021',
    '2010_2011_2012_2013':'10-13',
    '2011_2012_2013_2014':'11-14',
    '2012_2013_2014_2015':'12-15',
    '2013_2014_2015_2016':'13-16',
    '2014_2015_2016_2017':'14-17',
    '2015_2016_2017_2018':'15-18',
}

#### Hyperparameter Grid

In [7]:
# Build one table listing, for every model, the candidate values of each
# hyperparameter in its search grid (one row per hyperparameter).
grid_frames = []
for model in models:

    # NOTE(review): yaml.load with FullLoader is kept unchanged; prefer
    # yaml.safe_load if these files could ever come from an untrusted source.
    # The context manager guarantees the file handle is closed.
    with open(f"{hparams_fpath}{model}.yml") as f:
        hparams = yaml.load(f, Loader=yaml.FullLoader)

    # Collapse each hyperparameter's candidates into one comma-separated
    # string. A bare scalar (not wrapped in a list) is rendered as-is rather
    # than raising (int) or being split character by character (str).
    df = pd.DataFrame(
        {
            'Values': [
                ','.join(str(x) for x in v) if isinstance(v, (list, tuple)) else str(v)
                for v in hparams.values()
            ],
            'Model': models[model],
        },
        index=list(hparams),
    )
    grid_frames.append(df)

# Concatenate once instead of growing the frame inside the loop.
df_hparams = pd.concat(grid_frames) if grid_frames else pd.DataFrame()

In [12]:
# Bare last expression: renders the hyperparameter-grid table as the cell output.
df_hparams

Unnamed: 0,Values,Model
C,"1e-06,1e-05,0.0001,0.001,0.01,0.1,1,10,100",Logistic Regression
max_iter,10000,Logistic Regression
learning_rate,"0.1,0.2,0.01",LightGBM
num_leaves,"100,300",LightGBM
n_estimators,1000,LightGBM
max_depth,-1,LightGBM
boosting_type,"gbdt,dart,goss",LightGBM
objective,binary,LightGBM
metric,binary_logloss,LightGBM
first_metric_only,True,LightGBM


#### Selected Hyperparameters

In [14]:
# For every model, gather the hyperparameters stored with the selected
# ("best_model") run of each task / training-year group and display one table
# per model.
# NOTE: this rebinds `df_hparams`; after the loop it holds the table of the
# last model in `models`, not the hyperparameter grid.
for model in models:
    selected_frames = []
    for task in tasks:
        for year in years:

            fpath = os.path.join(
                artifacts_fpath,
                task,
                f"models/{model}_{year}"
            )

            # os.listdir returns entries in arbitrary order; sorting makes the
            # choice deterministic should more than one entry match.
            best_model_names = sorted(
                x for x in os.listdir(fpath) if "best_model" in x
            )
            if not best_model_names:
                raise FileNotFoundError(
                    f"no 'best_model' entry found in {fpath}"
                )
            best_model_name = best_model_names[0]

            # NOTE(review): yaml.load with FullLoader is kept unchanged; prefer
            # yaml.safe_load if these files could ever be untrusted.
            with open(f"{fpath}/{best_model_name}/hparams.yml") as f:
                hparams = yaml.load(f, Loader=yaml.FullLoader)

            # One single-row frame per (task, year), tagged with display labels.
            selected_frames.append(
                pd.DataFrame({k:[v] for k,v in hparams.items()}).assign(
                    Task=tasks[task],
                    Train_Group=years[year]
                )
            )

    # Concatenate once per model instead of growing the frame row by row.
    df_hparams = pd.concat(selected_frames) if selected_frames else pd.DataFrame()
    display(df_hparams)

Unnamed: 0,C,max_iter,Task,Train_Group
0,0.01,10000,In-Hospital Mortality,09-12
0,0.01,10000,In-Hospital Mortality,2013
0,0.01,10000,In-Hospital Mortality,2014
0,0.01,10000,In-Hospital Mortality,2015
0,0.01,10000,In-Hospital Mortality,2016
0,0.01,10000,In-Hospital Mortality,2017
0,0.01,10000,In-Hospital Mortality,2018
0,0.01,10000,In-Hospital Mortality,2019
0,0.01,10000,In-Hospital Mortality,2020
0,0.01,10000,In-Hospital Mortality,2021


Unnamed: 0,boosting_type,first_metric_only,learning_rate,max_depth,metric,n_estimators,num_leaves,objective,Task,Train_Group
0,goss,True,0.01,-1,binary_logloss,323,100,binary,In-Hospital Mortality,09-12
0,goss,True,0.01,-1,binary_logloss,226,300,binary,In-Hospital Mortality,2013
0,goss,True,0.1,-1,binary_logloss,21,100,binary,In-Hospital Mortality,2014
0,goss,True,0.1,-1,binary_logloss,20,100,binary,In-Hospital Mortality,2015
0,goss,True,0.2,-1,binary_logloss,9,300,binary,In-Hospital Mortality,2016
0,goss,True,0.1,-1,binary_logloss,21,100,binary,In-Hospital Mortality,2017
0,goss,True,0.01,-1,binary_logloss,242,300,binary,In-Hospital Mortality,2018
0,goss,True,0.01,-1,binary_logloss,157,100,binary,In-Hospital Mortality,2019
0,goss,True,0.01,-1,binary_logloss,205,300,binary,In-Hospital Mortality,2020
0,goss,True,0.01,-1,binary_logloss,233,300,binary,In-Hospital Mortality,2021
