In [1]:
import os
import re

import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import numpy as np

# Let pandas render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

In [2]:
def _parse_loss(token):
    """Convert the trailing token of a log line into a float.

    A clean numeric token (including signed or scientific notation such as
    ``1e-05``) is parsed directly. Anything else falls back to the legacy
    behaviour of dropping every character that is not a digit or a dot.
    """
    try:
        return float(token)
    except ValueError:
        # Legacy path for tokens carrying punctuation, e.g. "0.123," or "(0.5)".
        return float(re.sub("[^.0-9]","",token))


def _read_losses(path):
    """Read a ``losses`` log once and return ``(val_losses, train_losses)``.

    Lines containing ``nan`` are skipped. The value is taken from the last
    space-separated token of each line containing ``Val`` / ``Train``.
    """
    val_losses, train_losses = [], []
    # Context manager guarantees the handle is closed even if parsing fails.
    with open(path, 'r') as log_file:
        for line in log_file:
            if 'nan' in line:
                continue
            if 'Val' in line:
                val_losses.append(_parse_loss(line.split(' ')[-1]))
            if 'Train' in line:
                train_losses.append(_parse_loss(line.split(' ')[-1]))
    return val_losses, train_losses


def _min_max_scale(values, e):
    """Min-max scale ``values`` with ``e`` added to numerator and denominator.

    Returns an empty list for empty input instead of raising.
    """
    if not values:
        return []
    lowest, highest = min(values), max(values)
    return [(v - lowest + e) / (highest - lowest + e) for v in values]


def get_hparams_losses(models_dir,e=1e-8):
    """Collect hyperparameters and loss curves for every model in a directory.

    Each sub-entry of ``models_dir`` whose name does not contain ``'test'`` is
    treated as a model directory holding a line-delimited ``config.json`` and
    a ``losses`` log file.

    Parameters
    ----------
    models_dir : str
        Directory containing one sub-directory per trained model.
    e : float, default 1e-8
        Small constant added to the numerator and denominator of the min-max
        scaling so that a constant loss curve does not divide by zero.

    Returns
    -------
    model_losses : dict
        ``{model: {'val': [...], 'train': [...]}}`` with min-max scaled loss
        curves. ``'train'`` is an empty list when the log has no usable
        ``Train`` lines.
    df_hparams : pandas.DataFrame
        One block of rows per model (the contents of its ``config.json``)
        with the added columns ``best_loss`` (minimum validation loss),
        ``best_iter`` (position of that minimum among the parsed validation
        losses) and ``model_num`` (the model directory name). The index is
        reset to ``0..n-1``.

    Raises
    ------
    ValueError
        If a loss token cannot be converted to a float.
    """
    models = [x for x in os.listdir(models_dir) if 'test' not in x]

    frames = []
    model_losses = {}

    for model in models:

        df = pd.read_json(
            os.path.join(models_dir,model,'config.json'),
            lines=True
        )

        losses, train_losses = _read_losses(
            os.path.join(models_dir,model,'losses')
        )

        # Sentinel so that models without any usable validation loss sort last.
        if not losses: losses.append(999999)

        best_loss = min(losses)

        frames.append(
            df.assign(
                best_loss = best_loss,
                best_iter = losses.index(best_loss),
                model_num = model,
            )
        )

        model_losses[model] = {
            'val': _min_max_scale(losses, e),
            'train': _min_max_scale(train_losses, e),
        }

    # Concatenate once instead of growing the frame inside the loop;
    # pd.concat rejects an empty sequence, so keep the empty-frame result.
    df_hparams = pd.concat(frames, sort=False) if frames else pd.DataFrame()

    return model_losses, df_hparams.reset_index(drop=True)

#### Best CLMBR_GRU

In [3]:
# Gather every GRU run stored under 2009_2012/gru, then show the
# hyperparameters of the run(s) whose best_loss equals the overall minimum.
model_losses, df_hparams = get_hparams_losses(
    "/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/clmbr/experiments/clmbr/clmbr_artifacts/models/2009_2012/gru"
)

best = df_hparams['best_loss'].min()
display(df_hparams.loc[df_hparams['best_loss'] == best, ['lr','dropout','l2']])


Unnamed: 0,lr,dropout,l2
26,0.01,0.1,0.1


#### Best CLMBR_transformer

In [4]:
# Gather every transformer run stored under 2009_2012/transformer, then show
# the hyperparameters of the run(s) whose best_loss equals the overall minimum.
model_losses, df_hparams = get_hparams_losses(
    "/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/clmbr/experiments/clmbr/clmbr_artifacts/models/2009_2012/transformer"
)

best = df_hparams['best_loss'].min()
display(
    df_hparams.loc[
        df_hparams['best_loss'] == best,
        ['lr','dropout','l2', 'transformer_layers','code_dropout'],
    ]
)

Unnamed: 0,lr,dropout,l2,transformer_layers,code_dropout
27,0.0001,0.4,0.01,6,0.2


#### Best end-to-end (ETE) models

In [5]:
# One sweep directory per task: print the task name, then display the
# hyperparameters of the GRU run(s) whose best_loss equals the task minimum.
for task in ('hospital_mortality','LOS_7','icu_admission','readmission_30'):
    model_losses, df_hparams = get_hparams_losses(
        f"/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/clmbr/experiments/clmbr/clmbr_artifacts/models/2009_2012_end_to_end/gru/{task}"
    )
    print(task)
    best = df_hparams['best_loss'].min()
    display(df_hparams.loc[df_hparams['best_loss'] == best, ['lr','dropout','l2']])

hospital_mortality


Unnamed: 0,lr,dropout,l2
10,0.01,0.2,0.1


LOS_7


Unnamed: 0,lr,dropout,l2
19,0.001,0.0,0.01


icu_admission


Unnamed: 0,lr,dropout,l2
19,0.001,0.0,0.01


readmission_30


Unnamed: 0,lr,dropout,l2
22,0.0001,0.0,0.1


In [6]:
# One sweep directory per task: print the task name, then display the
# hyperparameters of the transformer run(s) whose best_loss equals the task minimum.
for task in ('hospital_mortality','LOS_7','icu_admission','readmission_30'):
    model_losses, df_hparams = get_hparams_losses(
        f"/local-scratch/nigam/projects/lguo/temp_ds_shift_robustness/clmbr/experiments/clmbr/clmbr_artifacts/models/2009_2012_end_to_end/transformer/{task}"
    )
    print(task)
    best = df_hparams['best_loss'].min()
    display(
        df_hparams.loc[
            df_hparams['best_loss'] == best,
            ['lr','dropout','l2', 'transformer_layers','code_dropout'],
        ]
    )

hospital_mortality


Unnamed: 0,lr,dropout,l2,transformer_layers,code_dropout
24,0.0001,0.0,0.01,6,0.4


LOS_7


Unnamed: 0,lr,dropout,l2,transformer_layers,code_dropout
15,0.0001,0.0,0.1,6,0.4


icu_admission


Unnamed: 0,lr,dropout,l2,transformer_layers,code_dropout
9,0.0001,0.2,0.1,6,0.2


readmission_30


Unnamed: 0,lr,dropout,l2,transformer_layers,code_dropout
30,0.0001,0.0,0.01,6,0.2
