In [1]:
import os
import pickle
import pandas as pd

from src.utils import read_pkl
from src.mappings import model_names, task_names

In [2]:
# Root of the project checkout. Set the FEMR_ON_SK_ROOT environment variable to
# point the notebook at a different checkout without editing this cell; when the
# variable is unset the original location is used.
ROOT_PATH = os.environ.get(
    "FEMR_ON_SK_ROOT",
    "/hpf/projects/lsung/projects/lguo/femr-on-sk",
)
# Evaluation outputs; traversed later as <model>/<task>/results.pkl.
PATH_TO_RESULTS = os.path.join(ROOT_PATH, "data", "evaluate", "adapter_models")

In [3]:
def list_dir(path: str):
    """Return the entry names found in `path`, minus the notebook checkpoint folder.

    Names come back in whatever order `os.listdir` yields them; both files and
    directories are included.
    """
    entries = []
    for name in os.listdir(path):
        # Jupyter drops this folder next to notebooks; it is never a result directory.
        if name == ".ipynb_checkpoints":
            continue
        entries.append(name)
    return entries

In [4]:
# Walk PATH_TO_RESULTS/<model>/<task>/results.pkl and collect the "auroc" and
# "auprc" entries of every result into two nested dicts keyed model -> task.
models = list_dir(PATH_TO_RESULTS)

auroc_results = {name: {} for name in models}
auprc_results = {name: {} for name in models}

for model in models:
    model_dir = os.path.join(PATH_TO_RESULTS, model)
    tasks = list_dir(model_dir)

    for task in tasks:
        result = read_pkl(os.path.join(model_dir, task, "results.pkl"))
        auroc_results[model][task] = result["auroc"]
        auprc_results[model][task] = result["auprc"]

In [5]:
# Left-to-right order of the model columns in the result tables.
column_orders = [
    "SK [Count]",
    "SK [Count] Reg",
    "SK",
    "Stanford",
    "Stanford_FT-full",
    "Stanford_FT-last",
]

# Top-to-bottom order of the task rows in the result tables.
task_orders = [
    "In-hospital Mortality",
    "Long LOS",
    "30-day Readmission",
    "Hypoglycemia",
    "Hyperkalemia",
    "Hyponatremia",
    "Thrombocytopenia",
    "Anemia",
]

In [6]:
# AUROC table: one row per task, one column per model, rounded to 3 decimals.
# model_names / task_names come from src.mappings and are applied as relabelling
# dicts (presumably raw directory names -> display labels; confirm in src.mappings).
df_auroc = (
    pd.DataFrame(auroc_results)      # outer keys (models) -> columns, inner keys (tasks) -> index
    .round(3)
    .rename(columns=model_names)
    .rename_axis("Task")             # name the index so it becomes a "Task" column below
    .reset_index()
    .replace(task_names)
    .set_index("Task")
    .reindex(index=task_orders)      # fixed row order; tasks without results become NaN rows
    .loc[:, column_orders]           # fixed column order; a missing model raises KeyError
)

df_auroc

Unnamed: 0_level_0,SK [Count],SK [Count] Reg,SK,Stanford,Stanford_FT-full,Stanford_FT-last
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
In-hospital Mortality,0.842,0.789,0.943,0.905,0.918,0.895
Long LOS,0.847,0.783,0.866,0.848,0.857,0.846
30-day Readmission,0.723,0.735,0.813,0.79,0.82,0.785
Hypoglycemia,0.815,0.813,0.828,0.823,0.825,0.819
Hyperkalemia,0.773,0.774,0.801,0.789,0.809,0.797
Hyponatremia,0.791,0.77,0.812,0.808,0.823,0.815
Thrombocytopenia,0.773,0.792,0.84,0.818,0.817,0.812
Anemia,0.719,0.692,0.785,0.771,0.799,0.755


In [8]:
# AUPRC table: one row per task, one column per model, rounded to 3 decimals.
# model_names / task_names come from src.mappings and are applied as relabelling
# dicts (presumably raw directory names -> display labels; confirm in src.mappings).
df_auprc = (
    pd.DataFrame(auprc_results)      # outer keys (models) -> columns, inner keys (tasks) -> index
    .round(3)
    .rename(columns=model_names)
    .rename_axis("Task")             # name the index so it becomes a "Task" column below
    .reset_index()
    .replace(task_names)
    .set_index("Task")
    .reindex(index=task_orders)      # fixed row order; tasks without results become NaN rows
    .loc[:, column_orders]           # fixed column order; a missing model raises KeyError
)

df_auprc

Unnamed: 0_level_0,SK [Count],SK [Count] Reg,SK,Stanford,Stanford_FT-full,Stanford_FT-last
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
In-hospital Mortality,0.032,0.025,0.207,0.067,0.121,0.051
Long LOS,0.623,0.496,0.648,0.619,0.64,0.6
30-day Readmission,0.199,0.178,0.291,0.253,0.307,0.271
Hypoglycemia,0.392,0.379,0.409,0.391,0.387,0.4
Hyperkalemia,0.249,0.26,0.284,0.253,0.278,0.272
Hyponatremia,0.132,0.106,0.152,0.168,0.176,0.164
Thrombocytopenia,0.339,0.349,0.462,0.455,0.45,0.445
Anemia,0.212,0.18,0.382,0.37,0.403,0.327
