In [1]:
import os
import pickle
import pandas as pd
import numpy as np

from src.io import read_pkl
from src.default_paths import path_root
from src.mappings import model_names, task_names

In [2]:
# Evaluation result directories, all located under <path_root>/data/evaluate.
PATH_TO_EVALUATE = os.path.join(path_root, "data", "evaluate")
PATH_TO_ADAPTER_MODELS_RESULTS = os.path.join(PATH_TO_EVALUATE, "adapter_models")
PATH_TO_CLMBR_TASK_RESULTS = os.path.join(PATH_TO_EVALUATE, "clmbr_finetuned")

In [3]:
def list_dir(path: str) -> list:
    """Return the entry names found in ``path``, sorted, excluding nb checkpoints.

    Args:
        path: Directory whose immediate entries (files and sub-directories)
            are listed.

    Returns:
        Alphabetically sorted list of entry names, with the
        ".ipynb_checkpoints" entry removed.

    Raises:
        OSError: Propagated from ``os.listdir`` (e.g. ``path`` does not exist
            or is not a directory).
    """
    # os.listdir yields entries in arbitrary, filesystem-dependent order;
    # sorting makes iteration order identical from run to run.
    return sorted(
        name for name in os.listdir(path)
        if name != ".ipynb_checkpoints"
    )

def get_bstrp_results(results: list, formatted_string: bool = True):
    """Summarise bootstrap replicates as a median with a 2.5-97.5 percentile interval.

    Args:
        results: Bootstrap replicate values of a single metric.
        formatted_string: When True (default) return a display string,
            otherwise return the three numbers.

    Returns:
        If ``formatted_string`` is True, the string "median [lower, upper]"
        with every number printed to exactly three decimals (e.g.
        "0.982 [0.970, 0.992]"). Otherwise the tuple ``(lower, median, upper)``
        rounded to three decimals.
    """
    lower, med, upper = np.percentile(results, [2.5, 50, 97.5]).round(3)

    if formatted_string:
        # Use a fixed-precision format rather than str(): str() drops trailing
        # zeros ("0.97" instead of "0.970"), which gives ragged table columns.
        return f"{med:.3f} [{lower:.3f}, {upper:.3f}]"

    return (lower, med, upper)

In [7]:
# True  -> store the bootstrap summary produced by get_bstrp_results
# False -> store the plain "auroc" / "auprc" entries of each results file
use_boot = True

# Nested mappings: {model name: {task name: metric value}}
auroc_results = {}
auprc_results = {}

models = list_dir(PATH_TO_ADAPTER_MODELS_RESULTS)

for model in models:
    model_dir = os.path.join(PATH_TO_ADAPTER_MODELS_RESULTS, model)
    tasks = list_dir(model_dir)
    auroc_results[model] = {}
    auprc_results[model] = {}

    for task in tasks:
        # Each <model>/<task> directory is expected to contain a results.pkl
        result = read_pkl(os.path.join(model_dir, task, "results.pkl"))

        if use_boot:
            auroc_results[model][task] = get_bstrp_results(result["auroc_bootstrap"])
            auprc_results[model][task] = get_bstrp_results(result["auprc_bootstrap"])
        else:
            auroc_results[model][task] = result["auroc"]
            auprc_results[model][task] = result["auprc"]

# Disabled: same collection for the results under PATH_TO_CLMBR_TASK_RESULTS,
# stored under "<model>_ft" keys and skipping the "mortality" task.
# models = list_dir(PATH_TO_CLMBR_TASK_RESULTS)

# for model in models:
#     tasks = list_dir(os.path.join(PATH_TO_CLMBR_TASK_RESULTS, model))
#     auroc_results[model+"_ft"] = {}
#     auprc_results[model+"_ft"] = {}

#     for task in tasks:
#         if task=="mortality":
#             continue
#         result = read_pkl(os.path.join(PATH_TO_CLMBR_TASK_RESULTS, model, task, "results.pkl"))
#         auroc_results[model+"_ft"][task] =  get_bstrp_results(result["auroc_bootstrap"]) if use_boot else result["auroc"]
#         auprc_results[model+"_ft"][task] = get_bstrp_results(result["auprc_bootstrap"]) if use_boot else result["auprc"]

In [11]:
# Left-to-right order of the model columns in the result tables.
# Commented-out names are currently excluded from the tables.
column_orders = [
    "MIMIC [Count] (GBM)",
    "MIMIC",
    "SK",
    # "SK_FT",
    "Stanford",
    # "Stanford_FT",
    # "Stanford_Cont-Pre",
    # "Stanford_Cont-Pre_FT",
]

# Top-to-bottom order of the task rows in the result tables.
# Commented-out names are currently excluded from the tables.
task_orders = [
    "In-hospital Mortality",
    "Long LOS",
    "30-day Readmission",
    # "Hypoglycemia",
    # "Hyperkalemia",
    # "Hyponatremia",
    # "Thrombocytopenia",
    # "Anemia",
]

In [12]:
# AUROC table: one row per task, one column per model.
df_auroc = (
    pd.DataFrame(auroc_results)       # outer keys -> columns, inner keys -> index
    .round(3)
    .rename(columns=model_names)      # apply the model_names mapping to column labels
    .rename_axis("Task")
    .reset_index()                    # task keys become a regular "Task" column
    .replace(task_names)              # apply the task_names mapping to cell values
    .set_index("Task")
    .reindex(task_orders)             # row order / selection
    .loc[:, column_orders]            # column order / selection
)

df_auroc

Unnamed: 0_level_0,MIMIC [Count] (GBM),MIMIC,SK,Stanford
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
In-hospital Mortality,"0.992 [0.986, 0.997]","0.982 [0.97, 0.992]","0.956 [0.921, 0.977]","0.972 [0.953, 0.985]"
Long LOS,"0.983 [0.98, 0.985]","0.976 [0.972, 0.979]","0.955 [0.949, 0.961]","0.969 [0.965, 0.973]"
30-day Readmission,"0.956 [0.937, 0.971]","0.956 [0.935, 0.972]","0.876 [0.823, 0.921]","0.936 [0.901, 0.96]"


In [13]:
# AUPRC table: one row per task, one column per model.
df_auprc = (
    pd.DataFrame(auprc_results)       # outer keys -> columns, inner keys -> index
    .round(3)
    .rename(columns=model_names)      # apply the model_names mapping to column labels
    .rename_axis("Task")
    .reset_index()                    # task keys become a regular "Task" column
    .replace(task_names)              # apply the task_names mapping to cell values
    .set_index("Task")
    .reindex(task_orders)             # row order / selection
    .loc[:, column_orders]            # column order / selection
)

df_auprc

Unnamed: 0_level_0,MIMIC [Count] (GBM),MIMIC,SK,Stanford
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
In-hospital Mortality,"0.63 [0.449, 0.766]","0.322 [0.174, 0.489]","0.235 [0.099, 0.371]","0.22 [0.101, 0.382]"
Long LOS,"0.899 [0.884, 0.913]","0.86 [0.842, 0.876]","0.781 [0.759, 0.806]","0.82 [0.795, 0.841]"
30-day Readmission,"0.599 [0.476, 0.708]","0.508 [0.381, 0.63]","0.296 [0.201, 0.397]","0.417 [0.302, 0.553]"


In [10]:
# Pick the output file pair: bootstrap tables get a "_boot" suffix so they
# do not overwrite the point-estimate tables.
auroc_csv, auprc_csv = (
    ("../results/main_auroc_boot.csv", "../results/main_auprc_boot.csv")
    if use_boot
    else ("../results/main_auroc.csv", "../results/main_auprc.csv")
)

df_auroc.to_csv(auroc_csv)
df_auprc.to_csv(auprc_csv)