In [1]:
import os
import pickle
import pandas as pd

from src.io import read_pkl
from src.default_paths import path_root
from src.mappings import model_names, task_names

In [2]:
# All three result trees sit under <path_root>/data/evaluate; build that prefix once.
_EVALUATE_DIR = os.path.join(path_root, "data", "evaluate")

# Each constant is the root directory that the loading cell walks for "results.pkl" files.
PATH_TO_LINEAR_PROBES_RESULTS = os.path.join(_EVALUATE_DIR, "linear_probes")
PATH_TO_CLMBR_TASK_RESULTS = os.path.join(_EVALUATE_DIR, "clmbr_task_models")
PATH_TO_COUNT_MODEL_TASK_RESULTS = os.path.join(_EVALUATE_DIR, "adapter_models", "count_sk")

In [3]:
def list_dir(path: str):
    """Return the entry names found in `path`, leaving out the notebook checkpoint folder.

    Names are returned in whatever order `os.listdir` yields them; both files
    and directories are included.
    """
    entries = []
    for name in os.listdir(path):
        # Jupyter drops this folder next to notebooks; it is never a result directory.
        if name == ".ipynb_checkpoints":
            continue
        entries.append(name)
    return entries

In [4]:
def _load_task_metrics(model_dir: str):
    """Collect the AUROC and AUPRC stored for every task found under `model_dir`.

    Each entry returned by `list_dir(model_dir)` is treated as a task name, and
    `<model_dir>/<task>/results.pkl` is read with `read_pkl`.

    Args:
        model_dir: directory whose immediate children are per-task result folders.

    Returns:
        A pair `(auroc, auprc)` of dicts keyed by task name, holding the values
        stored under the "auroc" and "auprc" keys of each loaded result.
    """
    auroc, auprc = {}, {}
    for task in list_dir(model_dir):
        result = read_pkl(os.path.join(model_dir, task, "results.pkl"))
        auroc[task] = result['auroc']
        auprc[task] = result['auprc']
    return auroc, auprc


# Outer key: model directory name; inner key: task directory name.
auroc_results = {}
auprc_results = {}

# The count-model root holds task folders directly, so it is registered under a fixed key.
auroc_results["count_sk"], auprc_results["count_sk"] = _load_task_metrics(
    PATH_TO_COUNT_MODEL_TASK_RESULTS
)

# The remaining roots have one sub-folder per model, each containing the task folders.
# Linear probes are loaded before the CLMBR task models; a model name present in both
# roots ends up with the CLMBR task-model values, because the later assignment wins.
for results_root in (PATH_TO_LINEAR_PROBES_RESULTS, PATH_TO_CLMBR_TASK_RESULTS):
    for model in list_dir(results_root):
        auroc_results[model], auprc_results[model] = _load_task_metrics(
            os.path.join(results_root, model)
        )

In [5]:
# Left-to-right order of the model columns selected for the summary tables.
column_orders = [
    "SK [Count]",
    "SK",
    "Stanford",
    "Stanford_Cont-Pre",
]

# Top-to-bottom order of the task rows used when reindexing the summary tables.
task_orders = [
    "In-hospital Mortality",
    "Long LOS",
    "30-day Readmission",
    "Hypoglycemia",
    "Hyperkalemia",
    "Hyponatremia",
    "Thrombocytopenia",
    "Anemia",
]

In [6]:
# Stage 1: dict-of-dicts -> frame (outer keys become columns, inner keys the index),
# rounded to three decimals.
auroc_rounded = pd.DataFrame.from_dict(auroc_results).round(3)

# Stage 2: swap the raw model keys for the labels in `model_names`, then move the
# task index into a "Task" column so `task_names` can be applied via replace().
auroc_labelled = (
    auroc_rounded
    .rename(columns=model_names)
    .reset_index(names="Task")
    .replace(task_names)
    .set_index("Task")
)

# Stage 3: fix the row order, then keep only the listed model columns in order.
auroc_ordered = auroc_labelled.reindex(task_orders)
df_auroc = auroc_ordered[column_orders]

df_auroc

Unnamed: 0_level_0,SK [Count],SK,Stanford,Stanford_Cont-Pre
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
In-hospital Mortality,0.855,0.925,0.923,0.923
Long LOS,0.846,0.863,0.845,0.854
30-day Readmission,0.725,0.816,0.789,0.813
Hypoglycemia,0.815,0.833,0.824,0.826
Hyperkalemia,0.773,0.807,0.789,0.811
Hyponatremia,0.792,0.829,0.808,0.822
Thrombocytopenia,0.773,0.837,0.818,0.816
Anemia,0.719,0.776,0.772,0.797


In [7]:
# NOTE(review): this cell rebuilds df_auroc with the same statements as the preceding
# cell; one of the two is redundant and could be dropped.

# Frame with one column per model key and one row per task key, rounded to 3 decimals.
auroc_rounded = pd.DataFrame.from_dict(auroc_results).round(3)

# Apply the display labels: columns through `model_names`, task labels through
# `task_names` (the index is moved into a "Task" column for the replace() call).
auroc_labelled = (
    auroc_rounded
    .rename(columns=model_names)
    .reset_index(names="Task")
    .replace(task_names)
    .set_index("Task")
)

# Order the rows by `task_orders`, then select the columns named in `column_orders`.
auroc_ordered = auroc_labelled.reindex(task_orders)
df_auroc = auroc_ordered[column_orders]

df_auroc

Unnamed: 0_level_0,SK [Count],SK,Stanford,Stanford_Cont-Pre
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
In-hospital Mortality,0.855,0.925,0.924,0.924
Long LOS,0.846,0.863,0.846,0.854
30-day Readmission,0.725,0.822,0.808,0.827
Hypoglycemia,0.815,0.828,0.82,0.821
Hyperkalemia,0.773,0.804,0.789,0.809
Hyponatremia,0.792,0.832,0.814,0.829
Thrombocytopenia,0.773,0.84,0.822,0.818
Anemia,0.719,0.773,0.771,0.794


In [8]:
# Stage 1: dict-of-dicts -> frame (outer keys become columns, inner keys the index),
# rounded to three decimals.
auprc_rounded = pd.DataFrame.from_dict(auprc_results).round(3)

# Stage 2: swap the raw model keys for the labels in `model_names`, then move the
# task index into a "Task" column so `task_names` can be applied via replace().
auprc_labelled = (
    auprc_rounded
    .rename(columns=model_names)
    .reset_index(names="Task")
    .replace(task_names)
    .set_index("Task")
)

# Stage 3: fix the row order, then keep only the listed model columns in order.
auprc_ordered = auprc_labelled.reindex(task_orders)
df_auprc = auprc_ordered[column_orders]

df_auprc

Unnamed: 0_level_0,SK [Count],SK,Stanford,Stanford_Cont-Pre
Task,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
In-hospital Mortality,0.036,0.163,0.09,0.154
Long LOS,0.621,0.639,0.619,0.635
30-day Readmission,0.199,0.323,0.26,0.307
Hypoglycemia,0.39,0.414,0.393,0.385
Hyperkalemia,0.248,0.292,0.27,0.285
Hyponatremia,0.134,0.157,0.168,0.165
Thrombocytopenia,0.339,0.482,0.456,0.471
Anemia,0.211,0.371,0.355,0.379


In [8]:
# Persist both summary tables as CSV; the "Task" index is written as the first column.
# to_csv does not create missing parent directories, so make sure the target folder
# exists before writing (a no-op when it is already there).
os.makedirs("../results", exist_ok=True)
df_auroc.to_csv("../results/main_auroc.csv")
df_auprc.to_csv("../results/main_auprc.csv")