In [156]:
from tensorboard.backend.event_processing import event_accumulator
import pandas as pd
from pathlib import Path
from tbparse import SummaryReader


In [157]:
# Run configuration for the results report.
MODEL_TYPE = 'informer'  # GRU or informer
SEED = 21
OVERWRITE = True
SAVE_PATH = f"../results/ICMI24_{MODEL_TYPE}_results.md"

# With OVERWRITE set, truncate the report file so that later appends start from an empty file.
if OVERWRITE:
    with open(SAVE_PATH, 'w'):
        pass


In [158]:
def get_metric_results_from_eventfile(dirpath, metrics, SAVE_PATH="", title="", select="latest"):
    """Collect one scalar value per (fold, metric) from the event files under ``dirpath``.

    Scalars are read with ``SummaryReader``; the reader's ``dir_name`` column is
    used as the fold identifier (presumably the sub-directory holding each
    event file -- confirm against the tbparse documentation).

    Parameters
    ----------
    dirpath : str or path-like
        Log directory handed to ``SummaryReader``.
    metrics : list of str
        Scalar tags to keep; all other tags are ignored.
    SAVE_PATH : str, optional
        If truthy, ``title`` and the resulting table (GitHub-flavoured
        markdown, 3 decimals) are appended to this file.
    title : str, optional
        Heading written above the table when ``SAVE_PATH`` is given.
    select : {"latest", "highest", "lowest"}
        Which logged value represents each (fold, metric) pair: the one with
        the largest step, the largest value, or the smallest value.

    Returns
    -------
    pandas.DataFrame
        One row per fold and one column per metric found in the logs.

    Raises
    ------
    ValueError
        If ``select`` is not one of the supported strategies.
    """
    valid_selects = ("latest", "highest", "lowest")
    if select not in valid_selects:
        # Fail early: without a selection step several rows per (fold, metric)
        # would reach the pivot below and fail there with an unrelated message.
        raise ValueError(f"select must be one of {valid_selects}, got {select!r}")

    reader = SummaryReader(dirpath, pivot=False, extra_columns={'dir_name'})
    df = reader.scalars.rename(columns={"tag": "metric", "dir_name": "fold"})
    # Restrict to the requested tags before grouping so unrelated scalars
    # cannot influence (or break) the per-group selection.
    df = df[df["metric"].isin(metrics)]

    grouped = df.groupby(["fold", "metric"])
    if select == 'latest':
        chosen_rows = grouped['step'].idxmax()
    elif select == 'highest':
        chosen_rows = grouped['value'].idxmax()
    else:  # 'lowest'
        chosen_rows = grouped['value'].idxmin()
    df = df.loc[chosen_rows].reset_index(drop=True)

    res = df.pivot(index='fold', columns='metric', values='value')
    if SAVE_PATH:
        with open(SAVE_PATH, 'a') as f:
            f.write("\n\n")
            f.write(title)
            f.write("\n")
        res.to_markdown(SAVE_PATH, mode="a", tablefmt="github", index=True, floatfmt='.3f')
    return res

In [159]:

# Get pretraining task val metrics: mean/min/max over folds for each task's validation metric.
tasks = ['fi', 'cl', 'pc', 'rc']
metric_names = ['FI: AUROC', 'CL: Accuracy', 'PC: MSE (deg)', 'RC: MSE (deg)']
# NOTE(review): metric_select is not consumed below -- every lookup uses select="latest".
# Confirm whether the per-task strategy was meant to be passed through.
metric_select = ['highest', 'highest', 'lowest', 'lowest']  # which value to use when multiple checkpoints avail
varname = 'val_{task}_metric'
# Pass the placeholder explicitly rather than relying on locals() inside the comprehension.
metrics = [varname.format(task=task) for task in tasks]

pretrain_logdir = f"../lightning_logs/2023/{MODEL_TYPE}_pretraining_seed{SEED}"
pretrain_summary = []
for metric, name in zip(metrics, metric_names):
    # SAVE_PATH=None: nothing is written per metric; only the summary rows are kept.
    res = get_metric_results_from_eventfile(pretrain_logdir, [metric], SAVE_PATH=None, title=name, select="latest")
    per_fold = res[metric]
    pretrain_summary.append({
        "metric": name,
        "mean": float(per_fold.mean()),
        "min": float(per_fold.min()),
        "max": float(per_fold.max()),
    })
    


In [160]:
    
# Append the pretraining summary table to the report, preceded by its heading.
tit = "Pretraining validation metrics: summarized"
with open(SAVE_PATH, "a") as f:
    f.writelines(["\n\n", tit, "\n"])
tb = pd.DataFrame.from_records(pretrain_summary)
tb.to_markdown(
    SAVE_PATH,
    mode="a",
    tablefmt="github",
    index=False,
    floatfmt='.3f',
)

In [161]:
# Labels iterated over when collecting held-out classifier results.
labels = (
    ['SVT']
    + [f"Rote_{suffix}" for suffix in "XYZD"]
    + [f"Inference_{suffix}" for suffix in "XYZD"]
    + [f"Deep_{suffix}" for suffix in "XZ"]
    + ['MW']
)

In [162]:
# Held-out AUROC per label for the runs under lightning_logs/2023/informer_<label>.
metrics = ["train_auroc", "val_auroc", "train_accuracy_epoch", "val_accuracy_epoch"]
val_aurocs = []
tit = "Held-out AUROCs: informer with fixed sequence length 500"

with open(SAVE_PATH, "a") as f:
    f.write("\n\n")
    f.write(tit)
    f.write("\n")

for label in labels:
    res = get_metric_results_from_eventfile(f"../lightning_logs/2023/informer_{label}", metrics, SAVE_PATH=None, select="latest")
    fold_aurocs = res['val_auroc']  # one value per fold
    val_aurocs.append({
        "label": label,
        "mean AUROC": float(fold_aurocs.mean()),
        "min AUROC": float(fold_aurocs.min()),
        "max AUROC": float(fold_aurocs.max()),
    })

tb = pd.DataFrame.from_records(val_aurocs)
# Mean AUROC indexed by label, kept for the cross-classifier comparison table.
# Built without inplace=True so `tb` itself is never touched.
tb_oldres = tb.set_index('label')[['mean AUROC']]
tb.to_markdown(SAVE_PATH, mode="a", tablefmt="github", index=False, floatfmt='.3f')

In [163]:
# Held-out AUROC per label for each 2024 classifier variant; one markdown table per classifier.
metrics = ["train_auroc", "val_auroc", "train_accuracy_epoch", "val_accuracy_epoch"]
classifier_names = ['informer_4task_encoder_varlen_meanpool', 'informer_4task_encoder_varlen_maskmeanpool','informer_4task_encoder_varlen_finalpos',
'limu_125', 'limu_125_masked','limu_125_finalpos']
mean_aurocs = []
for classifier in classifier_names:
    val_aurocs = []
    # Suffix of the per-label log directory. Kept in a loop-local name so the
    # notebook-level MODEL_TYPE setting is not overwritten by this cell.
    log_suffix = 'limu' if 'limu' in classifier else 'informer'
    for label in labels:
        res = get_metric_results_from_eventfile(f"../lightning_logs/2024/classifiers/{classifier}/{label}_{log_suffix}", metrics, SAVE_PATH=None, select="latest")
        fold_aurocs = res['val_auroc']  # one value per fold
        mean_auroc = float(fold_aurocs.mean())
        val_aurocs.append({
            "label": label,
            "mean AUROC": mean_auroc,
            "min AUROC": float(fold_aurocs.min()),
            "max AUROC": float(fold_aurocs.max()),
        })
        mean_aurocs.append({"label": label, "classifier": classifier, "mean AUROC": mean_auroc})
    tit = f"Held-out AUROCs: {classifier}"
    with open(SAVE_PATH, "a") as f:
        f.write("\n\n")
        f.write(tit)
        f.write("\n")
    tb = pd.DataFrame.from_records(val_aurocs)
    tb.to_markdown(SAVE_PATH, mode="a", tablefmt="github", index=False, floatfmt='.3f')

tb_all = pd.DataFrame.from_records(mean_aurocs)
tb_all = tb_all.pivot(index='label', columns='classifier', values='mean AUROC')
# transpose to have label in columns and one row per classifier
tb_all = tb_all.T



In [164]:
# Rebuild the mean-AUROC table with labels as rows and classifiers as columns.
tb_all = (
    pd.DataFrame.from_records(mean_aurocs)
    .pivot(index='label', columns='classifier', values='mean AUROC')
)
# Wrap the earlier per-label means in a DataFrame.
tb_oldres = pd.DataFrame(tb_oldres)



In [165]:
# Overall comparison: mean AUROC per label, one column per classifier.
# Inner join on the label index, so only labels present in both tables are kept.
merged_df = tb_all.merge(tb_oldres, how='inner', left_index=True, right_index=True)
# .reindex(labels)

# Shorter column headers for the report; 'mean AUROC' is the column contributed by tb_oldres.
short_names = {
    'mean AUROC': 'informer_fixlen',
    'informer_4task_encoder_varlen_meanpool': 'informer_meanpool',
    'informer_4task_encoder_varlen_maskmeanpool': 'informer_maskmeanpool',
    'informer_4task_encoder_varlen_finalpos': 'informer_finalpos',
    'limu_125': 'limu',
    'limu_125_masked': 'limu_masked',
    'limu_125_finalpos': 'limu_finalpos',
}
merged_df = merged_df.rename(columns=short_names)

# Show informer_fixlen as the first column.
cols = list(merged_df.columns)
cols.insert(0, cols.pop(cols.index('informer_fixlen')))
merged_df = merged_df.loc[:, cols]

with open(SAVE_PATH, "a") as f:
    f.write("\n\n")
    f.write("\n\n")
    f.write("# OVERALL RESULTS")
    f.write("\n")
    f.write("mean AUROC over folds per classifier")
    # End the caption line; otherwise the table header row is appended to the
    # same line as the caption and the markdown table is broken.
    f.write("\n")
merged_df.to_markdown(SAVE_PATH, mode="a", tablefmt="github", index=True, floatfmt='.3f')

