# Load Packages

In [10]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules (e.g. the project's src package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import sys
from os.path import join

import joblib
import torch
from tqdm.auto import tqdm

# Make the project root importable before pulling in the local `src` package.
sys.path.append("../../")

from src.evaluation.consolidate import consolidate_pred_perf, consolidate_ue_perf, consolidate_pi_perf
from src.evaluation.perf_eval import display_pred_perf
from src.evaluation.ue_eval import display_ue_perf, restructure_ue_df
from src.evaluation.pi_eval import display_pi_perf
from src.df_display.latex import df_to_latex, df_to_latex_grouped
from src.misc import create_folder

# Experiment settings
seed_list = [2023 + offset for offset in range(5)]  # five consecutive seeds, 2023..2027
data_label = "physionet"
batch_size = 64

# Directory layout, all relative to the notebook's working directory
fp_notebooks_folder = "../"
fp_project_folder = join(fp_notebooks_folder, "../")
fp_data_folder = join(fp_project_folder, "../", "data")
fp_output_data_folder = join(fp_data_folder, data_label)

# Checkpoint tree: <project>/checkpoints/<data_label>/<stage>
fp_checkpoint_folder = join(fp_project_folder, "checkpoints")
fp_project_checkpoints = join(fp_checkpoint_folder, data_label)
(
    fp_tuning,
    fp_models,
    fp_predictions,
    fp_evaluation,
    fp_consolidated,
) = (
    join(fp_project_checkpoints, stage)
    for stage in ("tuning", "models", "predictions", "model_evaluation", "consolidated_results")
)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Prediction Performance

In [15]:
# Build the consolidated prediction-performance table from the evaluation
# results under fp_evaluation for every seed in seed_list, then render it.
pred_perf_df = consolidate_pred_perf(seed_list, fp_evaluation)
display_pred_perf(pred_perf_df, consolidated=True)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the export folder exists before writing (no-op when it is already there).
os.makedirs(fp_consolidated, exist_ok=True)
pred_perf_df.to_csv(join(fp_consolidated, "pred_perf.csv"))

Unnamed: 0_level_0,t+1,t+2,t+3
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
RUE,0.002 ± 0.000,0.0024 ± 0.000,0.0027 ± 0.000
MC Dropout,0.002 ± 0.000,0.0024 ± 0.000,0.0027 ± 0.000
GPR,0.0019 ± 0.000,0.0024 ± 0.000,0.0027 ± 0.000
Infer-Noise,0.002 ± 0.000,0.0024 ± 0.000,0.0027 ± 0.000
DER,0.0021 ± 0.000,0.0025 ± 0.000,0.003 ± 0.000


In [34]:
# Every forecast-horizon column uses the "min" highlighting rule.
pred_highlight_rules = {horizon: "min" for horizon in ("t+1", "t+2", "t+3")}
print(df_to_latex(pred_perf_df, column_format_dict=pred_highlight_rules))

\begin{tabular}{cccc}
 & \textbf{t+1} & \textbf{t+2} & \textbf{t+3} \\
Model &  &  &  \\
RUE & \underline{0.002 ± 0.000} & \textbf{0.0024 ± 0.000} & \textbf{0.0027 ± 0.000} \\
MC Dropout & \underline{0.002 ± 0.000} & \textbf{0.0024 ± 0.000} & \textbf{0.0027 ± 0.000} \\
GPR & \textbf{0.0019 ± 0.000} & \textbf{0.0024 ± 0.000} & \textbf{0.0027 ± 0.000} \\
Infer-Noise & \underline{0.002 ± 0.000} & \textbf{0.0024 ± 0.000} & \textbf{0.0027 ± 0.000} \\
DER & 0.0021 ± 0.000 & \underline{0.0025 ± 0.000} & \underline{0.003 ± 0.000} \\
\end{tabular}



# Uncertainty Estimation (UE) Performance

In [16]:
# Consolidate the UE metrics over all seeds and reshape them with
# restructure_ue_df in one step, then render the per-horizon tables.
ue_perf_df = restructure_ue_df(consolidate_ue_perf(seed_list, fp_evaluation))
display_ue_perf(ue_perf_df, consolidated=True)

t+1


Unnamed: 0_level_0,Unnamed: 1_level_0,Corr,Pval,AURC,Sigma=0.1,Sigma=0.2,Sigma=0.3,Sigma=0.4
Time Horizon,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
t+1,RUE,0.282 ± 0.014,0.0 ± 0.000,0.025 ± 0.001,0.02 ± 0.002,0.022 ± 0.001,0.024 ± 0.001,0.025 ± 0.001
t+1,MC Dropout,0.189 ± 0.012,0.0 ± 0.000,0.026 ± 0.001,0.024 ± 0.001,0.025 ± 0.001,0.026 ± 0.001,0.026 ± 0.001
t+1,GPR,0.174 ± 0.001,0.0 ± 0.000,0.024 ± 0.000,0.022 ± 0.000,0.024 ± 0.000,0.025 ± 0.000,0.025 ± 0.000
t+1,Infer-Noise,0.057 ± 0.012,0.0 ± 0.000,0.028 ± 0.001,0.028 ± 0.003,0.027 ± 0.001,0.028 ± 0.001,0.028 ± 0.001
t+1,DER,0.236 ± 0.013,0.0 ± 0.000,0.025 ± 0.001,0.022 ± 0.001,0.023 ± 0.001,0.025 ± 0.001,0.026 ± 0.001


t+2


Unnamed: 0_level_0,Unnamed: 1_level_0,Corr,Pval,AURC,Sigma=0.1,Sigma=0.2,Sigma=0.3,Sigma=0.4
Time Horizon,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
t+2,RUE,0.232 ± 0.007,0.0 ± 0.000,0.029 ± 0.000,0.025 ± 0.001,0.026 ± 0.000,0.027 ± 0.000,0.029 ± 0.000
t+2,MC Dropout,0.149 ± 0.007,0.0 ± 0.000,0.03 ± 0.000,0.029 ± 0.001,0.028 ± 0.001,0.029 ± 0.000,0.03 ± 0.000
t+2,GPR,0.148 ± 0.000,0.0 ± 0.000,0.028 ± 0.000,0.026 ± 0.000,0.028 ± 0.000,0.029 ± 0.000,0.029 ± 0.000
t+2,Infer-Noise,0.035 ± 0.006,0.002 ± 0.003,0.032 ± 0.000,0.042 ± 0.009,0.031 ± 0.002,0.032 ± 0.001,0.031 ± 0.001
t+2,DER,0.173 ± 0.007,0.0 ± 0.000,0.029 ± 0.001,0.029 ± 0.002,0.027 ± 0.001,0.029 ± 0.001,0.029 ± 0.001


t+3


Unnamed: 0_level_0,Unnamed: 1_level_0,Corr,Pval,AURC,Sigma=0.1,Sigma=0.2,Sigma=0.3,Sigma=0.4
Time Horizon,Model,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
t+3,RUE,0.231 ± 0.009,0.0 ± 0.000,0.031 ± 0.001,0.028 ± 0.001,0.028 ± 0.001,0.03 ± 0.000,0.031 ± 0.000
t+3,MC Dropout,0.136 ± 0.015,0.0 ± 0.000,0.033 ± 0.000,0.032 ± 0.001,0.031 ± 0.001,0.031 ± 0.000,0.032 ± 0.000
t+3,GPR,0.147 ± 0.000,0.0 ± 0.000,0.03 ± 0.000,0.029 ± 0.000,0.03 ± 0.000,0.031 ± 0.000,0.032 ± 0.000
t+3,Infer-Noise,0.03 ± 0.007,0.014 ± 0.027,0.034 ± 0.000,0.038 ± 0.005,0.033 ± 0.002,0.034 ± 0.001,0.034 ± 0.000
t+3,DER,0.186 ± 0.019,0.0 ± 0.000,0.032 ± 0.001,0.031 ± 0.003,0.031 ± 0.001,0.031 ± 0.001,0.032 ± 0.001


In [18]:
# Highlighting rule per metric: "max" for Corr, "min" for AURC and each Sigma=* column.
# Pval has no rule, so it is passed through without highlighting.
ue_highlight_rules = {"Corr": "max", "AURC": "min"}
ue_highlight_rules.update({f"Sigma={level}": "min" for level in ("0.1", "0.2", "0.3", "0.4")})
print(df_to_latex_grouped(ue_perf_df, ue_highlight_rules))

\begin{tabular}{ccccccccc}
\toprule
 &  & Corr & Pval & AURC & Sigma=0.1 & Sigma=0.2 & Sigma=0.3 & Sigma=0.4 \\
Time Horizon & Model &  &  &  &  &  &  &  \\
\midrule
\multirow[t]{5}{*}{t+1} & RUE & \textbf{0.282 ± 0.014} & 0.0 ± 0.000 & \underline{0.025 ± 0.001} & \textbf{0.02 ± 0.002} & \textbf{0.022 ± 0.001} & \textbf{0.024 ± 0.001} & \textbf{0.025 ± 0.001} \\
 & MC Dropout & 0.189 ± 0.012 & 0.0 ± 0.000 & 0.026 ± 0.001 & 0.024 ± 0.001 & 0.025 ± 0.001 & 0.026 ± 0.001 & \underline{0.026 ± 0.001} \\
 & GPR & 0.174 ± 0.001 & 0.0 ± 0.000 & \textbf{0.024 ± 0.000} & \underline{0.022 ± 0.000} & 0.024 ± 0.000 & \underline{0.025 ± 0.000} & \textbf{0.025 ± 0.000} \\
 & Infer-Noise & 0.057 ± 0.012 & 0.0 ± 0.000 & 0.028 ± 0.001 & 0.028 ± 0.003 & 0.027 ± 0.001 & 0.028 ± 0.001 & 0.028 ± 0.001 \\
 & DER & \underline{0.236 ± 0.013} & 0.0 ± 0.000 & \underline{0.025 ± 0.001} & \underline{0.022 ± 0.001} & \underline{0.023 ± 0.001} & \underline{0.025 ± 0.001} & \underline{0.026 ± 0.001} \\
\cline{1-9}
\m

# Prediction Interval (PI) Performance

In [21]:
# Build the consolidated prediction-interval table from the evaluation
# results under fp_evaluation for every seed in seed_list, then render it.
pi_perf_df = consolidate_pi_perf(seed_list, fp_evaluation)
display_pi_perf(pi_perf_df, consolidated=True)

# DataFrame.to_csv does not create missing parent directories, so make sure
# the export folder exists before writing (no-op when it is already there).
os.makedirs(fp_consolidated, exist_ok=True)
pi_perf_df.to_csv(join(fp_consolidated, "pi_perf.csv"))

t+1:


Unnamed: 0_level_0,Unnamed: 1_level_0,PICP,PINAW,PINAFD,CovP,CWFDC
Time Horizon,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
t+1,RUE Gaussian Copula,0.95725 ± 0.001,0.22075 ± 0.002,0.05407 ± 0.001,8e-05 ± 0.000,0.35331 ± 0.011
t+1,RUE Conditional Gaussian,0.95037 ± 0.001,0.20544 ± 0.001,0.05293 ± 0.001,9e-05 ± 0.000,0.34739 ± 0.014
t+1,RUE Weighted,0.96465 ± 0.001,0.30154 ± 0.006,0.05351 ± 0.001,0.00028 ± 0.000,0.63474 ± 0.030
t+1,RUE KNN,0.99918 ± 0.000,0.84647 ± 0.011,0.10399 ± 0.007,0.00232 ± 0.000,3.27213 ± 0.017
t+1,RUE Conformal,0.95122 ± 0.002,0.2346 ± 0.004,0.04919 ± 0.001,5e-05 ± 0.000,0.33146 ± 0.014
t+1,Infer-Noise Conformal,0.96544 ± 0.001,0.25125 ± 0.004,0.05709 ± 0.001,0.00024 ± 0.000,0.54378 ± 0.033
t+1,MC Dropout Conformal,0.95848 ± 0.001,0.27546 ± 0.006,0.05157 ± 0.001,9e-05 ± 0.000,0.41215 ± 0.015
t+1,GPR Conformal,0.95868 ± 0.000,0.23211 ± 0.000,0.05584 ± 0.000,0.00011 ± 0.000,0.39299 ± 0.001
t+1,DER Conformal,0.95946 ± 0.001,0.25884 ± 0.011,0.05835 ± 0.001,0.0001 ± 0.000,0.41468 ± 0.030


t+2:


Unnamed: 0_level_0,Unnamed: 1_level_0,PICP,PINAW,PINAFD,CovP,CWFDC
Time Horizon,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
t+2,RUE Gaussian Copula,0.95384 ± 0.001,0.24486 ± 0.001,0.05496 ± 0.001,5e-05 ± 0.000,0.34804 ± 0.008
t+2,RUE Conditional Gaussian,0.94607 ± 0.000,0.22654 ± 0.001,0.05547 ± 0.000,0.00017 ± 0.000,0.45683 ± 0.010
t+2,RUE Weighted,0.9647 ± 0.002,0.33517 ± 0.009,0.05381 ± 0.001,0.00029 ± 0.000,0.67854 ± 0.057
t+2,RUE KNN,0.99894 ± 0.000,0.90902 ± 0.002,0.11631 ± 0.003,0.0023 ± 0.000,3.32519 ± 0.011
t+2,RUE Conformal,0.95091 ± 0.001,0.27101 ± 0.006,0.05199 ± 0.000,9e-05 ± 0.000,0.41349 ± 0.030
t+2,Infer-Noise Conformal,0.96377 ± 0.002,0.27331 ± 0.005,0.05826 ± 0.000,0.00019 ± 0.000,0.52136 ± 0.049
t+2,MC Dropout Conformal,0.95961 ± 0.002,0.28262 ± 0.010,0.05571 ± 0.001,0.00013 ± 0.000,0.464 ± 0.030
t+2,GPR Conformal,0.95532 ± 0.000,0.24711 ± 0.000,0.05796 ± 0.000,9e-05 ± 0.000,0.39184 ± 0.001
t+2,DER Conformal,0.95579 ± 0.001,0.29533 ± 0.002,0.05842 ± 0.001,7e-05 ± 0.000,0.41993 ± 0.005


t+3:


Unnamed: 0_level_0,Unnamed: 1_level_0,PICP,PINAW,PINAFD,CovP,CWFDC
Time Horizon,Method,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
t+3,RUE Gaussian Copula,0.95312 ± 0.001,0.25851 ± 0.001,0.05679 ± 0.001,9e-05 ± 0.000,0.40207 ± 0.020
t+3,RUE Conditional Gaussian,0.94433 ± 0.001,0.23796 ± 0.001,0.05547 ± 0.000,0.00019 ± 0.000,0.48479 ± 0.014
t+3,RUE Weighted,0.96544 ± 0.001,0.36233 ± 0.005,0.05659 ± 0.002,0.00034 ± 0.000,0.75517 ± 0.024
t+3,RUE KNN,0.99924 ± 0.000,0.90521 ± 0.010,0.11245 ± 0.009,0.00233 ± 0.000,3.34503 ± 0.018
t+3,RUE Conformal,0.94933 ± 0.001,0.28845 ± 0.007,0.05198 ± 0.000,0.00015 ± 0.000,0.49398 ± 0.021
t+3,Infer-Noise Conformal,0.96283 ± 0.004,0.28855 ± 0.008,0.05825 ± 0.001,0.0002 ± 0.000,0.54925 ± 0.090
t+3,MC Dropout Conformal,0.95559 ± 0.001,0.30465 ± 0.020,0.05505 ± 0.001,0.00015 ± 0.000,0.50491 ± 0.040
t+3,GPR Conformal,0.95411 ± 0.000,0.26083 ± 0.000,0.05789 ± 0.000,8e-05 ± 0.000,0.39983 ± 0.001
t+3,DER Conformal,0.95654 ± 0.001,0.30417 ± 0.012,0.05854 ± 0.001,0.00011 ± 0.000,0.477 ± 0.009


In [22]:
# Highlighting rule per metric: "max" for PICP, "min" for the remaining columns.
# NOTE(review): "max" on PICP bolds the highest coverage (RUE KNN at ~0.999 in the
# rendered table) — confirm that highest coverage is the intended criterion.
pi_highlight_rules = dict(PICP="max", PINAW="min", PINAFD="min", CovP="min", CWFDC="min")
print(df_to_latex_grouped(pi_perf_df, pi_highlight_rules))

\begin{tabular}{ccccccc}
\toprule
 &  & PICP & PINAW & PINAFD & CovP & CWFDC \\
Time Horizon & Method &  &  &  &  &  \\
\midrule
\multirow[t]{9}{*}{t+1} & RUE Gaussian Copula & 0.95725 ± 0.001 & \underline{0.22075 ± 0.002} & 0.05407 ± 0.001 & \underline{8e-05 ± 0.000} & 0.35331 ± 0.011 \\
 & RUE Conditional Gaussian & 0.95037 ± 0.001 & \textbf{0.20544 ± 0.001} & 0.05293 ± 0.001 & 9e-05 ± 0.000 & \underline{0.34739 ± 0.014} \\
 & RUE Weighted & 0.96465 ± 0.001 & 0.30154 ± 0.006 & 0.05351 ± 0.001 & 0.00028 ± 0.000 & 0.63474 ± 0.030 \\
 & RUE KNN & \textbf{0.99918 ± 0.000} & 0.84647 ± 0.011 & 0.10399 ± 0.007 & 0.00232 ± 0.000 & 3.27213 ± 0.017 \\
 & RUE Conformal & 0.95122 ± 0.002 & 0.2346 ± 0.004 & \textbf{0.04919 ± 0.001} & \textbf{5e-05 ± 0.000} & \textbf{0.33146 ± 0.014} \\
 & Infer-Noise Conformal & \underline{0.96544 ± 0.001} & 0.25125 ± 0.004 & 0.05709 ± 0.001 & 0.00024 ± 0.000 & 0.54378 ± 0.033 \\
 & MC Dropout Conformal & 0.95848 ± 0.001 & 0.27546 ± 0.006 & \underline{0.05157 ± 0