In [1]:
import sys
import os
import re
import numpy as np

sys.path.insert(0, "./src/")

import pandas as pd
from src.table_utils import (
    collect_scores_into_dict_with_std,
    extract_same_different_dataframes,
    ood_detection_pairs_,
    aggregate_over_measures,
)
from IPython.display import display

pd.set_option("display.max_rows", None)

  from .autonotebook import tqdm as notebook_tqdm
stty: 'standard input': Inappropriate ioctl for device


In [2]:
full_ood_rocauc = pd.read_csv("./tables/full_ood_rocauc_with_std.csv", index_col=0)
full_ood_rocauc = full_ood_rocauc[
    full_ood_rocauc.Dataset != full_ood_rocauc.training_dataset
]

In [3]:
# full_ood_rocauc[full_ood_rocauc.Dataset == full_ood_rocauc.training_dataset]

In [4]:
full_ood_rocauc = full_ood_rocauc[~full_ood_rocauc.UQMetric.str.endswith("Inner Inner")]

In [5]:
# DROP NEGLOG

full_ood_rocauc = full_ood_rocauc[full_ood_rocauc.base_rule != "Neglog"]

In [6]:
full_ood_rocauc.sample(10)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScores_array,architecture,training_dataset,RocAucScoresMean,RocAucScoresStd,base_rule,RiskType
3825,Excess Brier Outer Inner,cifar10,Brier,"[0.6841748200000001, 0.68315293, 0.688826285, ...",vgg,cifar100,0.68469,0.002281753,Brier,Excess
5631,Total Brier Outer,svhn,Brier,"[0.8029762081284573, 0.8798006472802705, 0.957...",resnet18,missed_class_cifar10,0.911637,0.06230155,Brier,Total
7040,Bayes Spherical Outer,cifar100,Spherical,"[0.79899081, 0.8179104799999999, 0.812788505, ...",resnet18,noisy_cifar10,0.802757,0.01074237,Spherical,Bayes
5123,Bregman Information Logscore,svhn,Spherical,"[0.6168736247695144, 0.6062281057928702, 0.595...",resnet18,noisy_cifar100,0.599064,0.01941959,Logscore,Bregman Information
3152,MV Brier,cifar10,Spherical,"[0.425494355, 0.42838627500000004, 0.434354665...",resnet18,cifar100,0.429337,0.003128977,Brier,MV
3036,Expected Pairwise Bregman Information Maxprob,svhn,Brier,"[0.6614998982022126, 0.6889206073294406, 0.674...",resnet18,cifar100,0.675549,0.01157423,Maxprob,Expected Pairwise Bregman Information
3156,MV Brier,svhn,Brier,"[0.6434261812384757, 0.66129835010756, 0.68803...",resnet18,cifar100,0.666047,0.01475335,Brier,MV
4165,Expected Pairwise Bregman Information Maxprob,blurred_cifar100,Logscore,"[0.6833198849999998, 0.6869936799999999, 0.685...",vgg,cifar100,0.684365,0.00167322,Maxprob,Expected Pairwise Bregman Information
6446,Expected Pairwise Bregman Information Spherical,blurred_cifar100,Spherical,"[0.7639298950000001, 0.862209495, 0.941516805,...",resnet18,missed_class_cifar10,0.890847,0.07073152,Spherical,Expected Pairwise Bregman Information
5285,Expected Pairwise Bregman Information Maxprob,cifar100,Spherical,"[0.5, 0.49999999999999994, 0.49999999999999983...",resnet18,noisy_cifar100,0.5,8.233634e-17,Maxprob,Expected Pairwise Bregman Information


In [7]:
full_ood_rocauc.base_rule.unique()

array(['Brier', 'Logscore', 'Maxprob', 'Spherical'], dtype=object)

In [8]:
from collections import namedtuple


def extract_logscore_others_dataframes_modified(
    dataframe_: pd.DataFrame,
):
    df = dataframe_.copy()
    df_logscore = df[(df["base_rule"] == "Logscore")]
    df_others = df[(df["base_rule"] != "Logscore")]

    dataframes_ = namedtuple(
        "SameDiffDF",
        [
            "logscore",
            "others",
        ],
    )
    return dataframes_(
        logscore=df_logscore,
        others=df_others,
    )

In [9]:
grouped_df = extract_logscore_others_dataframes_modified(
    dataframe_=full_ood_rocauc,
)

In [10]:
def enhance_latex_table(input_latex):
    lines = input_latex.split("\n")
    enhanced_lines = []

    for i, line in enumerate(lines):
        if "\\begin{tabular}" in line:
            # Start centering the table
            enhanced_lines.append(r"\begin{center}")

        if "\\toprule" in line:
            # Add multicolumn headers
            enhanced_lines.append(line)
            enhanced_lines.append(
                r"\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\"
            )
            enhanced_lines.append(r"\cmidrule(lr){1-2} \cmidrule(lr){3-7}")
            continue

        # Add row coloring
        if "\\midrule" in line:
            enhanced_lines.append(line)
            enhanced_lines.append(r"\rowcolor{gray!10}")
        elif "\\bottomrule" in line:
            enhanced_lines.append(r"\end{tabular}")
            enhanced_lines.append(r"\end{center}")
        else:
            enhanced_lines.append(line)

    return "\n".join(enhanced_lines)

In [11]:
index_pairs = [
    ("CIFAR10", "Blurred CIFAR10"),
    ("CIFAR10", "Blurred CIFAR100"),
    ("CIFAR10", "CIFAR100"),
    ("CIFAR10", "SVHN"),
    ("CIFAR100", "Blurred CIFAR10"),
    ("CIFAR100", "Blurred CIFAR100"),
    ("CIFAR100", "CIFAR10"),
    ("CIFAR100", "SVHN"),
]


def get_nice_df(df_):
    df_.index = pd.MultiIndex.from_tuples(index_pairs, names=["InD", "OOD"])
    df_.columns = [
        "Bayes",
        "Excess",
        "Total",
        # 'Bayes(O)',
        # 'Bayes(I)',
        # 'Total(O)',
        # 'Total(I)',
        "BI",
        "RBI",
        "EPBI",
        # 'Bias',
        # 'MV',
        # 'MVBI',
        # 'BiasBI',
    ]
    # df_ = df_[['Bayes', 'Excess', 'Total', 'BI', 'RBI']]
    df_ = (100 * df_).round(2)

    display(df_)

    return df_, df_.to_latex(float_format="%.2f")

In [20]:
# measures = [c for c in same_agg_df.columns if c not in ['OOD', 'InD', 'ScoringRule']]
# measures

# measures = [
#     'Bayes Outer',
#     'Bayes Inner',
#     'Total Outer',
#     'Total Inner',
#     'Bregman Information',
#     'Reverse Bregman Information',
#     'Expected Pairwise Bregman Information']

measures = [
    "Bayes",
    "Excess",
    "Total",
    "Bregman Information",
    "Reverse Bregman Information",
    "Expected Pairwise Bregman Information",
]

In [13]:
logscore_dict_mean, logscore_dict_std = collect_scores_into_dict_with_std(
    dataframes_list=[
        grouped_df.logscore,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
logscore_df_mean = pd.DataFrame.from_dict(logscore_dict_mean)
logscore_df_std = pd.DataFrame.from_dict(logscore_dict_std)

logscore_agg_df_mean = aggregate_over_measures(
    dataframe_=logscore_df_mean,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

logscore_agg_df_std = aggregate_over_measures(
    dataframe_=logscore_df_std,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [14]:
other_dict_mean, other_dict_std = collect_scores_into_dict_with_std(
    dataframes_list=[
        grouped_df.others,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
other_df_mean = pd.DataFrame.from_dict(other_dict_mean)
other_df_std = pd.DataFrame.from_dict(other_dict_std)

other_agg_df_mean = aggregate_over_measures(
    dataframe_=other_df_mean,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

other_agg_df_std = aggregate_over_measures(
    dataframe_=other_df_std,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [15]:
logscore_agg_df_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,Bayes,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cifar10,blurred_cifar10,0.826258,0.810427,0.842324,0.84122,0.842985,0.842767,0.839513,0.841107,0.841453,0.84288
cifar10,blurred_cifar100,0.935675,0.926451,0.934064,0.93477,0.933366,0.934055,0.919269,0.933596,0.934144,0.933189
cifar10,cifar100,0.895048,0.896381,0.883373,0.884946,0.882108,0.883065,0.868268,0.884068,0.884295,0.881935
cifar10,svhn,0.93219,0.933032,0.918104,0.919379,0.917037,0.917895,0.900413,0.919156,0.919196,0.916232
cifar100,blurred_cifar10,0.884984,0.861705,0.840902,0.843844,0.837527,0.841336,0.815185,0.835672,0.840057,0.842064
cifar100,blurred_cifar100,0.727264,0.701509,0.747029,0.742435,0.749579,0.749073,0.737171,0.743822,0.744725,0.749647
cifar100,cifar10,0.780929,0.779954,0.709738,0.716799,0.703844,0.708572,0.665579,0.709412,0.712118,0.706327
cifar100,svhn,0.839809,0.836841,0.747345,0.754318,0.741123,0.746592,0.70946,0.74424,0.748289,0.747261


In [16]:
logscore_agg_df_std

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,Bayes,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cifar10,blurred_cifar10,0.006467,0.006724,0.006419,0.006456,0.006384,0.006417,0.005935,0.006469,0.006471,0.006354
cifar10,blurred_cifar100,0.003595,0.004193,0.002656,0.00273,0.002605,0.002633,0.002529,0.00265,0.002663,0.002673
cifar10,cifar100,0.001487,0.001616,0.001789,0.001781,0.001793,0.001793,0.002242,0.001736,0.001753,0.00191
cifar10,svhn,0.008434,0.009187,0.008702,0.008786,0.008664,0.008657,0.008815,0.008525,0.008603,0.008708
cifar100,blurred_cifar10,0.002484,0.002702,0.003909,0.003865,0.003955,0.003907,0.004143,0.003817,0.003808,0.004041
cifar100,blurred_cifar100,0.00339,0.003293,0.004043,0.003933,0.00411,0.004087,0.004602,0.003968,0.003976,0.004217
cifar100,cifar10,0.002233,0.00229,0.003403,0.003187,0.003593,0.00343,0.00471,0.003142,0.00313,0.003693
cifar100,svhn,0.013626,0.014056,0.017502,0.016796,0.018008,0.017701,0.021066,0.016531,0.016631,0.018819


In [17]:
other_agg_df_mean

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,Bayes,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cifar10,blurred_cifar10,0.822736,0.811172,0.79788,0.798112,0.796549,0.798979,0.509407,0.723799,0.798171,0.642769
cifar10,blurred_cifar100,0.932556,0.924893,0.897333,0.898327,0.895181,0.89849,0.520607,0.786157,0.898185,0.676995
cifar10,cifar100,0.893354,0.894017,0.837349,0.837926,0.836487,0.837635,0.511316,0.754593,0.837711,0.650429
cifar10,svhn,0.930043,0.930366,0.874657,0.875427,0.873656,0.874889,0.514843,0.777956,0.875229,0.663193
cifar100,blurred_cifar10,0.87129,0.855593,0.774596,0.78178,0.764461,0.777547,0.486373,0.673758,0.778797,0.572215
cifar100,blurred_cifar100,0.713643,0.696379,0.701115,0.70007,0.69959,0.703686,0.505876,0.637477,0.699466,0.578614
cifar100,cifar10,0.779421,0.778824,0.690515,0.695493,0.68556,0.690493,0.483653,0.624983,0.693714,0.541103
cifar100,svhn,0.824954,0.826959,0.699088,0.704894,0.694155,0.698214,0.478301,0.627434,0.702332,0.528436


In [18]:
other_agg_df_std

Unnamed: 0_level_0,Unnamed: 1_level_0,Total,Bayes,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
cifar10,blurred_cifar10,0.006442,0.006611,0.005935,0.005891,0.005872,0.006041,0.003605,0.004097,0.005891,0.006064
cifar10,blurred_cifar100,0.003562,0.003956,0.003338,0.003436,0.003308,0.00327,0.002391,0.00166,0.003428,0.002985
cifar10,cifar100,0.001507,0.001601,0.002024,0.002099,0.002023,0.001951,0.000989,0.001151,0.002105,0.001565
cifar10,svhn,0.008192,0.008565,0.012615,0.012789,0.012252,0.012804,0.007289,0.005276,0.012772,0.008323
cifar100,blurred_cifar10,0.002339,0.002418,0.003323,0.003347,0.003238,0.003386,0.001702,0.00244,0.003384,0.003184
cifar100,blurred_cifar100,0.002993,0.002983,0.003523,0.003517,0.00353,0.00352,0.002047,0.002367,0.003543,0.003348
cifar100,cifar10,0.001952,0.00204,0.002564,0.002533,0.002558,0.002602,0.00142,0.001675,0.002542,0.002373
cifar100,svhn,0.012836,0.013518,0.011459,0.011956,0.010805,0.011618,0.008487,0.007698,0.011892,0.012537


In [19]:
nice_logscore = get_nice_df(logscore_agg_df_mean[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CIFAR10,Blurred CIFAR10,81.04,84.23,82.63,84.12,84.3,84.28
CIFAR10,Blurred CIFAR100,92.65,93.41,93.57,93.48,93.34,93.41
CIFAR10,CIFAR100,89.64,88.34,89.5,88.49,88.21,88.31
CIFAR10,SVHN,93.3,91.81,93.22,91.94,91.7,91.79
CIFAR100,Blurred CIFAR10,86.17,84.09,88.5,84.38,83.75,84.13
CIFAR100,Blurred CIFAR100,70.15,74.7,72.73,74.24,74.96,74.91
CIFAR100,CIFAR10,78.0,70.97,78.09,71.68,70.38,70.86
CIFAR100,SVHN,83.68,74.73,83.98,75.43,74.11,74.66


\begin{center}
\begin{tabular}{llrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes & Excess & Total & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 81.04 & 84.23 & 82.63 & 84.12 & 84.30 & 84.28 \\
 & Blurred CIFAR100 & 92.65 & 93.41 & 93.57 & 93.48 & 93.34 & 93.41 \\
 & CIFAR100 & 89.64 & 88.34 & 89.50 & 88.49 & 88.21 & 88.31 \\
 & SVHN & 93.30 & 91.81 & 93.22 & 91.94 & 91.70 & 91.79 \\
\cline{1-8}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.17 & 84.09 & 88.50 & 84.38 & 83.75 & 84.13 \\
 & Blurred CIFAR100 & 70.15 & 74.70 & 72.73 & 74.24 & 74.96 & 74.91 \\
 & CIFAR10 & 78.00 & 70.97 & 78.09 & 71.68 & 70.38 & 70.86 \\
 & SVHN & 83.68 & 74.73 & 83.98 & 75.43 & 74.11 & 74.66 \\
\cline{1-8}
\end{tabular}
\end{center}
\end{tabular}



In [20]:
nice_logscore = get_nice_df(logscore_agg_df_std[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CIFAR10,Blurred CIFAR10,0.67,0.64,0.65,0.65,0.64,0.64
CIFAR10,Blurred CIFAR100,0.42,0.27,0.36,0.27,0.26,0.26
CIFAR10,CIFAR100,0.16,0.18,0.15,0.18,0.18,0.18
CIFAR10,SVHN,0.92,0.87,0.84,0.88,0.87,0.87
CIFAR100,Blurred CIFAR10,0.27,0.39,0.25,0.39,0.4,0.39
CIFAR100,Blurred CIFAR100,0.33,0.4,0.34,0.39,0.41,0.41
CIFAR100,CIFAR10,0.23,0.34,0.22,0.32,0.36,0.34
CIFAR100,SVHN,1.41,1.75,1.36,1.68,1.8,1.77


\begin{center}
\begin{tabular}{llrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes & Excess & Total & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 0.67 & 0.64 & 0.65 & 0.65 & 0.64 & 0.64 \\
 & Blurred CIFAR100 & 0.42 & 0.27 & 0.36 & 0.27 & 0.26 & 0.26 \\
 & CIFAR100 & 0.16 & 0.18 & 0.15 & 0.18 & 0.18 & 0.18 \\
 & SVHN & 0.92 & 0.87 & 0.84 & 0.88 & 0.87 & 0.87 \\
\cline{1-8}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 0.27 & 0.39 & 0.25 & 0.39 & 0.40 & 0.39 \\
 & Blurred CIFAR100 & 0.33 & 0.40 & 0.34 & 0.39 & 0.41 & 0.41 \\
 & CIFAR10 & 0.23 & 0.34 & 0.22 & 0.32 & 0.36 & 0.34 \\
 & SVHN & 1.41 & 1.75 & 1.36 & 1.68 & 1.80 & 1.77 \\
\cline{1-8}
\end{tabular}
\end{center}
\end{tabular}



In [21]:
nice_other = get_nice_df(other_agg_df_mean[measures].copy())
enhanced_latex = enhance_latex_table(nice_other[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CIFAR10,Blurred CIFAR10,81.12,79.79,82.27,79.81,79.65,79.9
CIFAR10,Blurred CIFAR100,92.49,89.73,93.26,89.83,89.52,89.85
CIFAR10,CIFAR100,89.4,83.73,89.34,83.79,83.65,83.76
CIFAR10,SVHN,93.04,87.47,93.0,87.54,87.37,87.49
CIFAR100,Blurred CIFAR10,85.56,77.46,87.13,78.18,76.45,77.75
CIFAR100,Blurred CIFAR100,69.64,70.11,71.36,70.01,69.96,70.37
CIFAR100,CIFAR10,77.88,69.05,77.94,69.55,68.56,69.05
CIFAR100,SVHN,82.7,69.91,82.5,70.49,69.42,69.82


\begin{center}
\begin{tabular}{llrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes & Excess & Total & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 81.12 & 79.79 & 82.27 & 79.81 & 79.65 & 79.90 \\
 & Blurred CIFAR100 & 92.49 & 89.73 & 93.26 & 89.83 & 89.52 & 89.85 \\
 & CIFAR100 & 89.40 & 83.73 & 89.34 & 83.79 & 83.65 & 83.76 \\
 & SVHN & 93.04 & 87.47 & 93.00 & 87.54 & 87.37 & 87.49 \\
\cline{1-8}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 85.56 & 77.46 & 87.13 & 78.18 & 76.45 & 77.75 \\
 & Blurred CIFAR100 & 69.64 & 70.11 & 71.36 & 70.01 & 69.96 & 70.37 \\
 & CIFAR10 & 77.88 & 69.05 & 77.94 & 69.55 & 68.56 & 69.05 \\
 & SVHN & 82.70 & 69.91 & 82.50 & 70.49 & 69.42 & 69.82 \\
\cline{1-8}
\end{tabular}
\end{center}
\end{tabular}



In [22]:
nice_other = get_nice_df(other_agg_df_std[measures].copy())
enhanced_latex = enhance_latex_table(nice_other[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
CIFAR10,Blurred CIFAR10,0.66,0.59,0.64,0.59,0.59,0.6
CIFAR10,Blurred CIFAR100,0.4,0.33,0.36,0.34,0.33,0.33
CIFAR10,CIFAR100,0.16,0.2,0.15,0.21,0.2,0.2
CIFAR10,SVHN,0.86,1.26,0.82,1.28,1.23,1.28
CIFAR100,Blurred CIFAR10,0.24,0.33,0.23,0.33,0.32,0.34
CIFAR100,Blurred CIFAR100,0.3,0.35,0.3,0.35,0.35,0.35
CIFAR100,CIFAR10,0.2,0.26,0.2,0.25,0.26,0.26
CIFAR100,SVHN,1.35,1.15,1.28,1.2,1.08,1.16


\begin{center}
\begin{tabular}{llrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes & Excess & Total & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 0.66 & 0.59 & 0.64 & 0.59 & 0.59 & 0.60 \\
 & Blurred CIFAR100 & 0.40 & 0.33 & 0.36 & 0.34 & 0.33 & 0.33 \\
 & CIFAR100 & 0.16 & 0.20 & 0.15 & 0.21 & 0.20 & 0.20 \\
 & SVHN & 0.86 & 1.26 & 0.82 & 1.28 & 1.23 & 1.28 \\
\cline{1-8}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 0.24 & 0.33 & 0.23 & 0.33 & 0.32 & 0.34 \\
 & Blurred CIFAR100 & 0.30 & 0.35 & 0.30 & 0.35 & 0.35 & 0.35 \\
 & CIFAR10 & 0.20 & 0.26 & 0.20 & 0.25 & 0.26 & 0.26 \\
 & SVHN & 1.35 & 1.15 & 1.28 & 1.20 & 1.08 & 1.16 \\
\cline{1-8}
\end{tabular}
\end{center}
\end{tabular}



In [10]:
def collect_scores_into_dict_miss_with_std(
    dataframes_list_,
):
    scores_dict_ = defaultdict(
        list, {val: [] for val in dataframes_list_[0].RiskType.unique()}
    )
    std_dict_ = defaultdict(
        list, {val: [] for val in dataframes_list_[0].RiskType.unique()}
    )

    for dataframe_ in dataframes_list_:
        for ind in dataframe_.training_dataset.unique():
            df_aux_ = dataframe_[(dataframe_["training_dataset"] == ind)]

            mean_rocauc_dict = dict(
                df_aux_.groupby(by=["RiskType"])
                .agg({"RocAucScoresMean": ["mean"]})[("RocAucScoresMean", "mean")]
                .reset_index()
                .values
            )
            std_rocauc_dict = dict(
                df_aux_.groupby(by=["RiskType"])
                .agg({"RocAucScoresStd": ["mean"]})[("RocAucScoresStd", "mean")]
                .reset_index()
                .values
            )
            next_iter = True
            for k in mean_rocauc_dict:
                if k in scores_dict_:
                    scores_dict_[k].append(mean_rocauc_dict[k])
                    std_dict_[k].append(std_rocauc_dict[k])
                    next_iter = False
            if next_iter:
                continue

            scores_dict_["InD"].append(ind)
            scores_dict_["ScoringRule"].append(df_aux_["LossFunction"].unique())

            std_dict_["InD"].append(ind)
            std_dict_["ScoringRule"].append(df_aux_["LossFunction"].unique())
    return scores_dict_, std_dict_

In [11]:
from collections import defaultdict

In [12]:
full_mis_rocauc = pd.read_csv("./tables/full_mis_rocauc_with_std.csv", index_col=0)

In [13]:
full_mis_rocauc = full_mis_rocauc[~full_mis_rocauc.UQMetric.str.endswith("Inner Inner")]

In [14]:
# DROP NEGLOG

full_mis_rocauc = full_mis_rocauc[full_mis_rocauc.base_rule != "Neglog"]

In [15]:
grouped_df = extract_logscore_others_dataframes_modified(
    dataframe_=full_mis_rocauc,
)

In [16]:
logscore_dict_mean, logscore_dict_std = collect_scores_into_dict_miss_with_std(
    dataframes_list_=[
        grouped_df.logscore,
    ],
)
logscore_df_mean = pd.DataFrame.from_dict(logscore_dict_mean)
logscore_df_std = pd.DataFrame.from_dict(logscore_dict_std)

logscore_agg_df_mean = aggregate_over_measures(
    dataframe_=logscore_df_mean,
    agg_func_="mean",
    by_=["InD"],
)

logscore_agg_df_std = aggregate_over_measures(
    dataframe_=logscore_df_std,
    agg_func_="mean",
    by_=["InD"],
)

In [17]:
other_dict_mean, other_dict_std = collect_scores_into_dict_miss_with_std(
    dataframes_list_=[
        grouped_df.others,
    ],
)
other_df_mean = pd.DataFrame.from_dict(other_dict_mean)
other_df_std = pd.DataFrame.from_dict(other_dict_std)

other_agg_df_mean = aggregate_over_measures(
    dataframe_=other_df_mean,
    agg_func_="mean",
    by_=["InD"],
)

other_agg_df_std = aggregate_over_measures(
    dataframe_=other_df_std,
    agg_func_="mean",
    by_=["InD"],
)

In [18]:
def enhance_latex_table(input_latex):
    lines = input_latex.split("\n")
    enhanced_lines = []

    for i, line in enumerate(lines):
        if "\\toprule" in line:
            # Add multicolumn headers
            enhanced_lines.append(line)
            enhanced_lines.append(
                r"\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\"
            )
            enhanced_lines.append(r"\cmidrule(lr){1-2} \cmidrule(lr){3-7}")
            continue

        # Add row coloring
        if "\\midrule" in line:
            enhanced_lines.append(line)
            enhanced_lines.append(r"\rowcolor{gray!10}")
        elif "\\bottomrule" in line:
            enhanced_lines.append(r"\end{tabular}")
        else:
            enhanced_lines.append(line)

    return "\n".join(enhanced_lines)


def get_nice_df(df_):
    df_.index = pd.Index(
        data=[
            "CIFAR10",
            "CIFAR100",
            "Missed class CIFAR10",
            "Noisy CIFAR10",
            "Noisy CIFAR100",
        ],
        name="InD",
    )
    df_.columns = [
        "Bayes",
        "Excess",
        "Total",
        # 'Bayes(O)',
        # 'Bayes(I)',
        # 'Total(O)',
        # 'Total(I)',
        "BI",
        "RBI",
        "EPBI",
        # 'Bias',
        # 'MV',
        # 'MVBI',
        # 'BiasBI',
    ]
    # df_ = df_[['Bayes', 'Excess', 'Total', 'BI', 'RBI']]
    df_ = (100 * df_).round(2)

    display(df_)

    return df_, df_.to_latex(float_format="%.2f")

In [21]:
nice_logscore = get_nice_df(logscore_agg_df_mean[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CIFAR10,93.57,92.97,93.77,93.05,92.89,92.95
CIFAR100,85.63,80.21,85.93,81.1,79.48,80.06
Missed class CIFAR10,90.05,90.72,91.27,90.44,90.87,90.86
Noisy CIFAR10,79.33,66.75,78.45,67.3,66.27,66.68
Noisy CIFAR100,80.94,68.96,80.87,69.75,68.25,68.88


\begin{tabular}{lrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 & Bayes & Excess & Total & BI & RBI & EPBI \\
InD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
CIFAR10 & 93.57 & 92.97 & 93.77 & 93.05 & 92.89 & 92.95 \\
CIFAR100 & 85.63 & 80.21 & 85.93 & 81.10 & 79.48 & 80.06 \\
Missed class CIFAR10 & 90.05 & 90.72 & 91.27 & 90.44 & 90.87 & 90.86 \\
Noisy CIFAR10 & 79.33 & 66.75 & 78.45 & 67.30 & 66.27 & 66.68 \\
Noisy CIFAR100 & 80.94 & 68.96 & 80.87 & 69.75 & 68.25 & 68.88 \\
\end{tabular}
\end{tabular}



In [22]:
nice_logscore = get_nice_df(logscore_agg_df_std[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CIFAR10,0.22,0.25,0.21,0.25,0.25,0.25
CIFAR100,0.24,0.31,0.22,0.31,0.32,0.31
Missed class CIFAR10,6.01,3.9,4.05,4.39,3.62,3.69
Noisy CIFAR10,3.55,6.35,3.78,6.36,6.35,6.34
Noisy CIFAR100,0.52,0.91,0.54,0.91,0.92,0.91


\begin{tabular}{lrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 & Bayes & Excess & Total & BI & RBI & EPBI \\
InD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
CIFAR10 & 0.22 & 0.25 & 0.21 & 0.25 & 0.25 & 0.25 \\
CIFAR100 & 0.24 & 0.31 & 0.22 & 0.31 & 0.32 & 0.31 \\
Missed class CIFAR10 & 6.01 & 3.90 & 4.05 & 4.39 & 3.62 & 3.69 \\
Noisy CIFAR10 & 3.55 & 6.35 & 3.78 & 6.36 & 6.35 & 6.34 \\
Noisy CIFAR100 & 0.52 & 0.91 & 0.54 & 0.91 & 0.92 & 0.91 \\
\end{tabular}
\end{tabular}



In [23]:
nice_logscore = get_nice_df(other_agg_df_mean[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CIFAR10,93.64,89.25,93.96,89.38,89.03,89.35
CIFAR100,86.62,79.25,87.21,80.24,78.07,79.44
Missed class CIFAR10,90.21,87.7,91.48,87.83,87.52,87.74
Noisy CIFAR10,79.13,75.47,80.36,75.77,74.91,75.71
Noisy CIFAR100,84.11,72.51,84.14,73.27,71.83,72.42


\begin{tabular}{lrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 & Bayes & Excess & Total & BI & RBI & EPBI \\
InD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
CIFAR10 & 93.64 & 89.25 & 93.96 & 89.38 & 89.03 & 89.35 \\
CIFAR100 & 86.62 & 79.25 & 87.21 & 80.24 & 78.07 & 79.44 \\
Missed class CIFAR10 & 90.21 & 87.70 & 91.48 & 87.83 & 87.52 & 87.74 \\
Noisy CIFAR10 & 79.13 & 75.47 & 80.36 & 75.77 & 74.91 & 75.71 \\
Noisy CIFAR100 & 84.11 & 72.51 & 84.14 & 73.27 & 71.83 & 72.42 \\
\end{tabular}
\end{tabular}



In [24]:
nice_logscore = get_nice_df(other_agg_df_std[measures].copy())
enhanced_latex = enhance_latex_table(nice_logscore[1])
print(enhanced_latex)

Unnamed: 0_level_0,Bayes,Excess,Total,BI,RBI,EPBI
InD,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
CIFAR10,0.23,0.42,0.22,0.45,0.41,0.41
CIFAR100,0.22,0.32,0.2,0.32,0.32,0.32
Missed class CIFAR10,5.95,2.81,4.05,2.83,2.73,2.87
Noisy CIFAR10,2.97,3.73,2.98,3.9,3.65,3.64
Noisy CIFAR100,0.36,0.73,0.39,0.71,0.75,0.73


\begin{tabular}{lrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 & Bayes & Excess & Total & BI & RBI & EPBI \\
InD &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
CIFAR10 & 0.23 & 0.42 & 0.22 & 0.45 & 0.41 & 0.41 \\
CIFAR100 & 0.22 & 0.32 & 0.20 & 0.32 & 0.32 & 0.32 \\
Missed class CIFAR10 & 5.95 & 2.81 & 4.05 & 2.83 & 2.73 & 2.87 \\
Noisy CIFAR10 & 2.97 & 3.73 & 2.98 & 3.90 & 3.65 & 3.64 \\
Noisy CIFAR100 & 0.36 & 0.73 & 0.39 & 0.71 & 0.75 & 0.73 \\
\end{tabular}
\end{tabular}

