In [1]:
import pandas as pd
import numpy as np
from psruq.source.table_utils import (
    collect_scores_into_dict,
    extract_same_different_dataframes,
    ood_detection_pairs_,
    aggregate_over_measures,
)
from psruq.source.path_config import REPOSITORY_ROOT
from source.metrics.constants import GName
from source.losses.constants import LossName
from source.datasets.constants import DatasetName
from source.models.constants import ModelSource
from IPython.display import display

pd.set_option("display.max_rows", None)

stty: 'standard input': Inappropriate ioctl for device


In [2]:
REPOSITORY_ROOT

'/home/nkotelevskii/github/uncertainty_from_proper_scoring_rules'

In [3]:
# prefix = "imagenet_"
# # prefix = ""

# full_ood_rocauc = pd.read_pickle(
#     f"{REPOSITORY_ROOT}/tables/central_tables/{prefix}full_ood_rocauc.pkl"
# )


# full_ood_rocauc = pd.read_pickle(
#     f"{REPOSITORY_ROOT}/tables/central_tables/new_models_full_ood_rocauc.pkl"
# )

In [4]:
ind_dataset = DatasetName.TINY_IMAGENET.value
model_source = ModelSource.TORCH_UNCERTAINTY.value

full_ood_rocauc = pd.read_pickle(
    f"{REPOSITORY_ROOT}/tables/central_tables/final/{ind_dataset}_{model_source}_full_ood_rocauc.pkl"
)

In [5]:
full_ood_rocauc.sample(10)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScores_array,architecture,training_dataset,base_rule,RiskType
290,SphericalScore ExcessRisk outer outer,imagenet_r,CrossEntropy,"[0.7192343266666666, 0.7168223683333332, 0.718...",resnet18,tiny_imagenet,SphericalScore,outer outer
301,SphericalScore ExcessRisk inner outer,imagenet_a,CrossEntropy,"[0.7064904333333333, 0.7004888466666666, 0.709...",resnet18,tiny_imagenet,SphericalScore,inner outer
48,LogScore ExcessRisk inner outer,tiny_imagenet,CrossEntropy,"[0.5000000000000001, 0.5, 0.5, 0.4999999999999...",resnet18,tiny_imagenet,LogScore,inner outer
326,SphericalScore BayesRisk outer,imagenet_r,CrossEntropy,"[0.81478848, 0.8172575766666667, 0.82251637166...",resnet18,tiny_imagenet,SphericalScore,outer
141,BrierScore ExcessRisk inner central,imagenet_a,CrossEntropy,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,tiny_imagenet,BrierScore,inner central
18,LogScore TotalRisk inner inner,imagenet_r,CrossEntropy,"[0.826212925, 0.8288979750000001, 0.8345040666...",resnet18,tiny_imagenet,LogScore,inner inner
265,SphericalScore TotalRisk inner outer,imagenet_a,CrossEntropy,"[0.8307094733333333, 0.8329856533333333, 0.836...",resnet18,tiny_imagenet,SphericalScore,inner outer
248,ZeroOneScore BayesRisk central,tiny_imagenet,CrossEntropy,"[0.5, 0.5, 0.5, 0.49999999999999994, 0.4999999...",resnet18,tiny_imagenet,ZeroOneScore,central
293,SphericalScore ExcessRisk outer inner,imagenet_a,CrossEntropy,"[0.7276804400000001, 0.7226189666666667, 0.731...",resnet18,tiny_imagenet,SphericalScore,outer inner
238,ZeroOneScore ExcessRisk central central,imagenet_r,CrossEntropy,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,tiny_imagenet,ZeroOneScore,central central


In [6]:
full_ood_rocauc.columns

Index(['UQMetric', 'Dataset', 'LossFunction', 'RocAucScores_array',
       'architecture', 'training_dataset', 'base_rule', 'RiskType'],
      dtype='object')

In [7]:
full_ood_rocauc.Dataset.unique()

array(['tiny_imagenet', 'imagenet_a', 'imagenet_r', 'imagenet_o'],
      dtype=object)

In [8]:
type(full_ood_rocauc.RocAucScores_array.values[0])

list

In [9]:
# full_ood_rocauc[full_ood_rocauc.UQMetric == "LogScore energy outer"]

In [10]:
np.argmin(np.vstack(full_ood_rocauc.RocAucScores_array.values), axis=0)

array([ 12,  60, 268,  56, 180])

In [11]:
full_ood_rocauc.RiskType.unique()

array(['outer outer', 'outer inner', 'outer central', 'inner outer',
       'inner inner', 'inner central', 'central outer', 'central inner',
       'central central', 'outer', 'inner', 'central', 'energy outer',
       'energy inner'], dtype=object)

In [12]:
full_ood_rocauc.training_dataset.unique()

array(['tiny_imagenet'], dtype=object)

In [13]:
full_ood_rocauc.base_rule.unique()

array(['LogScore', 'BrierScore', 'ZeroOneScore', 'SphericalScore'],
      dtype=object)

In [14]:
# full_ood_rocauc.UQMetric.unique()

In [15]:
# full_ood_rocauc[full_ood_rocauc.training_dataset == 'cifar100']

In [16]:
def selector(
    df,
    ood_dataset,
    architecture,
    UQMetric,
):
    arr = np.array(
        df[
            (df.UQMetric == UQMetric)
            & (df.Dataset == ood_dataset)
            & (df.architecture == architecture)
        ].RocAucScores_array.values[0]
    )
    # print(arr)

    # return f"Mean: {arr.mean()}, Std: {arr.std()}"
    return float(arr.mean()), float(arr.std())

In [17]:
architecture = "resnet18"

In [18]:
def get_specific_stats(
    architecture_,
    loss_function_,
    base_rule_,
):
    selected_results = full_ood_rocauc[
        (full_ood_rocauc.base_rule == base_rule_)
        & (full_ood_rocauc.LossFunction == loss_function_)
    ]

    full_res = {}
    for ood_dataset in [el for el in full_ood_rocauc.Dataset.unique()]:
        res_dict = {}
        for uqmetric_name in [
            el for el in full_ood_rocauc.UQMetric.unique() if el.startswith(base_rule_)
        ]:
            # for uqmetric_name in [
            #     # f"{base_rule_} energy outer",
            #     # f"{base_rule_} energy inner",
            #     f"{base_rule_} ExcessRisk central outer",
            #     f"{base_rule_} ExcessRisk inner central",
            #     f"{base_rule_} ExcessRisk central inner",
            #     f"{base_rule_} ExcessRisk outer outer",
            #     f"{base_rule_} ExcessRisk central inner",
            #     f"{base_rule_} BayesRisk inner",
            # ]:
            mean, std = selector(
                df=selected_results,
                UQMetric=uqmetric_name,
                ood_dataset=ood_dataset,
                architecture=architecture_,
            )
            res_dict[uqmetric_name] = {"mean": mean, "std": std}
        full_res[ood_dataset] = res_dict

    return full_res

In [19]:
ce_full_res = get_specific_stats(
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.LOG_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res).sort_index()

Unnamed: 0,tiny_imagenet,imagenet_a,imagenet_r,imagenet_o
LogScore BayesRisk central,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8361467253333335, 'std': 0.00201284...","{'mean': 0.8267308406666667, 'std': 0.00367415...","{'mean': 0.72859282, 'std': 0.003018632720761152}"
LogScore BayesRisk inner,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.8375950986666666, 'std': 0.00236460...","{'mean': 0.8277554460000001, 'std': 0.00372613...","{'mean': 0.731827825, 'std': 0.002010999858155..."
LogScore BayesRisk outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.832205124, 'std': 0.002434524826144...","{'mean': 0.8222900506666667, 'std': 0.00394079...","{'mean': 0.722096485, 'std': 0.002156473536331..."
LogScore ExcessRisk central central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}"
LogScore ExcessRisk central inner,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.7040270466666667, 'std': 0.00445115...","{'mean': 0.6974803783333332, 'std': 0.00327481...","{'mean': 0.70313953, 'std': 0.0033326589720296..."
LogScore ExcessRisk central outer,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.7755801053333333, 'std': 0.00284380...","{'mean': 0.7701716660000001, 'std': 0.00171799...","{'mean': 0.7445801700000001, 'std': 0.00218360..."
LogScore ExcessRisk inner central,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.6594383266666666, 'std': 0.00440240...","{'mean': 0.653436132, 'std': 0.003666640682976...","{'mean': 0.6749291449999999, 'std': 0.00363165..."
LogScore ExcessRisk inner inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}","{'mean': 0.5, 'std': 0.0}"
LogScore ExcessRisk inner outer,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.7593745893333333, 'std': 0.00360794...","{'mean': 0.7540450683333334, 'std': 0.00225334...","{'mean': 0.7407405300000001, 'std': 0.00180516..."
LogScore ExcessRisk outer central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.7621196626666665, 'std': 0.00432383...","{'mean': 0.7555273603333333, 'std': 0.00214011...","{'mean': 0.73835863, 'std': 0.0017426875440249..."


In [45]:
full_mean_tab = None
full_std_tab = None

# for (loss_function_, base_rule_) in [
#     (LossName.CROSS_ENTROPY.value, GName.LOG_SCORE.value),
#     (LossName.BRIER_SCORE.value, GName.BRIER_SCORE.value),
#     (LossName.SPHERICAL_SCORE.value, GName.SPHERICAL_SCORE.value),
# ]:
for (loss_function_, base_rule_) in [
    # (LossName.CROSS_ENTROPY.value, GName.LOG_SCORE.value),
    # (LossName.CROSS_ENTROPY.value, GName.BRIER_SCORE.value),
    # (LossName.CROSS_ENTROPY.value, GName.SPHERICAL_SCORE.value),
    (LossName.CROSS_ENTROPY.value, GName.ZERO_ONE_SCORE.value),
]:

    print(loss_function_, base_rule_)
    ce_full_res = get_specific_stats(
        architecture_=architecture,
        loss_function_=loss_function_,
        base_rule_=base_rule_,
    )
    
    pd.DataFrame.from_dict(ce_full_res).sort_index()
    
    # df_aux = pd.DataFrame.from_dict(ce_full_res).loc[
    #     [
    #         f"{base_rule_} BayesRisk outer",
    #         f"{base_rule_} BayesRisk inner",
    #         f"{base_rule_} BayesRisk central",
    #         f"{base_rule_} ExcessRisk outer outer",
    #         f"{base_rule_} ExcessRisk outer inner",
    #         f"{base_rule_} ExcessRisk inner outer",
    #         f"{base_rule_} ExcessRisk outer central",
    #         f"{base_rule_} ExcessRisk central outer",
    #         f"{base_rule_} TotalRisk outer outer",
    #         f"{base_rule_} TotalRisk outer inner",
    #         f"{base_rule_} TotalRisk outer central",
    #         f"{base_rule_} TotalRisk central outer",
    #         f"{base_rule_} energy inner",
    #         f"{base_rule_} energy outer",
    #     ]
    # ]
    df_aux = pd.DataFrame.from_dict(ce_full_res)
    
    df_aux = df_aux.drop(columns=[ind_dataset])
    if DatasetName.CIFAR10_NOISY_LABEL.value in df_aux.columns:
        df_aux = df_aux.drop(columns=[DatasetName.CIFAR10_NOISY_LABEL.value])

    if DatasetName.CIFAR100_NOISY_LABEL.value in df_aux.columns:
        df_aux = df_aux.drop(columns=[DatasetName.CIFAR100_NOISY_LABEL.value])
    
    mean_tab = 100 * df_aux.applymap(lambda x: x['mean']).round(4)
    std_tab = 100 * df_aux.applymap(lambda x: x['std']).round(4)

    if full_mean_tab is None:
        full_mean_tab = mean_tab
        full_std_tab = std_tab
    else:
        full_mean_tab = pd.concat([full_mean_tab, mean_tab])
        full_std_tab = pd.concat([full_std_tab, std_tab])

CrossEntropy ZeroOneScore


  mean_tab = 100 * df_aux.applymap(lambda x: x['mean']).round(4)
  std_tab = 100 * df_aux.applymap(lambda x: x['std']).round(4)


In [46]:
full_mean_tab = full_mean_tab.sort_index()
full_mean_tab

Unnamed: 0,imagenet_a,imagenet_r,imagenet_o
ZeroOneScore BayesRisk central,82.41,81.41,73.18
ZeroOneScore BayesRisk inner,82.41,81.41,73.18
ZeroOneScore BayesRisk outer,82.27,81.2,71.9
ZeroOneScore ExcessRisk central central,50.0,50.0,50.0
ZeroOneScore ExcessRisk central inner,50.0,50.0,50.0
ZeroOneScore ExcessRisk central outer,67.15,67.28,65.96
ZeroOneScore ExcessRisk inner central,50.0,50.0,50.0
ZeroOneScore ExcessRisk inner inner,50.0,50.0,50.0
ZeroOneScore ExcessRisk inner outer,67.15,67.28,65.96
ZeroOneScore ExcessRisk outer central,70.83,70.66,69.13


In [47]:
full_std_tab = full_std_tab.sort_index()
full_std_tab

Unnamed: 0,imagenet_a,imagenet_r,imagenet_o
ZeroOneScore BayesRisk central,0.25,0.31,0.16
ZeroOneScore BayesRisk inner,0.25,0.31,0.16
ZeroOneScore BayesRisk outer,0.23,0.34,0.14
ZeroOneScore ExcessRisk central central,0.0,0.0,0.0
ZeroOneScore ExcessRisk central inner,0.0,0.0,0.0
ZeroOneScore ExcessRisk central outer,0.31,0.19,0.33
ZeroOneScore ExcessRisk inner central,0.0,0.0,0.0
ZeroOneScore ExcessRisk inner inner,0.0,0.0,0.0
ZeroOneScore ExcessRisk inner outer,0.31,0.19,0.33
ZeroOneScore ExcessRisk outer central,0.24,0.13,0.31


In [48]:
# full_mean_tab.to_csv("mean_tables.csv", index=True)

In [49]:
# full_std_tab.to_csv("std_tables.csv", index=True)

In [50]:
# There are two tables, one for the mean values of some quantify, second for its standard deviation. 
# I want you to provide me a dump of these two tables to Latex. Moreover, I need values in each cell to be in format mean \pm std.

In [51]:
# Merge the mean and std tables based on their index to create a combined LaTeX table.
merged_table = full_mean_tab.round(3).copy()

# For each numeric column, combine the mean and std in the format: mean \pm std
for col in full_mean_tab.columns:
    merged_table[col] = full_mean_tab.round(3)[col].astype(str) + " $\\pm$ " + full_std_tab[col].round(3).astype(str)

# Create LaTeX format
latex_table = merged_table.to_latex(index=True, escape=False, float_format="%.2f")

# Output the resulting LaTeX table for the user
print(latex_table)

\begin{tabular}{llll}
\toprule
 & imagenet_a & imagenet_r & imagenet_o \\
\midrule
ZeroOneScore BayesRisk central & 82.41 $\pm$ 0.25 & 81.41 $\pm$ 0.31 & 73.18 $\pm$ 0.16 \\
ZeroOneScore BayesRisk inner & 82.41 $\pm$ 0.25 & 81.41 $\pm$ 0.31 & 73.18 $\pm$ 0.16 \\
ZeroOneScore BayesRisk outer & 82.27 $\pm$ 0.23 & 81.2 $\pm$ 0.34 & 71.9 $\pm$ 0.14 \\
ZeroOneScore ExcessRisk central central & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore ExcessRisk central inner & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore ExcessRisk central outer & 67.15 $\pm$ 0.31 & 67.28 $\pm$ 0.19 & 65.96 $\pm$ 0.33 \\
ZeroOneScore ExcessRisk inner central & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore ExcessRisk inner inner & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore ExcessRisk inner outer & 67.15 $\pm$ 0.31 & 67.28 $\pm$ 0.19 & 65.96 $\pm$ 0.33 \\
ZeroOneScore ExcessRisk outer central & 70.83 $\pm$ 0.24 & 70.66 $\pm$ 0.13 & 69.13 $\pm

In [52]:
replace_dictionary = {
    "BayesRisk central": r"\(\Rtildebayes^{(3)}\)",
    "BayesRisk inner": r"\(\Rtildebayes^{(2)}\)",
    "BayesRisk outer": r"\(\Rtildebayes^{(1)}\)",
    "ExcessRisk central central": r"\(\Rtildeexc^{(3, 3)}\)",
    "ExcessRisk central inner": r"\(\Rtildeexc^{(3, 2)}\)",
    "ExcessRisk central outer": r"\(\Rtildeexc^{(3, 1)}\)",
    "ExcessRisk inner central": r"\(\Rtildeexc^{(2, 3)}\)",
    "ExcessRisk inner inner": r"\(\Rtildeexc^{(2, 2)}\)",
    "ExcessRisk inner outer": r"\(\Rtildeexc^{(2, 1)}\)",
    "ExcessRisk outer central": r"\(\Rtildeexc^{(1, 3)}\)",
    "ExcessRisk outer inner": r"\(\Rtildeexc^{(1, 2)}\)",
    "ExcessRisk outer outer": r"\(\Rtildeexc^{(1, 1)}\)",
    "TotalRisk central central": r"\(\Rtildetot^{(3, 3)}\)",
    "TotalRisk central inner": r"\(\Rtildetot^{(3, 2)}\)",
    "TotalRisk central outer": r"\(\Rtildetot^{(3, 1)}\)",
    "TotalRisk inner central": r"\(\Rtildetot^{(2, 3)}\)",
    "TotalRisk inner inner": r"\(\Rtildetot^{(2, 2)}\)",
    "TotalRisk inner outer": r"\(\Rtildetot^{(2, 1)}\)",
    "TotalRisk outer central": r"\(\Rtildetot^{(1, 3)}\)",
    "TotalRisk outer inner": r"\(\Rtildetot^{(1, 2)}\)",
    "TotalRisk outer outer": r"\(\Rtildetot^{(1, 1)}\)",
    "energy inner": r"\( E(x;\E_{\theta}f_{\theta}) \)",
    "energy outer": r"\( \E_{\theta} E(x;f_{\theta}) \)",
    
} 
for key in replace_dictionary.keys():
    latex_table = latex_table.replace(key, replace_dictionary[key])

print(latex_table)

\begin{tabular}{llll}
\toprule
 & imagenet_a & imagenet_r & imagenet_o \\
\midrule
ZeroOneScore \(\Rtildebayes^{(3)}\) & 82.41 $\pm$ 0.25 & 81.41 $\pm$ 0.31 & 73.18 $\pm$ 0.16 \\
ZeroOneScore \(\Rtildebayes^{(2)}\) & 82.41 $\pm$ 0.25 & 81.41 $\pm$ 0.31 & 73.18 $\pm$ 0.16 \\
ZeroOneScore \(\Rtildebayes^{(1)}\) & 82.27 $\pm$ 0.23 & 81.2 $\pm$ 0.34 & 71.9 $\pm$ 0.14 \\
ZeroOneScore \(\Rtildeexc^{(3, 3)}\) & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore \(\Rtildeexc^{(3, 2)}\) & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore \(\Rtildeexc^{(3, 1)}\) & 67.15 $\pm$ 0.31 & 67.28 $\pm$ 0.19 & 65.96 $\pm$ 0.33 \\
ZeroOneScore \(\Rtildeexc^{(2, 3)}\) & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore \(\Rtildeexc^{(2, 2)}\) & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 & 50.0 $\pm$ 0.0 \\
ZeroOneScore \(\Rtildeexc^{(2, 1)}\) & 67.15 $\pm$ 0.31 & 67.28 $\pm$ 0.19 & 65.96 $\pm$ 0.33 \\
ZeroOneScore \(\Rtildeexc^{(1, 3)}\) & 70.83 $\pm$ 0.24 & 70.66 $\pm$ 0.1

In [41]:
# проверить: амплитуды значений?

# равномерное распределение для OOD? overconfident на InD?

In [42]:
ce_full_res = get_specific_stats(
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.BRIER_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,tiny_imagenet,imagenet_a,imagenet_r,imagenet_o
BrierScore TotalRisk outer outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8174803706666666, 'std': 0.00248751...","{'mean': 0.8107769313333334, 'std': 0.00224206...","{'mean': 0.746820095, 'std': 0.002712686477295..."
BrierScore TotalRisk outer inner,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8316045213333334, 'std': 0.00237264...","{'mean': 0.821363414, 'std': 0.003357853110047...","{'mean': 0.7329632500000001, 'std': 0.00180659..."
BrierScore TotalRisk outer central,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8316045213333334, 'std': 0.00237264...","{'mean': 0.821363414, 'std': 0.003357853110047...","{'mean': 0.7329632500000001, 'std': 0.00180659..."
BrierScore TotalRisk inner outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8174803746666666, 'std': 0.00248750...","{'mean': 0.810776933, 'std': 0.002242070834101...","{'mean': 0.74682014, 'std': 0.002712691918629889}"
BrierScore TotalRisk inner inner,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.8316045293333334, 'std': 0.00237265...","{'mean': 0.8213634216666665, 'std': 0.00335785...","{'mean': 0.7329632450000001, 'std': 0.00180657..."
BrierScore TotalRisk inner central,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.8316045293333334, 'std': 0.00237265...","{'mean': 0.8213634216666665, 'std': 0.00335785...","{'mean': 0.7329632450000001, 'std': 0.00180657..."
BrierScore TotalRisk central outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8174803746666666, 'std': 0.00248750...","{'mean': 0.810776933, 'std': 0.002242070834101...","{'mean': 0.74682014, 'std': 0.002712691918629889}"
BrierScore TotalRisk central inner,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.8316045293333334, 'std': 0.00237265...","{'mean': 0.8213634216666665, 'std': 0.00335785...","{'mean': 0.7329632450000001, 'std': 0.00180657..."
BrierScore TotalRisk central central,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.8316045293333334, 'std': 0.00237265...","{'mean': 0.8213634216666665, 'std': 0.00335785...","{'mean': 0.7329632450000001, 'std': 0.00180657..."
BrierScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 7.021666937153402e-17}","{'mean': 0.6546161960000001, 'std': 0.00387848...","{'mean': 0.6599426599999999, 'std': 0.00371557...","{'mean': 0.688149935, 'std': 0.002533815422075..."


In [43]:
ce_full_res = get_specific_stats(
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.SPHERICAL_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,tiny_imagenet,imagenet_a,imagenet_r,imagenet_o
SphericalScore TotalRisk outer outer,"{'mean': 0.5, 'std': 6.080941944488117e-17}","{'mean': 0.8325111546666667, 'std': 0.00227809...","{'mean': 0.8226636923333333, 'std': 0.00312053...","{'mean': 0.738567125, 'std': 0.001876281127856..."
SphericalScore TotalRisk outer inner,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8316045320000001, 'std': 0.00237265...","{'mean': 0.82136341, 'std': 0.0033578633494768...","{'mean': 0.73296322, 'std': 0.0018065949192057..."
SphericalScore TotalRisk outer central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.832745032, 'std': 0.002283770441074...","{'mean': 0.8228304893333334, 'std': 0.00317657...","{'mean': 0.7376285800000002, 'std': 0.00188845..."
SphericalScore TotalRisk inner outer,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8325111506666666, 'std': 0.00227809...","{'mean': 0.8226636926666668, 'std': 0.00312053...","{'mean': 0.73856701, 'std': 0.0018762929314609..."
SphericalScore TotalRisk inner inner,"{'mean': 0.5, 'std': 8.599750569898517e-17}","{'mean': 0.831604528, 'std': 0.002372650629408...","{'mean': 0.8213634163333333, 'std': 0.00335785...","{'mean': 0.7329631400000001, 'std': 0.00180657..."
SphericalScore TotalRisk inner central,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.8327450373333335, 'std': 0.00228377...","{'mean': 0.8228304980000001, 'std': 0.00317658...","{'mean': 0.73762849, 'std': 0.0018884798684126..."
SphericalScore TotalRisk central outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8047249013333333, 'std': 0.00259768...","{'mean': 0.7974148939999999, 'std': 0.00192912...","{'mean': 0.7411492899999998, 'std': 0.00233672..."
SphericalScore TotalRisk central inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8062448506666667, 'std': 0.00240624...","{'mean': 0.7991751953333333, 'std': 0.00186108...","{'mean': 0.7421441200000001, 'std': 0.00245443..."
SphericalScore TotalRisk central central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.8150740346666667, 'std': 0.00224791...","{'mean': 0.8067200393333334, 'std': 0.00230541...","{'mean': 0.7416704700000001, 'std': 0.00223813..."
SphericalScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.7190379906666667, 'std': 0.00290410...","{'mean': 0.7191183720000001, 'std': 0.00182947...","{'mean': 0.71557963, 'std': 0.0020726510594766..."


In [44]:
ce_full_res = get_specific_stats(
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.ZERO_ONE_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,tiny_imagenet,imagenet_a,imagenet_r,imagenet_o
ZeroOneScore TotalRisk outer outer,"{'mean': 0.5, 'std': 7.447602459741819e-17}","{'mean': 0.825180248, 'std': 0.002314353636050...","{'mean': 0.8156331553333332, 'std': 0.00297307...","{'mean': 0.73349592, 'std': 0.0018959039083903..."
ZeroOneScore TotalRisk outer inner,"{'mean': 0.5, 'std': 9.930136612989092e-17}","{'mean': 0.824068012, 'std': 0.002455920371464...","{'mean': 0.8141347856666666, 'std': 0.00306674...","{'mean': 0.731773355, 'std': 0.001641490144761..."
ZeroOneScore TotalRisk outer central,"{'mean': 0.5, 'std': 9.930136612989092e-17}","{'mean': 0.824068012, 'std': 0.002455920371464...","{'mean': 0.8141347856666666, 'std': 0.00306674...","{'mean': 0.731773355, 'std': 0.001641490144761..."
ZeroOneScore TotalRisk inner outer,"{'mean': 0.5, 'std': 6.080941944488117e-17}","{'mean': 0.825180228, 'std': 0.002314357176795...","{'mean': 0.8156331510000001, 'std': 0.00297308...","{'mean': 0.733495855, 'std': 0.001895880162971..."
ZeroOneScore TotalRisk inner inner,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.824068012, 'std': 0.002455915659830...","{'mean': 0.8141347880000002, 'std': 0.00306673...","{'mean': 0.73177328, 'std': 0.0016414867588865..."
ZeroOneScore TotalRisk inner central,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.824068012, 'std': 0.002455915659830...","{'mean': 0.8141347880000002, 'std': 0.00306673...","{'mean': 0.73177328, 'std': 0.0016414867588865..."
ZeroOneScore TotalRisk central outer,"{'mean': 0.5, 'std': 6.080941944488117e-17}","{'mean': 0.825180228, 'std': 0.002314357176795...","{'mean': 0.8156331510000001, 'std': 0.00297308...","{'mean': 0.733495855, 'std': 0.001895880162971..."
ZeroOneScore TotalRisk central inner,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.824068012, 'std': 0.002455915659830...","{'mean': 0.8141347880000002, 'std': 0.00306673...","{'mean': 0.73177328, 'std': 0.0016414867588865..."
ZeroOneScore TotalRisk central central,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.824068012, 'std': 0.002455915659830...","{'mean': 0.8141347880000002, 'std': 0.00306673...","{'mean': 0.73177328, 'std': 0.0016414867588865..."
ZeroOneScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 3.510833468576701e-17}","{'mean': 0.680455436, 'std': 0.003177184750543...","{'mean': 0.682064934, 'std': 0.001765297978431...","{'mean': 0.6822142099999999, 'std': 0.00265264..."


In [20]:
full_dataframe = pd.read_pickle(
    f"{REPOSITORY_ROOT}/tables/central_tables/full_dataframe.pkl"
)

In [21]:
np.vstack(full_dataframe["Scores"].values[0]).shape

(5, 10000)

In [22]:
full_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,Dataset,Scores,architecture,training_dataset,base_rule,RiskType
130,LogScore TotalRisk central inner,CrossEntropy,blurred_cifar10,"[[3.1486583, 3.875536, 2.3613393, 4.192751, 3....",resnet18,cifar100,LogScore,central inner
2748,SphericalScore BayesRisk central,CrossEntropy,cifar10c_4,"[[0.6546670727489119, 0.6584339565820865, 0.61...",resnet18,cifar10,SphericalScore,central
1343,BrierScore BayesRisk inner,SphericalScore,cifar100,"[[0.22786504, 0.7358822, 0.58551323, 0.8268909...",resnet18,cifar10,BrierScore,inner
571,BrierScore ExcessRisk outer inner,SphericalScore,cifar100,"[[0.003167109, 0.023984171, 0.003575621, 0.019...",resnet18,cifar100,BrierScore,outer inner
251,LogScore TotalRisk central inner,BrierScore,cifar10c_4,"[[2.0540764, 2.0483916, 2.4768152, 2.4612465, ...",resnet18,cifar10,LogScore,central inner
1872,ZeroOneScore ExcessRisk inner central,SphericalScore,svhn,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",resnet18,cifar10,ZeroOneScore,inner central
16,LogScore TotalRisk outer outer,BrierScore,blurred_cifar100,"[[1.1263578, 1.8448267, 2.4445064, 1.8783755, ...",resnet18,cifar10,LogScore,outer outer
2362,SphericalScore TotalRisk central central,BrierScore,cifar10c_3,"[[0.6374770876914801, 0.660290645614021, 0.637...",resnet18,cifar10,SphericalScore,central central
938,ZeroOneScore ExcessRisk outer inner,CrossEntropy,svhn,"[[0.13629013, 0.0, 0.02032382, 0.033893272, -7...",resnet18,cifar100,ZeroOneScore,outer inner
452,LogScore ExcessRisk inner inner,SphericalScore,cifar100,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",resnet18,cifar10,LogScore,inner inner


In [6]:
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Bayes")
    & full_ood_rocauc.UQMetric.str.endswith("Outer"),
    "RiskType",
] = "Bayes Outer"
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Bayes")
    & full_ood_rocauc.UQMetric.str.endswith("Inner"),
    "RiskType",
] = "Bayes Inner"

full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Total")
    & full_ood_rocauc.UQMetric.str.endswith("Outer"),
    "RiskType",
] = "Total Outer"
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Total")
    & full_ood_rocauc.UQMetric.str.endswith("Inner"),
    "RiskType",
] = "Total Inner"

In [7]:
# trunc_df = full_ood_rocauc[
# ~full_ood_rocauc.RiskType.isin(['Bias', 'MV', 'MVBI', 'BiasBI', 'Bregman Information', 'Reverse Bregman Information']) &
# # full_ood_rocauc.base_rule.isin(['Brier', 'Logscore', 'Spherical']) &
# # full_ood_rocauc.LossFunction.isin(['Brier', 'Logscore', 'Spherical']) &
# ~(np.isclose(full_ood_rocauc.RocAucScore, np.float64(0.5)))
# ]

# # trunc_df.sort_values(by='RocAucScore')

# trunc_df.to_csv(os.path.join('tables', 'full_ood_rocauc_only_risks.csv'), index=False)

In [8]:
# full_ood_rocauc[
# (full_ood_rocauc.RiskType != 'Bias') &
# (full_ood_rocauc.base_rule == 'Neglog')
# ].sort_values(by=['RocAucScore'])

In [9]:
# full_ood_rocauc = full_ood_rocauc[full_ood_rocauc.base_rule != 'Neglog']

In [10]:
grouped_df = extract_same_different_dataframes(
    dataframe_=full_ood_rocauc,
)

In [11]:
same_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        grouped_df.logscore_logscore,
        grouped_df.brier_brier,
        grouped_df.spherical_spherical,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
same_df = pd.DataFrame.from_dict(same_dict)

same_agg_df = aggregate_over_measures(
    dataframe_=same_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [12]:
different_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        grouped_df.logscore_not_logscore,
        grouped_df.brier_not_brier,
        grouped_df.spherical_not_spherical,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
different_df = pd.DataFrame.from_dict(different_dict)

different_agg_df = aggregate_over_measures(
    dataframe_=different_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [13]:
all_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        full_ood_rocauc,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
all_df = pd.DataFrame.from_dict(all_dict)

all_agg_df = aggregate_over_measures(
    dataframe_=all_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [14]:
display(all_agg_df)
display(same_agg_df)
display(different_agg_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.866536,0.85309,0.85309,0.826965,0.868306,0.8678,0.867567,0.869551,0.66066,0.802464,0.865765,0.750017
cifar10,blurred_cifar100,0.956324,0.9508,0.9508,0.935119,0.950863,0.951213,0.949054,0.952321,0.674925,0.863725,0.950518,0.787422
cifar10,cifar100,0.90591,0.90992,0.90992,0.909563,0.885812,0.887123,0.884213,0.8861,0.648434,0.818162,0.88798,0.744753
cifar10,svhn,0.941423,0.945143,0.945143,0.944335,0.92178,0.922266,0.920479,0.922596,0.661076,0.84447,0.924062,0.76313
cifar100,blurred_cifar10,0.878724,0.889896,0.889896,0.864338,0.784151,0.787492,0.769291,0.79567,0.541619,0.730607,0.807259,0.620265
cifar100,blurred_cifar100,0.747711,0.725706,0.725706,0.695672,0.73775,0.728504,0.739952,0.744794,0.568505,0.699635,0.744755,0.62885
cifar100,cifar10,0.752243,0.791126,0.791126,0.788203,0.658923,0.664299,0.650057,0.662414,0.482271,0.623093,0.671924,0.541911
cifar100,svhn,0.803006,0.849334,0.849334,0.848893,0.671667,0.679762,0.660314,0.674925,0.486484,0.625822,0.680426,0.539022


Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.862998,0.854926,0.854926,0.832594,0.878007,0.876301,0.879345,0.878376,0.742875,0.876733,0.876695,0.879063
cifar10,blurred_cifar100,0.956178,0.952183,0.952183,0.937482,0.956472,0.956019,0.956756,0.956642,0.788508,0.954771,0.955415,0.957182
cifar10,cifar100,0.907467,0.909927,0.909927,0.911068,0.90077,0.901993,0.899631,0.900687,0.754901,0.899628,0.900749,0.900815
cifar10,svhn,0.943443,0.945436,0.945436,0.945453,0.933077,0.933888,0.932275,0.933069,0.776912,0.93176,0.932753,0.933303
cifar100,blurred_cifar10,0.914538,0.89184,0.89184,0.867418,0.853572,0.858332,0.844763,0.85762,0.705564,0.84149,0.856035,0.860863
cifar100,blurred_cifar100,0.756761,0.726193,0.726193,0.701755,0.775527,0.767559,0.780346,0.778675,0.683394,0.767353,0.770708,0.779179
cifar100,cifar10,0.790967,0.794445,0.794445,0.792496,0.726959,0.734864,0.718211,0.727801,0.609855,0.722217,0.730338,0.72968
cifar100,svhn,0.843255,0.848508,0.848508,0.849544,0.73994,0.750673,0.728042,0.741104,0.625403,0.729552,0.742918,0.746241


Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.86742,0.852631,0.852631,0.825558,0.865881,0.865675,0.864623,0.867344,0.640106,0.783897,0.863032,0.717755
cifar10,blurred_cifar100,0.956361,0.950454,0.950454,0.934528,0.94946,0.950012,0.947128,0.951241,0.64653,0.840964,0.949293,0.744982
cifar10,cifar100,0.905521,0.909918,0.909918,0.909187,0.882072,0.883405,0.880358,0.882453,0.621818,0.797796,0.884787,0.705737
cifar10,svhn,0.940918,0.945069,0.945069,0.944056,0.918956,0.919361,0.91753,0.919977,0.632117,0.822647,0.92189,0.720587
cifar100,blurred_cifar10,0.869771,0.88941,0.88941,0.863569,0.766796,0.769782,0.750424,0.780182,0.500633,0.702887,0.795065,0.560116
cifar100,blurred_cifar100,0.745449,0.725584,0.725584,0.694151,0.728306,0.71874,0.729854,0.736324,0.539783,0.682705,0.738267,0.591268
cifar100,cifar10,0.742562,0.790296,0.790296,0.787129,0.641915,0.646658,0.633018,0.646067,0.450375,0.598313,0.65732,0.494969
cifar100,svhn,0.792944,0.849541,0.849541,0.848731,0.654599,0.662034,0.643382,0.658381,0.451754,0.59989,0.664803,0.487217


In [15]:
(same_agg_df - different_agg_df) / different_agg_df * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,-0.509738,0.269179,0.269179,0.852162,1.400479,1.22738,1.702765,1.271907,16.054943,11.842995,1.583135,22.473856
cifar10,blurred_cifar100,-0.019136,0.181881,0.181881,0.316115,0.738526,0.63231,1.016533,0.567799,21.960057,13.532907,0.644933,28.483916
cifar10,cifar100,0.214914,0.001027,0.001027,0.206975,2.119807,2.10412,2.189179,2.066305,21.402198,12.764164,1.803994,27.641685
cifar10,svhn,0.268292,0.038803,0.038803,0.147923,1.536685,1.58017,1.607072,1.42303,22.906476,13.26357,1.178338,29.51983
cifar100,blurred_cifar10,5.147023,0.273158,0.273158,0.445772,11.31668,11.503281,12.571472,9.925638,40.934306,19.719178,7.668554,53.693719
cifar100,blurred_cifar100,1.517402,0.083983,0.083983,1.095461,6.483645,6.79228,6.918211,5.751633,26.605389,12.398986,4.39412,31.780974
cifar100,cifar10,6.518565,0.525031,0.525031,0.68181,13.248542,13.640329,13.458176,12.650998,35.410572,20.709056,11.108369,47.419366
cifar100,svhn,6.344892,-0.121529,-0.121529,0.095839,13.037129,13.388909,13.158715,12.564581,38.438683,21.614327,11.750093,53.163875


In [16]:
full_scores = pd.read_csv(
    "./tables/full_dataframe.csv",
)

In [19]:
def enhance_latex_table(input_latex):
    lines = input_latex.split("\n")
    enhanced_lines = []

    for i, line in enumerate(lines):
        if "\\begin{tabular}" in line:
            # Start centering the table
            enhanced_lines.append(r"\begin{center}")

        if "\\toprule" in line:
            # Add multicolumn headers
            enhanced_lines.append(line)
            enhanced_lines.append(
                r"\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\"
            )
            enhanced_lines.append(r"\cmidrule(lr){1-2} \cmidrule(lr){3-7}")
            continue

        # Add row coloring
        if "\\midrule" in line:
            enhanced_lines.append(line)
            enhanced_lines.append(r"\rowcolor{gray!10}")
        elif "\\bottomrule" in line:
            enhanced_lines.append(r"\end{tabular}")
            enhanced_lines.append(r"\end{center}")
        else:
            enhanced_lines.append(line)

    return "\n".join(enhanced_lines)

In [20]:
index_pairs = [
    ("CIFAR10", "Blurred CIFAR10"),
    ("CIFAR10", "Blurred CIFAR100"),
    ("CIFAR10", "CIFAR100"),
    ("CIFAR10", "SVHN"),
    ("CIFAR100", "Blurred CIFAR10"),
    ("CIFAR100", "Blurred CIFAR100"),
    ("CIFAR100", "CIFAR10"),
    ("CIFAR100", "SVHN"),
]


def get_nice_df(df_):
    df_.index = pd.MultiIndex.from_tuples(index_pairs, names=["InD", "OOD"])
    df_.columns = [
        # 'Bayes',
        # 'Excess',
        # 'Total',
        "Bayes(O)",
        "Bayes(I)",
        "Total(O)",
        "Total(I)",
        "BI",
        "RBI",
        "EPBI",
        # 'Bias',
        # 'MV',
        # 'MVBI',
        # 'BiasBI',
    ]
    # df_ = df_[['Bayes', 'Excess', 'Total', 'BI', 'RBI']]
    df_ = (100 * df_).round(2)

    display(df_)

    return df_, df_.to_latex(float_format="%.2f")

In [21]:
# measures = [c for c in same_agg_df.columns if c not in ['OOD', 'InD', 'ScoringRule']]
# measures

measures = [
    "Bayes Outer",
    "Bayes Inner",
    "Total Outer",
    "Total Inner",
    "Bregman Information",
    "Reverse Bregman Information",
    "Expected Pairwise Bregman Information",
]

# measures = ['Bayes', 'Excess', 'Total', 'Bregman Information', 'Reverse Bregman Information', 'Expected Pairwise Bregman Information']

In [22]:
nice_same = get_nice_df(same_agg_df[measures].copy())
enhanced_latex = enhance_latex_table(nice_same[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes(O),Bayes(I),Total(O),Total(I),BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CIFAR10,Blurred CIFAR10,83.26,85.49,86.3,85.49,87.63,87.93,87.84
CIFAR10,Blurred CIFAR100,93.75,95.22,95.62,95.22,95.6,95.68,95.66
CIFAR10,CIFAR100,91.11,90.99,90.75,90.99,90.2,89.96,90.07
CIFAR10,SVHN,94.55,94.54,94.34,94.54,93.39,93.23,93.31
CIFAR100,Blurred CIFAR10,86.74,89.18,91.45,89.18,85.83,84.48,85.76
CIFAR100,Blurred CIFAR100,70.18,72.62,75.68,72.62,76.76,78.03,77.87
CIFAR100,CIFAR10,79.25,79.44,79.1,79.44,73.49,71.82,72.78
CIFAR100,SVHN,84.95,84.85,84.33,84.85,75.07,72.8,74.11


\begin{center}
\begin{tabular}{llrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 83.26 & 85.49 & 86.30 & 85.49 & 87.63 & 87.93 & 87.84 \\
 & Blurred CIFAR100 & 93.75 & 95.22 & 95.62 & 95.22 & 95.60 & 95.68 & 95.66 \\
 & CIFAR100 & 91.11 & 90.99 & 90.75 & 90.99 & 90.20 & 89.96 & 90.07 \\
 & SVHN & 94.55 & 94.54 & 94.34 & 94.54 & 93.39 & 93.23 & 93.31 \\
\cline{1-9}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.74 & 89.18 & 91.45 & 89.18 & 85.83 & 84.48 & 85.76 \\
 & Blurred CIFAR100 & 70.18 & 72.62 & 75.68 & 72.62 & 76.76 & 78.03 & 77.87 \\
 & CIFAR10 & 79.25 & 79.44 & 79.10 & 79.44 & 73.49 & 71.82 & 72.78 \\
 & SVHN & 84.95 & 84.85 & 84.33 & 84.85 & 75.07 & 72.80 & 74.11 \\
\cline{1-9}
\end{tabular}
\end{center}
\end{tabular}



In [23]:
nice_same[0].std()

Bayes(O)    8.124515
Bayes(I)    7.692756
Total(O)    7.170457
Total(I)    7.692756
BI          8.584582
RBI         9.116546
EPBI        8.741953
dtype: float64

In [24]:
nice_same[0].mean()

Bayes(O)    85.47375
Bayes(I)    86.54125
Total(O)    87.19625
Total(I)    86.54125
BI          84.74625
RBI         84.24125
EPBI        84.67500
dtype: float64

In [25]:
nice_different = get_nice_df(different_agg_df[measures].copy())
enhanced_latex = enhance_latex_table(nice_different[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes(O),Bayes(I),Total(O),Total(I),BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CIFAR10,Blurred CIFAR10,82.56,85.26,86.74,85.26,86.57,86.46,86.73
CIFAR10,Blurred CIFAR100,93.45,95.05,95.64,95.05,95.0,94.71,95.12
CIFAR10,CIFAR100,90.92,90.99,90.55,90.99,88.34,88.04,88.25
CIFAR10,SVHN,94.41,94.51,94.09,94.51,91.94,91.75,92.0
CIFAR100,Blurred CIFAR10,86.36,88.94,86.98,88.94,76.98,75.04,78.02
CIFAR100,Blurred CIFAR100,69.42,72.56,74.54,72.56,71.87,72.99,73.63
CIFAR100,CIFAR10,78.71,79.03,74.26,79.03,64.67,63.3,64.61
CIFAR100,SVHN,84.87,84.95,79.29,84.95,66.2,64.34,65.84


\begin{center}
\begin{tabular}{llrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 82.56 & 85.26 & 86.74 & 85.26 & 86.57 & 86.46 & 86.73 \\
 & Blurred CIFAR100 & 93.45 & 95.05 & 95.64 & 95.05 & 95.00 & 94.71 & 95.12 \\
 & CIFAR100 & 90.92 & 90.99 & 90.55 & 90.99 & 88.34 & 88.04 & 88.25 \\
 & SVHN & 94.41 & 94.51 & 94.09 & 94.51 & 91.94 & 91.75 & 92.00 \\
\cline{1-9}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.36 & 88.94 & 86.98 & 88.94 & 76.98 & 75.04 & 78.02 \\
 & Blurred CIFAR100 & 69.42 & 72.56 & 74.54 & 72.56 & 71.87 & 72.99 & 73.63 \\
 & CIFAR10 & 78.71 & 79.03 & 74.26 & 79.03 & 64.67 & 63.30 & 64.61 \\
 & SVHN & 84.87 & 84.95 & 79.29 & 84.95 & 66.20 & 64.34 & 65.84 \\
\cline{1-9}
\end{tabular}
\end{center}
\end{tabular}



In [26]:
nice_different[0].mean()

Bayes(O)    85.08750
Bayes(I)    86.41125
Total(O)    85.26125
Total(I)    86.41125
BI          80.19625
RBI         79.57875
EPBI        80.52500
dtype: float64

In [27]:
nice_different[0].std()

Bayes(O)     8.324288
Bayes(I)     7.721749
Total(O)     8.370061
Total(I)     7.721749
BI          11.836036
RBI         12.289760
EPBI        11.745028
dtype: float64

In [28]:
print(
    enhance_latex_table(
        pd.concat([nice_same[0], nice_different[0]], axis=1).to_latex(
            float_format="%.2f"
        )
    )
)

\begin{center}
\begin{tabular}{llrrrrrrrrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 83.26 & 85.49 & 86.30 & 85.49 & 87.63 & 87.93 & 87.84 & 82.56 & 85.26 & 86.74 & 85.26 & 86.57 & 86.46 & 86.73 \\
 & Blurred CIFAR100 & 93.75 & 95.22 & 95.62 & 95.22 & 95.60 & 95.68 & 95.66 & 93.45 & 95.05 & 95.64 & 95.05 & 95.00 & 94.71 & 95.12 \\
 & CIFAR100 & 91.11 & 90.99 & 90.75 & 90.99 & 90.20 & 89.96 & 90.07 & 90.92 & 90.99 & 90.55 & 90.99 & 88.34 & 88.04 & 88.25 \\
 & SVHN & 94.55 & 94.54 & 94.34 & 94.54 & 93.39 & 93.23 & 93.31 & 94.41 & 94.51 & 94.09 & 94.51 & 91.94 & 91.75 & 92.00 \\
\cline{1-16}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.74 & 89.18 & 91.45

In [29]:
(same_agg_df - all_agg_df) > 0

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,False,True,True,True,True,True,True,True,True,True,True,True
cifar10,blurred_cifar100,False,True,True,True,True,True,True,True,True,True,True,True
cifar10,cifar100,True,True,True,True,True,True,True,True,True,True,True,True
cifar10,svhn,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,blurred_cifar10,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,blurred_cifar100,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,cifar10,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,svhn,True,False,False,True,True,True,True,True,True,True,True,True
