In [1]:
import pandas as pd
import numpy as np
from source.source.table_utils import (
    collect_scores_into_dict,
    extract_same_different_dataframes,
    ood_detection_pairs_,
    aggregate_over_measures,
)
from source.source.path_config import REPOSITORY_ROOT
from source.metrics.constants import GName
from source.losses.constants import LossName
from IPython.display import display

pd.set_option("display.max_rows", None)

stty: 'standard input': Inappropriate ioctl for device
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
REPOSITORY_ROOT

'/home/nkotelevskii/github/uncertainty_from_proper_scoring_rules'

In [3]:
# full_ood_rocauc = pd.read_pickle(
#     f"{REPOSITORY_ROOT}/tables/central_tables/full_ood_rocauc.pkl"
# )

full_ood_rocauc = pd.read_pickle(
    f"{REPOSITORY_ROOT}/tables/central_tables/new_models_full_ood_rocauc.pkl"
)

In [4]:
full_ood_rocauc.sample(10)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScores_array,architecture,training_dataset,base_rule,RiskType
226,SphericalScore ExcessRisk inner outer,cifar100,CrossEntropy,"[0.5, 0.5, 0.5, 0.49999999999999994, 0.5]",resnet18,cifar100,SphericalScore,inner outer
453,ZeroOneScore ExcessRisk inner central,cifar10c_3,CrossEntropy,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,cifar10,ZeroOneScore,inner central
345,ZeroOneScore TotalRisk outer inner,cifar100,CrossEntropy,"[0.46498675, 0.475269735, 0.47469197999999996,...",resnet18,cifar10,ZeroOneScore,outer inner
27,LogScore ExcessRisk outer outer,cifar10,CrossEntropy,"[0.938092135, 0.93635339, 0.9405588950000001, ...",resnet18,cifar100,LogScore,outer outer
675,LogScore energy outer,cifar10c_1,CrossEntropy,"[0.6407641086842106, 0.6380257989473685, 0.639...",resnet18,cifar10,LogScore,energy outer
38,LogScore TotalRisk inner inner,cifar10c_4,CrossEntropy,"[0.6527022389473685, 0.6621381084210527, 0.657...",resnet18,cifar10,LogScore,inner inner
528,SphericalScore TotalRisk inner outer,cifar10,CrossEntropy,"[0.49999999999999994, 0.5, 0.5, 0.5, 0.5]",resnet18,cifar10,SphericalScore,inner outer
667,SphericalScore BayesRisk central,cifar10c_1,CrossEntropy,"[0.5157192094736842, 0.5249959157894737, 0.520...",resnet18,cifar10,SphericalScore,central
243,BrierScore ExcessRisk outer outer,cifar10c_1,CrossEntropy,"[0.40996269815789477, 0.416791092631579, 0.412...",resnet18,cifar10,BrierScore,outer outer
95,BrierScore ExcessRisk outer inner,tiny_imagenet,CrossEntropy,"[0.49780814, 0.486238775, 0.4879622, 0.4935620...",resnet18,cifar100,BrierScore,outer inner


In [5]:
full_ood_rocauc.columns

Index(['UQMetric', 'Dataset', 'LossFunction', 'RocAucScores_array',
       'architecture', 'training_dataset', 'base_rule', 'RiskType'],
      dtype='object')

In [6]:
type(full_ood_rocauc.RocAucScores_array.values[0])

list

In [49]:
full_ood_rocauc[full_ood_rocauc.UQMetric == "LogScore energy outer"]

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScores_array,architecture,training_dataset,base_rule,RiskType
672,LogScore energy outer,cifar10,CrossEntropy,"[0.5, 0.49999999999999994, 0.5000000000000001,...",resnet18,cifar10,LogScore,energy outer
673,LogScore energy outer,cifar100,CrossEntropy,"[0.98006484, 0.981468245, 0.981547905, 0.98112...",resnet18,cifar10,LogScore,energy outer
674,LogScore energy outer,tiny_imagenet,CrossEntropy,"[0.981349295, 0.983212575, 0.98279849, 0.98183...",resnet18,cifar10,LogScore,energy outer
675,LogScore energy outer,cifar10c_1,CrossEntropy,"[0.6407641086842106, 0.6380257989473685, 0.639...",resnet18,cifar10,LogScore,energy outer
676,LogScore energy outer,cifar10c_2,CrossEntropy,"[0.7179372907894737, 0.7177149255263158, 0.719...",resnet18,cifar10,LogScore,energy outer
677,LogScore energy outer,cifar10c_3,CrossEntropy,"[0.7746372402631578, 0.7784178381578948, 0.781...",resnet18,cifar10,LogScore,energy outer
678,LogScore energy outer,cifar10c_4,CrossEntropy,"[0.8327735331578947, 0.8408525463157894, 0.840...",resnet18,cifar10,LogScore,energy outer
679,LogScore energy outer,cifar10c_5,CrossEntropy,"[0.900485587105263, 0.9109105942105262, 0.9071...",resnet18,cifar10,LogScore,energy outer
252,LogScore energy outer,cifar10,CrossEntropy,"[0.019935159999999997, 0.018531754999999994, 0...",resnet18,cifar100,LogScore,energy outer
253,LogScore energy outer,cifar100,CrossEntropy,"[0.49999999999999994, 0.5, 0.5, 0.499999999999...",resnet18,cifar100,LogScore,energy outer


In [50]:
full_ood_rocauc.RiskType.unique()

array(['outer outer', 'outer inner', 'outer central', 'inner outer',
       'inner inner', 'inner central', 'central outer', 'central inner',
       'central central', 'outer', 'inner', 'central', 'energy outer',
       'energy inner'], dtype=object)

In [51]:
full_ood_rocauc.training_dataset.unique()

array(['cifar10', 'cifar100'], dtype=object)

In [52]:
# full_ood_rocauc.UQMetric.unique()

In [53]:
# full_ood_rocauc[full_ood_rocauc.training_dataset == 'cifar100']

In [54]:
def selector(
    df,
    ind_dataset,
    ood_dataset,
    architecture,
    UQMetric,
):
    arr = np.array(
        df[
            (df.UQMetric == UQMetric)
            & (df.training_dataset == ind_dataset)
            & (df.Dataset == ood_dataset)
            & (df.architecture == architecture)
        ].RocAucScores_array.values[0]
    )
    # print(arr)

    # return f"Mean: {arr.mean()}, Std: {arr.std()}"
    return float(arr.mean()), float(arr.std())

In [61]:
ind_dataset = "cifar10"
architecture = "resnet18"

In [62]:
def get_specific_stats(
    ind_dataset_,
    architecture_,
    loss_function_,
    base_rule_,
):
    selected_results = full_ood_rocauc[
        (full_ood_rocauc.base_rule == base_rule_)
        & (full_ood_rocauc.LossFunction == loss_function_)
    ]

    full_res = {}
    for ood_dataset in [el for el in full_ood_rocauc.Dataset.unique()]:
        if ind_dataset_ == "cifar100" and ood_dataset.find("cifar10c") > -1:
            continue
        res_dict = {}
        for uqmetric_name in [
            el for el in full_ood_rocauc.UQMetric.unique() if el.startswith(base_rule_)
        ]:
            # for uqmetric_name in [
            #     # f"{base_rule_} energy outer",
            #     # f"{base_rule_} energy inner",
            #     f"{base_rule_} ExcessRisk central outer",
            #     f"{base_rule_} ExcessRisk inner central",
            #     f"{base_rule_} ExcessRisk central inner",
            #     f"{base_rule_} ExcessRisk outer outer",
            #     f"{base_rule_} ExcessRisk central inner",
            #     f"{base_rule_} BayesRisk inner",
            # ]:
            mean, std = selector(
                df=selected_results,
                UQMetric=uqmetric_name,
                ind_dataset=ind_dataset_,
                ood_dataset=ood_dataset,
                architecture=architecture_,
            )
            res_dict[uqmetric_name] = {"mean": mean, "std": std}
        full_res[ood_dataset] = res_dict

    return full_res

In [63]:
ce_full_res = get_specific_stats(
    ind_dataset_=ind_dataset,
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.LOG_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,cifar10,cifar100,tiny_imagenet,cifar10c_1,cifar10c_2,cifar10c_3,cifar10c_4,cifar10c_5
LogScore TotalRisk outer outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.074017851, 'std': 0.001734156418099...","{'mean': 0.071707891, 'std': 0.001734455650065...","{'mean': 0.4069924781578947, 'std': 0.00242265...","{'mean': 0.3474448868421052, 'std': 0.00212649...","{'mean': 0.2959016695263158, 'std': 0.00364254...","{'mean': 0.23834166989473685, 'std': 0.0037864...","{'mean': 0.16092696805263157, 'std': 0.0037079..."
LogScore TotalRisk outer inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.738932402, 'std': 0.004931639915320...","{'mean': 0.751031595, 'std': 0.004431888171200...","{'mean': 0.5712767429473684, 'std': 0.00219910...","{'mean': 0.6113409926842106, 'std': 0.00191645...","{'mean': 0.6374094389473683, 'std': 0.00295417...","{'mean': 0.6589252737368421, 'std': 0.00346838...","{'mean': 0.6743769994736842, 'std': 0.00552889..."
LogScore TotalRisk outer central,"{'mean': 0.4999999999999999, 'std': 7.85046229...","{'mean': 0.695851049, 'std': 0.003294337354867...","{'mean': 0.6982019640000001, 'std': 0.00299253...","{'mean': 0.5633452051578948, 'std': 0.00272485...","{'mean': 0.6050494411052632, 'std': 0.00285031...","{'mean': 0.6331585744210526, 'std': 0.00203143...","{'mean': 0.6586229566842106, 'std': 0.00193275...","{'mean': 0.676103912, 'std': 0.003068163449436..."
LogScore TotalRisk inner outer,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.074017835, 'std': 0.001734152067533...","{'mean': 0.071707889, 'std': 0.001734445234832...","{'mean': 0.40699247621052637, 'std': 0.0024226...","{'mean': 0.34744488557894737, 'std': 0.0021264...","{'mean': 0.295901672368421, 'std': 0.003642539...","{'mean': 0.23834167121052632, 'std': 0.0037864...","{'mean': 0.1609269705263158, 'std': 0.00370793..."
LogScore TotalRisk inner inner,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.7389324009999999, 'std': 0.00493163...","{'mean': 0.751031603, 'std': 0.00443189212258222}","{'mean': 0.5712767486842105, 'std': 0.00219910...","{'mean': 0.6113409946842105, 'std': 0.00191645...","{'mean': 0.6374094387368421, 'std': 0.00295417...","{'mean': 0.6589252713157895, 'std': 0.00346838...","{'mean': 0.6743770019473684, 'std': 0.00552889..."
LogScore TotalRisk inner central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.695851043, 'std': 0.003294325132537...","{'mean': 0.698201952, 'std': 0.002992537481791...","{'mean': 0.5633452060526316, 'std': 0.00272486...","{'mean': 0.6050494397894737, 'std': 0.00285031...","{'mean': 0.6331585738421053, 'std': 0.00203143...","{'mean': 0.6586229556842105, 'std': 0.00193275...","{'mean': 0.6761039127894737, 'std': 0.00306816..."
LogScore TotalRisk central outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.111454537, 'std': 0.002278588249167...","{'mean': 0.10958946899999997, 'std': 0.0028031...","{'mean': 0.4226652882631579, 'std': 0.00231219...","{'mean': 0.36923794131578946, 'std': 0.0025461...","{'mean': 0.32213396884210527, 'std': 0.0034769...","{'mean': 0.2677508244736842, 'std': 0.00306150...","{'mean': 0.19276069810526314, 'std': 0.0030333..."
LogScore TotalRisk central inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.528128734, 'std': 0.003828042850578...","{'mean': 0.533172231, 'std': 0.003471510683337...","{'mean': 0.5084340510526315, 'std': 0.00272826...","{'mean': 0.5140151617894737, 'std': 0.00324491...","{'mean': 0.516691086263158, 'std': 0.004243659...","{'mean': 0.5168559441578948, 'std': 0.00423082...","{'mean': 0.5034582301578947, 'std': 0.00467226..."
LogScore TotalRisk central central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.5582162329999999, 'std': 0.00439414...","{'mean': 0.567766295, 'std': 0.00525284897974995}","{'mean': 0.5118606812631579, 'std': 0.00262820...","{'mean': 0.5178261409473683, 'std': 0.00319870...","{'mean': 0.5209836949473685, 'std': 0.00418280...","{'mean': 0.520756144368421, 'std': 0.004424050...","{'mean': 0.5118789419473685, 'std': 0.00556339..."
LogScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.06140412799999999, 'std': 0.0014787...","{'mean': 0.059079321, 'std': 0.001405158029993...","{'mean': 0.39916192252631577, 'std': 0.0023776...","{'mean': 0.33525018605263157, 'std': 0.0021301...","{'mean': 0.2812022843157895, 'std': 0.00355298...","{'mean': 0.22233548652631577, 'std': 0.0036597...","{'mean': 0.14569223126315795, 'std': 0.0036099..."


In [64]:
# проверить: амплитуды значений?

# равномерное распределение для OOD? overconfident на InD?

In [65]:
ce_full_res = get_specific_stats(
    ind_dataset_=ind_dataset,
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.BRIER_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,cifar10,cifar100,tiny_imagenet,cifar10c_1,cifar10c_2,cifar10c_3,cifar10c_4,cifar10c_5
BrierScore TotalRisk outer outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.249963313, 'std': 0.001833133044041...","{'mean': 0.250345237, 'std': 0.001793062970920...","{'mean': 0.4439912003684211, 'std': 0.00290520...","{'mean': 0.41056504463157895, 'std': 0.0021179...","{'mean': 0.38331375005263163, 'std': 0.0030394...","{'mean': 0.35297900063157894, 'std': 0.0025634...","{'mean': 0.30724522505263163, 'std': 0.0019086..."
BrierScore TotalRisk outer inner,"{'mean': 0.5, 'std': 3.510833468576701e-17}","{'mean': 0.6377423, 'std': 0.004656957724005007}","{'mean': 0.647366251, 'std': 0.004724138458388...","{'mean': 0.5534629442631579, 'std': 0.00252060...","{'mean': 0.5808575871052633, 'std': 0.00201807...","{'mean': 0.5960359574210526, 'std': 0.00298327...","{'mean': 0.605817195, 'std': 0.002915038599842...","{'mean': 0.6049005232631578, 'std': 0.00437398..."
BrierScore TotalRisk outer central,"{'mean': 0.5, 'std': 3.510833468576701e-17}","{'mean': 0.6377423, 'std': 0.004656957724005007}","{'mean': 0.647366251, 'std': 0.004724138458388...","{'mean': 0.5534629442631579, 'std': 0.00252060...","{'mean': 0.5808575871052633, 'std': 0.00201807...","{'mean': 0.5960359574210526, 'std': 0.00298327...","{'mean': 0.605817195, 'std': 0.002915038599842...","{'mean': 0.6049005232631578, 'std': 0.00437398..."
BrierScore TotalRisk inner outer,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.24996332599999999, 'std': 0.0018331...","{'mean': 0.25034524999999996, 'std': 0.0017930...","{'mean': 0.44399130389473684, 'std': 0.0029052...","{'mean': 0.4105651715263158, 'std': 0.00211796...","{'mean': 0.3833138067368421, 'std': 0.00303941...","{'mean': 0.35297901726315795, 'std': 0.0025634...","{'mean': 0.3072452255263158, 'std': 0.00190869..."
BrierScore TotalRisk inner inner,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.637742297, 'std': 0.004656964288656...","{'mean': 0.647366272, 'std': 0.004724167199670...","{'mean': 0.5534629169473684, 'std': 0.00252059...","{'mean': 0.5808575839473684, 'std': 0.00201806...","{'mean': 0.5960359676842105, 'std': 0.00298327...","{'mean': 0.6058171955789474, 'std': 0.00291501...","{'mean': 0.6049005307368421, 'std': 0.00437397..."
BrierScore TotalRisk inner central,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.637742297, 'std': 0.004656964288656...","{'mean': 0.647366272, 'std': 0.004724167199670...","{'mean': 0.5534629169473684, 'std': 0.00252059...","{'mean': 0.5808575839473684, 'std': 0.00201806...","{'mean': 0.5960359676842105, 'std': 0.00298327...","{'mean': 0.6058171955789474, 'std': 0.00291501...","{'mean': 0.6049005307368421, 'std': 0.00437397..."
BrierScore TotalRisk central outer,"{'mean': 0.5, 'std': 4.965068306494546e-17}","{'mean': 0.24996332599999999, 'std': 0.0018331...","{'mean': 0.25034524999999996, 'std': 0.0017930...","{'mean': 0.44399130389473684, 'std': 0.0029052...","{'mean': 0.4105651715263158, 'std': 0.00211796...","{'mean': 0.3833138067368421, 'std': 0.00303941...","{'mean': 0.35297901726315795, 'std': 0.0025634...","{'mean': 0.3072452255263158, 'std': 0.00190869..."
BrierScore TotalRisk central inner,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.637742297, 'std': 0.004656964288656...","{'mean': 0.647366272, 'std': 0.004724167199670...","{'mean': 0.5534629169473684, 'std': 0.00252059...","{'mean': 0.5808575839473684, 'std': 0.00201806...","{'mean': 0.5960359676842105, 'std': 0.00298327...","{'mean': 0.6058171955789474, 'std': 0.00291501...","{'mean': 0.6049005307368421, 'std': 0.00437397..."
BrierScore TotalRisk central central,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.637742297, 'std': 0.004656964288656...","{'mean': 0.647366272, 'std': 0.004724167199670...","{'mean': 0.5534629169473684, 'std': 0.00252059...","{'mean': 0.5808575839473684, 'std': 0.00201806...","{'mean': 0.5960359676842105, 'std': 0.00298327...","{'mean': 0.6058171955789474, 'std': 0.00291501...","{'mean': 0.6049005307368421, 'std': 0.00437397..."
BrierScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 5.551115123125783e-17}","{'mean': 0.131446648, 'std': 0.001679035210665...","{'mean': 0.128409416, 'std': 0.001011507365051...","{'mean': 0.4125977447894737, 'std': 0.00235317...","{'mean': 0.3581586184210526, 'std': 0.00177365...","{'mean': 0.3144913402105263, 'std': 0.00245601...","{'mean': 0.268687861, 'std': 0.001834386744040...","{'mean': 0.2084989978421053, 'std': 0.00202853..."


In [66]:
ce_full_res = get_specific_stats(
    ind_dataset_=ind_dataset,
    architecture_=architecture,
    loss_function_=LossName.CROSS_ENTROPY.value,
    base_rule_=GName.SPHERICAL_SCORE.value,
)

pd.DataFrame.from_dict(ce_full_res)

Unnamed: 0,cifar10,cifar100,tiny_imagenet,cifar10c_1,cifar10c_2,cifar10c_3,cifar10c_4,cifar10c_5
SphericalScore TotalRisk outer outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.503431611, 'std': 0.004440329003798...","{'mean': 0.5072789759999999, 'std': 0.00388667...","{'mean': 0.5267678396315791, 'std': 0.00296437...","{'mean': 0.5371091722631579, 'std': 0.00244944...","{'mean': 0.5381776735263158, 'std': 0.00338967...","{'mean': 0.5318393635789475, 'std': 0.00323084...","{'mean': 0.5075680440526316, 'std': 0.00349469..."
SphericalScore TotalRisk outer inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.637742216, 'std': 0.004656958276935...","{'mean': 0.6473662320000001, 'std': 0.00472419...","{'mean': 0.553462872, 'std': 0.002520570443348...","{'mean': 0.5808574952105264, 'std': 0.00201804...","{'mean': 0.5960358835263159, 'std': 0.00298328...","{'mean': 0.6058171240526316, 'std': 0.00291503...","{'mean': 0.604900492736842, 'std': 0.004373986..."
SphericalScore TotalRisk outer central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.589531358, 'std': 0.004763501044204...","{'mean': 0.5975896440000001, 'std': 0.00499693...","{'mean': 0.5433782741052632, 'std': 0.00266010...","{'mean': 0.5647019836842105, 'std': 0.00214008...","{'mean': 0.574808258, 'std': 0.003239698325886...","{'mean': 0.5786325971578947, 'std': 0.00309390...","{'mean': 0.5685563622105263, 'std': 0.00409248..."
SphericalScore TotalRisk inner outer,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.5034316520000001, 'std': 0.00444033...","{'mean': 0.507278947, 'std': 0.003886670582998...","{'mean': 0.5267678912105265, 'std': 0.00296440...","{'mean': 0.5371092810526317, 'std': 0.00244952...","{'mean': 0.5381777201578948, 'std': 0.00338971...","{'mean': 0.5318394416842105, 'std': 0.00323087...","{'mean': 0.5075680872631578, 'std': 0.00349467..."
SphericalScore TotalRisk inner inner,"{'mean': 0.5, 'std': 2.482534153247273e-17}","{'mean': 0.6377422239999999, 'std': 0.00465697...","{'mean': 0.6473662179999999, 'std': 0.00472419...","{'mean': 0.5534628352631579, 'std': 0.00252061...","{'mean': 0.5808574991052631, 'std': 0.00201807...","{'mean': 0.5960358659473683, 'std': 0.00298329...","{'mean': 0.6058171142105263, 'std': 0.00291503...","{'mean': 0.6049004874210527, 'std': 0.00437399..."
SphericalScore TotalRisk inner central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.5895313700000001, 'std': 0.00476349...","{'mean': 0.5975896559999999, 'std': 0.00499694...","{'mean': 0.543378250631579, 'std': 0.002660099...","{'mean': 0.5647019737894736, 'std': 0.00214007...","{'mean': 0.5748082709473684, 'std': 0.00323968...","{'mean': 0.5786325907368421, 'std': 0.00309390...","{'mean': 0.5685563518947369, 'std': 0.00409248..."
SphericalScore TotalRisk central outer,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.282841622, 'std': 0.002463684756850...","{'mean': 0.284386778, 'std': 0.002983227072828...","{'mean': 0.443983534, 'std': 0.003023097000565...","{'mean': 0.4126137557894737, 'std': 0.00231349...","{'mean': 0.387951479368421, 'std': 0.003241217...","{'mean': 0.3616254173684211, 'std': 0.00275891...","{'mean': 0.32177591736842104, 'std': 0.0022191..."
SphericalScore TotalRisk central inner,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.32899820999999996, 'std': 0.0032268...","{'mean': 0.330583214, 'std': 0.003788786941444...","{'mean': 0.4644587994736842, 'std': 0.00266300...","{'mean': 0.4434531691578948, 'std': 0.00226690...","{'mean': 0.42419761294736846, 'std': 0.0030180...","{'mean': 0.40090522557894737, 'std': 0.0029690...","{'mean': 0.3606032601052632, 'std': 0.00293634..."
SphericalScore TotalRisk central central,"{'mean': 0.5, 'std': 0.0}","{'mean': 0.47990783400000003, 'std': 0.0041883...","{'mean': 0.483566654, 'std': 0.003983420102447...","{'mean': 0.5201028204210527, 'std': 0.00313758...","{'mean': 0.5268349826315789, 'std': 0.00233905...","{'mean': 0.5250314250526316, 'std': 0.00337971...","{'mean': 0.5154642967368421, 'std': 0.00328300...","{'mean': 0.48603547905263156, 'std': 0.0034660..."
SphericalScore ExcessRisk outer outer,"{'mean': 0.5, 'std': 4.2998752849492583e-17}","{'mean': 0.17737180600000002, 'std': 0.0017044...","{'mean': 0.17453259599999998, 'std': 0.0011751...","{'mean': 0.42437893863157894, 'std': 0.0026816...","{'mean': 0.3786926705263158, 'std': 0.00209953...","{'mean': 0.3418497851052632, 'std': 0.00276889...","{'mean': 0.3022260901052632, 'std': 0.00212832...","{'mean': 0.24759551768421048, 'std': 0.0018592..."


In [30]:
full_dataframe = pd.read_pickle(
    f"{REPOSITORY_ROOT}/tables/central_tables/full_dataframe.pkl"
)

In [33]:
np.vstack(full_dataframe["Scores"].values[0]).shape

(5, 10000)

In [6]:
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Bayes")
    & full_ood_rocauc.UQMetric.str.endswith("Outer"),
    "RiskType",
] = "Bayes Outer"
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Bayes")
    & full_ood_rocauc.UQMetric.str.endswith("Inner"),
    "RiskType",
] = "Bayes Inner"

full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Total")
    & full_ood_rocauc.UQMetric.str.endswith("Outer"),
    "RiskType",
] = "Total Outer"
full_ood_rocauc.loc[
    (full_ood_rocauc.RiskType == "Total")
    & full_ood_rocauc.UQMetric.str.endswith("Inner"),
    "RiskType",
] = "Total Inner"

In [7]:
# trunc_df = full_ood_rocauc[
# ~full_ood_rocauc.RiskType.isin(['Bias', 'MV', 'MVBI', 'BiasBI', 'Bregman Information', 'Reverse Bregman Information']) &
# # full_ood_rocauc.base_rule.isin(['Brier', 'Logscore', 'Spherical']) &
# # full_ood_rocauc.LossFunction.isin(['Brier', 'Logscore', 'Spherical']) &
# ~(np.isclose(full_ood_rocauc.RocAucScore, np.float64(0.5)))
# ]

# # trunc_df.sort_values(by='RocAucScore')

# trunc_df.to_csv(os.path.join('tables', 'full_ood_rocauc_only_risks.csv'), index=False)

In [8]:
# full_ood_rocauc[
# (full_ood_rocauc.RiskType != 'Bias') &
# (full_ood_rocauc.base_rule == 'Neglog')
# ].sort_values(by=['RocAucScore'])

In [9]:
# full_ood_rocauc = full_ood_rocauc[full_ood_rocauc.base_rule != 'Neglog']

In [10]:
grouped_df = extract_same_different_dataframes(
    dataframe_=full_ood_rocauc,
)

In [11]:
same_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        grouped_df.logscore_logscore,
        grouped_df.brier_brier,
        grouped_df.spherical_spherical,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
same_df = pd.DataFrame.from_dict(same_dict)

same_agg_df = aggregate_over_measures(
    dataframe_=same_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [12]:
different_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        grouped_df.logscore_not_logscore,
        grouped_df.brier_not_brier,
        grouped_df.spherical_not_spherical,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
different_df = pd.DataFrame.from_dict(different_dict)

different_agg_df = aggregate_over_measures(
    dataframe_=different_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [13]:
all_dict, _ = collect_scores_into_dict(
    dataframes_list=[
        full_ood_rocauc,
    ],
    ood_detection_pairs=ood_detection_pairs_,
)
all_df = pd.DataFrame.from_dict(all_dict)

all_agg_df = aggregate_over_measures(
    dataframe_=all_df,
    agg_func_="mean",
    by_=["InD", "OOD"],
)

In [14]:
display(all_agg_df)
display(same_agg_df)
display(different_agg_df)

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.866536,0.85309,0.85309,0.826965,0.868306,0.8678,0.867567,0.869551,0.66066,0.802464,0.865765,0.750017
cifar10,blurred_cifar100,0.956324,0.9508,0.9508,0.935119,0.950863,0.951213,0.949054,0.952321,0.674925,0.863725,0.950518,0.787422
cifar10,cifar100,0.90591,0.90992,0.90992,0.909563,0.885812,0.887123,0.884213,0.8861,0.648434,0.818162,0.88798,0.744753
cifar10,svhn,0.941423,0.945143,0.945143,0.944335,0.92178,0.922266,0.920479,0.922596,0.661076,0.84447,0.924062,0.76313
cifar100,blurred_cifar10,0.878724,0.889896,0.889896,0.864338,0.784151,0.787492,0.769291,0.79567,0.541619,0.730607,0.807259,0.620265
cifar100,blurred_cifar100,0.747711,0.725706,0.725706,0.695672,0.73775,0.728504,0.739952,0.744794,0.568505,0.699635,0.744755,0.62885
cifar100,cifar10,0.752243,0.791126,0.791126,0.788203,0.658923,0.664299,0.650057,0.662414,0.482271,0.623093,0.671924,0.541911
cifar100,svhn,0.803006,0.849334,0.849334,0.848893,0.671667,0.679762,0.660314,0.674925,0.486484,0.625822,0.680426,0.539022


Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.862998,0.854926,0.854926,0.832594,0.878007,0.876301,0.879345,0.878376,0.742875,0.876733,0.876695,0.879063
cifar10,blurred_cifar100,0.956178,0.952183,0.952183,0.937482,0.956472,0.956019,0.956756,0.956642,0.788508,0.954771,0.955415,0.957182
cifar10,cifar100,0.907467,0.909927,0.909927,0.911068,0.90077,0.901993,0.899631,0.900687,0.754901,0.899628,0.900749,0.900815
cifar10,svhn,0.943443,0.945436,0.945436,0.945453,0.933077,0.933888,0.932275,0.933069,0.776912,0.93176,0.932753,0.933303
cifar100,blurred_cifar10,0.914538,0.89184,0.89184,0.867418,0.853572,0.858332,0.844763,0.85762,0.705564,0.84149,0.856035,0.860863
cifar100,blurred_cifar100,0.756761,0.726193,0.726193,0.701755,0.775527,0.767559,0.780346,0.778675,0.683394,0.767353,0.770708,0.779179
cifar100,cifar10,0.790967,0.794445,0.794445,0.792496,0.726959,0.734864,0.718211,0.727801,0.609855,0.722217,0.730338,0.72968
cifar100,svhn,0.843255,0.848508,0.848508,0.849544,0.73994,0.750673,0.728042,0.741104,0.625403,0.729552,0.742918,0.746241


Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,0.86742,0.852631,0.852631,0.825558,0.865881,0.865675,0.864623,0.867344,0.640106,0.783897,0.863032,0.717755
cifar10,blurred_cifar100,0.956361,0.950454,0.950454,0.934528,0.94946,0.950012,0.947128,0.951241,0.64653,0.840964,0.949293,0.744982
cifar10,cifar100,0.905521,0.909918,0.909918,0.909187,0.882072,0.883405,0.880358,0.882453,0.621818,0.797796,0.884787,0.705737
cifar10,svhn,0.940918,0.945069,0.945069,0.944056,0.918956,0.919361,0.91753,0.919977,0.632117,0.822647,0.92189,0.720587
cifar100,blurred_cifar10,0.869771,0.88941,0.88941,0.863569,0.766796,0.769782,0.750424,0.780182,0.500633,0.702887,0.795065,0.560116
cifar100,blurred_cifar100,0.745449,0.725584,0.725584,0.694151,0.728306,0.71874,0.729854,0.736324,0.539783,0.682705,0.738267,0.591268
cifar100,cifar10,0.742562,0.790296,0.790296,0.787129,0.641915,0.646658,0.633018,0.646067,0.450375,0.598313,0.65732,0.494969
cifar100,svhn,0.792944,0.849541,0.849541,0.848731,0.654599,0.662034,0.643382,0.658381,0.451754,0.59989,0.664803,0.487217


In [15]:
(same_agg_df - different_agg_df) / different_agg_df * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,-0.509738,0.269179,0.269179,0.852162,1.400479,1.22738,1.702765,1.271907,16.054943,11.842995,1.583135,22.473856
cifar10,blurred_cifar100,-0.019136,0.181881,0.181881,0.316115,0.738526,0.63231,1.016533,0.567799,21.960057,13.532907,0.644933,28.483916
cifar10,cifar100,0.214914,0.001027,0.001027,0.206975,2.119807,2.10412,2.189179,2.066305,21.402198,12.764164,1.803994,27.641685
cifar10,svhn,0.268292,0.038803,0.038803,0.147923,1.536685,1.58017,1.607072,1.42303,22.906476,13.26357,1.178338,29.51983
cifar100,blurred_cifar10,5.147023,0.273158,0.273158,0.445772,11.31668,11.503281,12.571472,9.925638,40.934306,19.719178,7.668554,53.693719
cifar100,blurred_cifar100,1.517402,0.083983,0.083983,1.095461,6.483645,6.79228,6.918211,5.751633,26.605389,12.398986,4.39412,31.780974
cifar100,cifar10,6.518565,0.525031,0.525031,0.68181,13.248542,13.640329,13.458176,12.650998,35.410572,20.709056,11.108369,47.419366
cifar100,svhn,6.344892,-0.121529,-0.121529,0.095839,13.037129,13.388909,13.158715,12.564581,38.438683,21.614327,11.750093,53.163875


In [16]:
full_scores = pd.read_csv(
    "./tables/full_dataframe.csv",
)

In [19]:
def enhance_latex_table(input_latex):
    lines = input_latex.split("\n")
    enhanced_lines = []

    for i, line in enumerate(lines):
        if "\\begin{tabular}" in line:
            # Start centering the table
            enhanced_lines.append(r"\begin{center}")

        if "\\toprule" in line:
            # Add multicolumn headers
            enhanced_lines.append(line)
            enhanced_lines.append(
                r"\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\"
            )
            enhanced_lines.append(r"\cmidrule(lr){1-2} \cmidrule(lr){3-7}")
            continue

        # Add row coloring
        if "\\midrule" in line:
            enhanced_lines.append(line)
            enhanced_lines.append(r"\rowcolor{gray!10}")
        elif "\\bottomrule" in line:
            enhanced_lines.append(r"\end{tabular}")
            enhanced_lines.append(r"\end{center}")
        else:
            enhanced_lines.append(line)

    return "\n".join(enhanced_lines)

In [20]:
index_pairs = [
    ("CIFAR10", "Blurred CIFAR10"),
    ("CIFAR10", "Blurred CIFAR100"),
    ("CIFAR10", "CIFAR100"),
    ("CIFAR10", "SVHN"),
    ("CIFAR100", "Blurred CIFAR10"),
    ("CIFAR100", "Blurred CIFAR100"),
    ("CIFAR100", "CIFAR10"),
    ("CIFAR100", "SVHN"),
]


def get_nice_df(df_):
    df_.index = pd.MultiIndex.from_tuples(index_pairs, names=["InD", "OOD"])
    df_.columns = [
        # 'Bayes',
        # 'Excess',
        # 'Total',
        "Bayes(O)",
        "Bayes(I)",
        "Total(O)",
        "Total(I)",
        "BI",
        "RBI",
        "EPBI",
        # 'Bias',
        # 'MV',
        # 'MVBI',
        # 'BiasBI',
    ]
    # df_ = df_[['Bayes', 'Excess', 'Total', 'BI', 'RBI']]
    df_ = (100 * df_).round(2)

    display(df_)

    return df_, df_.to_latex(float_format="%.2f")

In [21]:
# measures = [c for c in same_agg_df.columns if c not in ['OOD', 'InD', 'ScoringRule']]
# measures

measures = [
    "Bayes Outer",
    "Bayes Inner",
    "Total Outer",
    "Total Inner",
    "Bregman Information",
    "Reverse Bregman Information",
    "Expected Pairwise Bregman Information",
]

# measures = ['Bayes', 'Excess', 'Total', 'Bregman Information', 'Reverse Bregman Information', 'Expected Pairwise Bregman Information']

In [22]:
nice_same = get_nice_df(same_agg_df[measures].copy())
enhanced_latex = enhance_latex_table(nice_same[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes(O),Bayes(I),Total(O),Total(I),BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CIFAR10,Blurred CIFAR10,83.26,85.49,86.3,85.49,87.63,87.93,87.84
CIFAR10,Blurred CIFAR100,93.75,95.22,95.62,95.22,95.6,95.68,95.66
CIFAR10,CIFAR100,91.11,90.99,90.75,90.99,90.2,89.96,90.07
CIFAR10,SVHN,94.55,94.54,94.34,94.54,93.39,93.23,93.31
CIFAR100,Blurred CIFAR10,86.74,89.18,91.45,89.18,85.83,84.48,85.76
CIFAR100,Blurred CIFAR100,70.18,72.62,75.68,72.62,76.76,78.03,77.87
CIFAR100,CIFAR10,79.25,79.44,79.1,79.44,73.49,71.82,72.78
CIFAR100,SVHN,84.95,84.85,84.33,84.85,75.07,72.8,74.11


\begin{center}
\begin{tabular}{llrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 83.26 & 85.49 & 86.30 & 85.49 & 87.63 & 87.93 & 87.84 \\
 & Blurred CIFAR100 & 93.75 & 95.22 & 95.62 & 95.22 & 95.60 & 95.68 & 95.66 \\
 & CIFAR100 & 91.11 & 90.99 & 90.75 & 90.99 & 90.20 & 89.96 & 90.07 \\
 & SVHN & 94.55 & 94.54 & 94.34 & 94.54 & 93.39 & 93.23 & 93.31 \\
\cline{1-9}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.74 & 89.18 & 91.45 & 89.18 & 85.83 & 84.48 & 85.76 \\
 & Blurred CIFAR100 & 70.18 & 72.62 & 75.68 & 72.62 & 76.76 & 78.03 & 77.87 \\
 & CIFAR10 & 79.25 & 79.44 & 79.10 & 79.44 & 73.49 & 71.82 & 72.78 \\
 & SVHN & 84.95 & 84.85 & 84.33 & 84.85 & 75.07 & 72.80 & 74.11 \\
\cline{1-9}
\end{tabular}
\end{center}
\end{tabular}



In [23]:
nice_same[0].std()

Bayes(O)    8.124515
Bayes(I)    7.692756
Total(O)    7.170457
Total(I)    7.692756
BI          8.584582
RBI         9.116546
EPBI        8.741953
dtype: float64

In [24]:
nice_same[0].mean()

Bayes(O)    85.47375
Bayes(I)    86.54125
Total(O)    87.19625
Total(I)    86.54125
BI          84.74625
RBI         84.24125
EPBI        84.67500
dtype: float64

In [25]:
nice_different = get_nice_df(different_agg_df[measures].copy())
enhanced_latex = enhance_latex_table(nice_different[1])
print(enhanced_latex)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes(O),Bayes(I),Total(O),Total(I),BI,RBI,EPBI
InD,OOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
CIFAR10,Blurred CIFAR10,82.56,85.26,86.74,85.26,86.57,86.46,86.73
CIFAR10,Blurred CIFAR100,93.45,95.05,95.64,95.05,95.0,94.71,95.12
CIFAR10,CIFAR100,90.92,90.99,90.55,90.99,88.34,88.04,88.25
CIFAR10,SVHN,94.41,94.51,94.09,94.51,91.94,91.75,92.0
CIFAR100,Blurred CIFAR10,86.36,88.94,86.98,88.94,76.98,75.04,78.02
CIFAR100,Blurred CIFAR100,69.42,72.56,74.54,72.56,71.87,72.99,73.63
CIFAR100,CIFAR10,78.71,79.03,74.26,79.03,64.67,63.3,64.61
CIFAR100,SVHN,84.87,84.95,79.29,84.95,66.2,64.34,65.84


\begin{center}
\begin{tabular}{llrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 82.56 & 85.26 & 86.74 & 85.26 & 86.57 & 86.46 & 86.73 \\
 & Blurred CIFAR100 & 93.45 & 95.05 & 95.64 & 95.05 & 95.00 & 94.71 & 95.12 \\
 & CIFAR100 & 90.92 & 90.99 & 90.55 & 90.99 & 88.34 & 88.04 & 88.25 \\
 & SVHN & 94.41 & 94.51 & 94.09 & 94.51 & 91.94 & 91.75 & 92.00 \\
\cline{1-9}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.36 & 88.94 & 86.98 & 88.94 & 76.98 & 75.04 & 78.02 \\
 & Blurred CIFAR100 & 69.42 & 72.56 & 74.54 & 72.56 & 71.87 & 72.99 & 73.63 \\
 & CIFAR10 & 78.71 & 79.03 & 74.26 & 79.03 & 64.67 & 63.30 & 64.61 \\
 & SVHN & 84.87 & 84.95 & 79.29 & 84.95 & 66.20 & 64.34 & 65.84 \\
\cline{1-9}
\end{tabular}
\end{center}
\end{tabular}



In [26]:
nice_different[0].mean()

Bayes(O)    85.08750
Bayes(I)    86.41125
Total(O)    85.26125
Total(I)    86.41125
BI          80.19625
RBI         79.57875
EPBI        80.52500
dtype: float64

In [27]:
nice_different[0].std()

Bayes(O)     8.324288
Bayes(I)     7.721749
Total(O)     8.370061
Total(I)     7.721749
BI          11.836036
RBI         12.289760
EPBI        11.745028
dtype: float64

In [28]:
print(
    enhance_latex_table(
        pd.concat([nice_same[0], nice_different[0]], axis=1).to_latex(
            float_format="%.2f"
        )
    )
)

\begin{center}
\begin{tabular}{llrrrrrrrrrrrrrr}
\toprule
\multicolumn{2}{c}{Dataset} & \multicolumn{5}{c}{Metrics} \\
\cmidrule(lr){1-2} \cmidrule(lr){3-7}
 &  & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI & Bayes(O) & Bayes(I) & Total(O) & Total(I) & BI & RBI & EPBI \\
InD & OOD &  &  &  &  &  &  &  &  &  &  &  &  &  &  \\
\midrule
\rowcolor{gray!10}
\multirow[t]{4}{*}{CIFAR10} & Blurred CIFAR10 & 83.26 & 85.49 & 86.30 & 85.49 & 87.63 & 87.93 & 87.84 & 82.56 & 85.26 & 86.74 & 85.26 & 86.57 & 86.46 & 86.73 \\
 & Blurred CIFAR100 & 93.75 & 95.22 & 95.62 & 95.22 & 95.60 & 95.68 & 95.66 & 93.45 & 95.05 & 95.64 & 95.05 & 95.00 & 94.71 & 95.12 \\
 & CIFAR100 & 91.11 & 90.99 & 90.75 & 90.99 & 90.20 & 89.96 & 90.07 & 90.92 & 90.99 & 90.55 & 90.99 & 88.34 & 88.04 & 88.25 \\
 & SVHN & 94.55 & 94.54 & 94.34 & 94.54 & 93.39 & 93.23 & 93.31 & 94.41 & 94.51 & 94.09 & 94.51 & 91.94 & 91.75 & 92.00 \\
\cline{1-16}
\multirow[t]{4}{*}{CIFAR100} & Blurred CIFAR10 & 86.74 & 89.18 & 91.45

In [29]:
(same_agg_df - all_agg_df) > 0

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Outer,Total Inner,Bayes Inner,Bayes Outer,Excess,Bregman Information,Reverse Bregman Information,Expected Pairwise Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
cifar10,blurred_cifar10,False,True,True,True,True,True,True,True,True,True,True,True
cifar10,blurred_cifar100,False,True,True,True,True,True,True,True,True,True,True,True
cifar10,cifar100,True,True,True,True,True,True,True,True,True,True,True,True
cifar10,svhn,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,blurred_cifar10,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,blurred_cifar100,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,cifar10,True,True,True,True,True,True,True,True,True,True,True,True
cifar100,svhn,True,False,False,True,True,True,True,True,True,True,True,True
