In [None]:
import sys
import re
sys.path.insert(0, 'src/')
sys.path.insert(1, 'external_repos/pytorch_cifar100/')
sys.path.insert(1, 'external_repos/pytorch_cifar10/')
import numpy as np
from tqdm.auto import tqdm
from src.data_utils import load_model_checkpoint
from src.postprocessing_utils import (
    get_metrics_results,
    uq_funcs_with_names,
    get_uncertainty_scores,
    get_predicted_labels,
    make_aggregation,
    get_missclassification_dataframe,
    get_ood_detection_dataframe,
    get_raw_scores_dataframe,
    ravel_df,
)
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from IPython.display import display
pd.set_option('display.max_rows', None)

In [2]:
# Maps the loss-function identifiers (see `loss_function_names`) to the short
# labels used for the `LossFunction` column; the labels match the scoring-rule
# names that occur inside `UQMetric` strings ("Logscore", "Brier", "Spherical").
base_score_dict = {
    "cross_entropy": "Logscore",
    "brier_score": "Brier",
    "spherical_score": "Spherical",
}

In [3]:
# Training configurations to evaluate. Names with a prefix ('noisy_', 'missed_class_')
# are mapped back to their last '_'-separated token when the in-distribution
# dataset is looked up in the extraction loop.
training_dataset_names = [
    'cifar10',
    'cifar100',
    'noisy_cifar100',
    'missed_class_cifar10',
    'noisy_cifar10',
]
# Forwarded unchanged to `get_uncertainty_scores`.
temperature = 1.
# Model ids forwarded to `get_uncertainty_scores`; `len(model_ids)` is also used to
# slice the targets array (presumably one copy of the targets per model — TODO confirm).
model_ids = np.arange(20)
# Datasets for which scores are extracted; every one of them is also used as an OOD candidate.
list_extraction_datasets=['cifar10', 'cifar100', 'svhn', 'blurred_cifar100', 'blurred_cifar10']
list_ood_datasets = [el for el in list_extraction_datasets]
# Keys of `base_score_dict`.
loss_function_names = [
    'brier_score',
    'cross_entropy',
    'spherical_score'
]
# Forwarded unchanged to `get_uncertainty_scores`.
use_cheating_approximation = False
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
gt_prob_approx = 'same'

# Result tables; they stay `None` if the loop below produces nothing.
full_dataframe = None
full_ood_rocauc_dataframe = None
full_mis_rocauc_dataframe = None

# Per-run tables are gathered in lists and concatenated once after the loop.
# Calling `pd.concat` inside the loop re-copies every previously collected row
# on each iteration.
raw_score_frames_ = []
ood_rocauc_frames_ = []
mis_rocauc_frames_ = []

for training_dataset_name in training_dataset_names:
    # The noisy / missed-class variants are run with resnet18 only, and their
    # in-distribution dataset is the last '_'-separated token of the name
    # (e.g. 'noisy_cifar10' -> 'cifar10').
    if training_dataset_name not in ['missed_class_cifar10', 'noisy_cifar10', 'noisy_cifar100']:
        architectures = ['resnet18', 'vgg']
        training_dataset_name_aux = training_dataset_name
    else:
        architectures = ['resnet18']
        training_dataset_name_aux = training_dataset_name.split('_')[-1]

    for architecture in architectures:
        uq_results, embeddings_per_dataset, targets_per_dataset = get_uncertainty_scores(
            loss_function_names=loss_function_names,
            training_dataset_name=training_dataset_name,
            architecture=architecture,
            model_ids=model_ids,
            list_extraction_datasets=list_extraction_datasets,
            temperature=temperature,
            use_cheating_approximation=use_cheating_approximation,
            use_cached=False
        )

        # OOD-detection ROC AUC of every uncertainty measure, tagged with the run.
        df_ood = get_ood_detection_dataframe(
            ind_dataset=training_dataset_name_aux,
            uq_results=uq_results,
            list_ood_datasets=list_ood_datasets,
        )
        df_ood['architecture'] = architecture
        df_ood['training_dataset'] = training_dataset_name

        # Keep the first 1/len(model_ids) of the targets array as the ground truth.
        # NOTE(review): assumes the targets are stacked once per model id — confirm.
        max_ind = int(targets_per_dataset[training_dataset_name_aux].shape[0] / len(model_ids))
        true_labels = targets_per_dataset[training_dataset_name_aux][:max_ind]

        pred_labels = get_predicted_labels(
            embeddings_per_dataset=embeddings_per_dataset,
            training_dataset_name=training_dataset_name_aux,
        )

        # Misclassification-detection ROC AUC on the in-distribution dataset.
        df_misclassification = get_missclassification_dataframe(
            ind_dataset=training_dataset_name_aux,
            uq_results=uq_results,
            true_labels=true_labels,
            pred_labels=pred_labels,
        )
        df_misclassification['architecture'] = architecture
        df_misclassification['training_dataset'] = training_dataset_name

        # Raw per-sample scores, tagged with the run.
        scores_df_unravel = get_raw_scores_dataframe(uq_results=uq_results)
        scores_df_unravel['architecture'] = architecture
        scores_df_unravel['training_dataset'] = training_dataset_name

        raw_score_frames_.append(scores_df_unravel)
        ood_rocauc_frames_.append(df_ood)
        mis_rocauc_frames_.append(df_misclassification)

# Row indices are kept as produced by each run (no `ignore_index`), exactly as
# repeated pairwise concatenation would leave them.
if raw_score_frames_:
    full_dataframe = pd.concat(raw_score_frames_)
    full_ood_rocauc_dataframe = pd.concat(ood_rocauc_frames_)
    full_mis_rocauc_dataframe = pd.concat(mis_rocauc_frames_)

100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [01:45<00:00,  1.41s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [01:46<00:00,  1.42s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [09:55<00:00,  7.93s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [09:47<00:00,  7.84s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [09:48<00:00,  7.84s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [01:45<00:00,  1.41s/it]
100%|██████████████████████████████████████████████████████████████████████████████████████| 75/75 [01:45<00:00,  1.41s/it]


In [4]:
# Regexes that pull the base scoring rule and the risk type out of a `UQMetric` name.
# Within `pattern_risk` the alternatives "MVBI" and "BiasBI" must stay ahead of their
# prefixes "MV" and "Bias", because regex alternation takes the first alternative that matches.
pattern_baserule = r'(Logscore|Brier|Neglog|Maxprob|Spherical)'
pattern_risk = r'(Total|Bayes|Excess|Reverse Bregman Information|Bregman Information|Expected Pairwise Bregman Information|MVBI|MV|BiasBI|Bias)'

# The same three derived columns are added, in place, to each result table.
for results_frame_ in (full_ood_rocauc_dataframe, full_mis_rocauc_dataframe, full_dataframe):
    metric_names_ = results_frame_['UQMetric']
    results_frame_['base_rule'] = metric_names_.str.extract(pattern_baserule)
    results_frame_['RiskType'] = metric_names_.str.extract(pattern_risk)
    # Replace loss-function identifiers with their display labels.
    results_frame_['LossFunction'] = results_frame_['LossFunction'].replace(base_score_dict)

In [5]:
# Write the three result tables to ./tables/ (the directory must already exist).
for table_, csv_path_ in (
    (full_dataframe, './tables/full_dataframe.csv'),
    (full_ood_rocauc_dataframe, './tables/full_ood_rocauc.csv'),
    (full_mis_rocauc_dataframe, './tables/full_mis_rocauc.csv'),
):
    table_.to_csv(csv_path_)

In [30]:
full_dataframe.shape

(6825, 8)

In [7]:
full_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,Dataset,Scores,architecture,training_dataset,base_rule,RiskType
331,Excess Logscore Inner Outer,Brier,cifar100,"[0.15261963, 0.31864625, 0.26091367, 0.3227746...",resnet18,cifar100,Logscore,Excess
812,MV Maxprob,Brier,svhn,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",vgg,cifar10,Maxprob,MV
253,Bayes Neglog Inner,Spherical,blurred_cifar100,"[-32.27174, -26.688385, -28.664572, -25.667175...",resnet18,noisy_cifar10,Neglog,Bayes
463,Excess Brier Outer Inner,Spherical,blurred_cifar100,"[0.14342609, 0.15466687, 0.15530184, 0.2733425...",vgg,cifar100,Brier,Excess
913,BiasBI Spherical,Spherical,blurred_cifar100,"[0.0233406, 0.02150424, 0.028492952, 0.0523934...",resnet18,noisy_cifar100,Spherical,BiasBI
334,Excess Logscore Inner Outer,Brier,blurred_cifar10,"[0.13772236, 0.53354174, 1.3595185, 0.15080823...",vgg,cifar10,Logscore,Excess
89,Total Brier Inner,Spherical,blurred_cifar10,"[0.97879845, 0.98287225, 0.80758, 0.9847312, 0...",resnet18,cifar100,Brier,Total
943,MV Neglog,Spherical,blurred_cifar100,"[8.880266, 4.4897137, 3.7953174, 8.126781, 4.5...",resnet18,cifar10,Neglog,MV
642,Reverse Bregman Information Maxprob,Spherical,svhn,"[0.030070066, 0.034939885, 0.043468416, 0.0112...",resnet18,noisy_cifar100,Maxprob,Reverse Bregman Information
450,Excess Brier Outer Inner,Brier,cifar10,"[0.0005352667, 0.0022103514, 0.044019453, 0.02...",resnet18,noisy_cifar10,Brier,Excess


In [8]:
full_ood_rocauc_dataframe.sample(10)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScore,architecture,training_dataset,base_rule,RiskType
772,MVBI Brier,svhn,Logscore,0.953036,resnet18,cifar10,Brier,MVBI
61,Total Spherical Outer,cifar10,Logscore,0.5,resnet18,cifar10,Spherical,Total
843,BiasBI Maxprob,cifar100,Brier,0.5,vgg,cifar100,Maxprob,BiasBI
798,Bias Maxprob,cifar100,Brier,0.178472,vgg,cifar10,Maxprob,Bias
697,MV Logscore,svhn,Logscore,0.797149,resnet18,noisy_cifar100,Logscore,MV
908,BiasBI Spherical,svhn,Spherical,0.904193,vgg,cifar10,Spherical,BiasBI
207,Bayes Logscore Outer,blurred_cifar10,Brier,0.846005,vgg,cifar100,Logscore,Bayes
630,Reverse Bregman Information Maxprob,cifar10,Brier,0.692058,resnet18,cifar100,Maxprob,Reverse Bregman Information
504,Excess Neglog Outer Inner,blurred_cifar100,Brier,0.966675,resnet18,cifar10,Neglog,Excess
317,Excess Brier Outer Outer,cifar10,Spherical,0.394145,resnet18,cifar100,Brier,Excess


In [9]:
full_mis_rocauc_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,RocAucScore,architecture,training_dataset,base_rule,RiskType
70,Excess Logscore Outer Outer,Logscore,0.717919,resnet18,noisy_cifar10,Logscore,Excess
141,MVBI Logscore,Brier,0.807318,resnet18,missed_class_cifar10,Logscore,MVBI
33,Bayes Brier Outer,Brier,0.810598,resnet18,noisy_cifar10,Brier,Bayes
11,Total Maxprob Outer,Spherical,0.950845,resnet18,cifar10,Maxprob,Total
14,Total Spherical Outer,Spherical,0.803304,resnet18,noisy_cifar10,Spherical,Total
82,Excess Neglog Outer Outer,Logscore,0.208363,resnet18,cifar100,Neglog,Excess
109,Bregman Information Logscore,Logscore,0.868259,vgg,cifar100,Logscore,Bregman Information
131,Reverse Bregman Information Neglog,Spherical,0.202594,resnet18,cifar100,Neglog,Reverse Bregman Information
108,Bregman Information Logscore,Brier,0.746873,resnet18,noisy_cifar100,Logscore,Bregman Information
85,Excess Spherical Inner Outer,Logscore,0.950672,resnet18,cifar10,Spherical,Excess


In [10]:
full_ood_rocauc_dataframe['training_dataset'].unique()

array(['cifar10', 'cifar100', 'noisy_cifar100', 'missed_class_cifar10',
       'noisy_cifar10'], dtype=object)

In [11]:
full_ood_rocauc_dataframe['Dataset'].unique()

array(['cifar10', 'cifar100', 'svhn', 'blurred_cifar100',
       'blurred_cifar10'], dtype=object)

In [12]:
full_ood_rocauc_dataframe['UQMetric'].unique()

array(['Total Brier Outer', 'Total Logscore Outer', 'Total Neglog Outer',
       'Total Maxprob Outer', 'Total Spherical Outer',
       'Total Brier Inner', 'Total Logscore Inner', 'Total Neglog Inner',
       'Total Maxprob Inner', 'Total Spherical Inner',
       'Bayes Brier Inner', 'Bayes Brier Outer', 'Bayes Logscore Inner',
       'Bayes Logscore Outer', 'Bayes Maxprob Inner',
       'Bayes Maxprob Outer', 'Bayes Neglog Inner', 'Bayes Neglog Outer',
       'Bayes Spherical Inner', 'Bayes Spherical Outer',
       'Excess Brier Inner Outer', 'Excess Brier Outer Outer',
       'Excess Logscore Inner Outer', 'Excess Logscore Outer Outer',
       'Excess Maxprob Inner Outer', 'Excess Maxprob Outer Outer',
       'Excess Neglog Inner Outer', 'Excess Neglog Outer Outer',
       'Excess Spherical Inner Outer', 'Excess Spherical Outer Outer',
       'Excess Brier Outer Inner', 'Excess Logscore Outer Inner',
       'Excess Maxprob Outer Inner', 'Excess Neglog Outer Inner',
       'Excess Sp

In [13]:
# Work on a copy so the full OOD table is left untouched.
df = full_ood_rocauc_dataframe.copy()

# Row masks on the loss function and on the scoring rule the measure is built on.
loss_is_logscore_ = df['LossFunction'].eq('Logscore')
loss_is_brier_ = df['LossFunction'].eq('Brier')
loss_is_spherical_ = df['LossFunction'].eq('Spherical')
rule_is_logscore_ = df['base_rule'].eq('Logscore')
rule_is_brier_ = df['base_rule'].eq('Brier')
rule_is_spherical_ = df['base_rule'].eq('Spherical')

# Measure and loss function use the same scoring rule.
df_logscore_logscore = df[rule_is_logscore_ & loss_is_logscore_]
df_brier_brier = df[rule_is_brier_ & loss_is_brier_]
df_spherical_spherical = df[rule_is_spherical_ & loss_is_spherical_]

# Measure uses any other rule than the loss function (rows with a missing
# `base_rule` count as "different").
df_logscore_not_logscore = df[~rule_is_logscore_ & loss_is_logscore_]
df_brier_not_brier = df[~rule_is_brier_ & loss_is_brier_]
df_spherical_not_spherical = df[~rule_is_spherical_ & loss_is_spherical_]

In [14]:
# (in-distribution training dataset, out-of-distribution dataset) pairs to summarise.
# The first element is matched against the `training_dataset` column and the second
# against the `Dataset` column of the ROC AUC table.
ood_detection_pairs_ = [
    ("cifar10", "cifar100"),
    ("cifar10", "svhn"),
    ("cifar10", "blurred_cifar10"),
    ("cifar10", "blurred_cifar100"),
    
    ("cifar100", "cifar10"),
    ("cifar100", "svhn"),
    ("cifar100", "blurred_cifar100"),
    ("cifar100", "blurred_cifar10"),
]

In [15]:
def collect_scores_into_dict(
    dataframes_list,
    ood_detection_pairs,
):
    """Summarise OOD-detection ROC AUC per risk type for each (InD, OOD) pair.

    For every dataframe and every ``(ind, ood)`` pair, the rows with
    ``training_dataset == ind`` and ``Dataset == ood`` are grouped by
    ``RiskType``; the mean and the standard deviation (pandas default, sample
    std) of ``RocAucScore`` are recorded per group.

    Parameters
    ----------
    dataframes_list : iterable of pandas.DataFrame
        Each frame needs the columns ``training_dataset``, ``Dataset``,
        ``RiskType``, ``RocAucScore`` and ``LossFunction``.
    ood_detection_pairs : iterable of (str, str)
        ``(in-distribution, out-of-distribution)`` dataset names.

    Returns
    -------
    (dict, dict)
        Column-oriented dicts of equal-length lists (means, then stds). Keys are
        ``InD``, ``OOD``, ``ScoringRule`` (array of the distinct ``LossFunction``
        values of the selected rows) and one key per known risk type. A pair
        contributes no row when none of its rows carries a known risk type. A
        risk type that is missing for a contributing pair is recorded as NaN, so
        all lists stay aligned row by row.
    """
    meta_columns = ("InD", "OOD", "ScoringRule")
    risk_columns = (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
    scores_dict_ = {column: [] for column in meta_columns + risk_columns}
    std_dict_ = {column: [] for column in meta_columns + risk_columns}
    missing_value = float("nan")

    for dataframe_ in dataframes_list:
        for ind, ood in ood_detection_pairs:
            df_aux_ = dataframe_[(dataframe_['training_dataset'] == ind) & (dataframe_['Dataset'] == ood)]
            if df_aux_.empty:
                continue

            # One pass yields both statistics, indexed by risk type.
            stats_ = df_aux_.groupby('RiskType')['RocAucScore'].agg(['mean', 'std'])
            if not any(risk in stats_.index for risk in risk_columns):
                continue
            mean_rocauc_dict = stats_['mean'].to_dict()
            std_rocauc_dict = stats_['std'].to_dict()

            scoring_rules = df_aux_['LossFunction'].unique()
            for target_, per_risk_ in (
                (scores_dict_, mean_rocauc_dict),
                (std_dict_, std_rocauc_dict),
            ):
                target_['InD'].append(ind)
                target_['OOD'].append(ood)
                target_['ScoringRule'].append(scoring_rules)
                # Always append exactly one value per risk column; appending only the
                # risk types that are present would leave the lists ragged.
                for risk in risk_columns:
                    target_[risk].append(per_risk_.get(risk, missing_value))
    return scores_dict_, std_dict_

In [16]:
# Subsets in which `base_rule` equals `LossFunction`, one per scoring rule.
dataframes_list_ = [df_logscore_logscore, df_brier_brier, df_spherical_spherical]

(
    same_loss_and_rule_auc_ood_scores,
    same_loss_and_rule_auc_ood_stds,
) = collect_scores_into_dict(
    dataframes_list=dataframes_list_,
    ood_detection_pairs=ood_detection_pairs_,
)

In [17]:
[(k, len(v)) for k, v in same_loss_and_rule_auc_ood_scores.items()]

[('InD', 24),
 ('OOD', 24),
 ('ScoringRule', 24),
 ('Bayes', 24),
 ('Excess', 24),
 ('Total', 24),
 ('Bregman Information', 24),
 ('Reverse Bregman Information', 24),
 ('Bias', 24),
 ('MV', 24),
 ('MVBI', 24),
 ('BiasBI', 24)]

In [18]:
# Turn the column-oriented dicts into tables; only the means are shown here.
df_same_loss_and_rule_auc_ood_scores = pd.DataFrame(same_loss_and_rule_auc_ood_scores)
df_same_loss_and_rule_auc_ood_stds = pd.DataFrame(same_loss_and_rule_auc_ood_stds)
display(df_same_loss_and_rule_auc_ood_scores)

Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,[Logscore],0.914079,0.906951,0.911875,0.909895,0.904494,0.873392,0.905585,0.907065,0.906188
1,cifar10,svhn,[Logscore],0.946279,0.941402,0.947366,0.943206,0.939799,0.907629,0.939128,0.940624,0.941274
2,cifar10,blurred_cifar10,[Logscore],0.853335,0.884232,0.870077,0.880864,0.886334,0.877347,0.879818,0.881034,0.888893
3,cifar10,blurred_cifar100,[Logscore],0.949466,0.963244,0.960294,0.962704,0.963299,0.929539,0.95896,0.960683,0.965982
4,cifar100,cifar10,[Logscore],0.798088,0.777089,0.797498,0.78453,0.770263,0.650342,0.775659,0.779585,0.771545
5,cifar100,svhn,[Logscore],0.863542,0.814139,0.86292,0.824921,0.80377,0.671158,0.807161,0.814989,0.814691
6,cifar100,blurred_cifar100,[Logscore],0.737009,0.806826,0.768277,0.789543,0.818764,0.788267,0.799479,0.798489,0.816391
7,cifar100,blurred_cifar10,[Logscore],0.893088,0.937911,0.920143,0.932675,0.94005,0.831969,0.932128,0.93533,0.940021
8,cifar10,cifar100,[Brier],0.909967,0.898636,0.907978,0.898636,0.898636,0.5,0.898636,0.90383,0.898636
9,cifar10,svhn,[Brier],0.946496,0.932397,0.944622,0.932397,0.932398,0.5,0.932397,0.937498,0.932397


In [19]:
# Aggregation applied to every risk-type column when the summary tables are
# grouped by (InD, OOD) in the cells below.
agg_func_ = 'mean'

In [20]:
# One `[agg_func_]` entry per risk-type column, shared by both aggregations below.
risk_agg_spec_ = {
    risk_column_: [agg_func_]
    for risk_column_ in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

# Collapse the per-scoring-rule rows into one row per (InD, OOD) pair.
same_ = df_same_loss_and_rule_auc_ood_scores.groupby(by=['InD', 'OOD']).agg(risk_agg_spec_)

same_std_ = df_same_loss_and_rule_auc_ood_stds.groupby(by=['InD', 'OOD']).agg(risk_agg_spec_)

In [21]:
# Subsets in which `base_rule` differs from `LossFunction`, one per loss function.
dataframes_list_ = [df_logscore_not_logscore, df_brier_not_brier, df_spherical_not_spherical]

(
    different_loss_and_rule_auc_ood_scores,
    different_loss_and_rule_auc_ood_stds,
) = collect_scores_into_dict(
    dataframes_list=dataframes_list_,
    ood_detection_pairs=ood_detection_pairs_,
)

In [22]:
# Turn the column-oriented dicts into tables and show means, then standard deviations.
df_different_loss_and_rule_auc_ood_scores = pd.DataFrame(different_loss_and_rule_auc_ood_scores)
df_different_loss_and_rule_auc_ood_stds = pd.DataFrame(different_loss_and_rule_auc_ood_stds)
display(
    df_different_loss_and_rule_auc_ood_scores,
    df_different_loss_and_rule_auc_ood_stds,
)

Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,[Logscore],0.911669,0.880713,0.907463,0.880809,0.879046,0.577084,0.800445,0.887039,0.72111
1,cifar10,svhn,[Logscore],0.943459,0.922179,0.942361,0.921385,0.921473,0.588438,0.826014,0.92753,0.738937
2,cifar10,blurred_cifar10,[Logscore],0.847425,0.865071,0.867846,0.864098,0.86276,0.593717,0.782951,0.862787,0.711494
3,cifar10,blurred_cifar100,[Logscore],0.94524,0.948981,0.955106,0.948205,0.947617,0.5938,0.842119,0.95128,0.754968
4,cifar100,cifar10,[Logscore],0.793787,0.674897,0.75742,0.679104,0.666191,0.449614,0.627539,0.69479,0.624747
5,cifar100,svhn,[Logscore],0.857668,0.683172,0.811584,0.689508,0.672893,0.438739,0.619545,0.699754,0.616514
6,cifar100,blurred_cifar100,[Logscore],0.726072,0.740933,0.745895,0.724778,0.743342,0.511609,0.69447,0.75711,0.64479
7,cifar100,blurred_cifar10,[Logscore],0.884895,0.79831,0.875106,0.793925,0.783013,0.452923,0.742174,0.853014,0.69807
8,cifar10,cifar100,[Brier],0.910021,0.884589,0.908833,0.885405,0.882483,0.689025,0.798137,0.88598,0.720628
9,cifar10,svhn,[Brier],0.947106,0.922424,0.945194,0.922898,0.920889,0.704567,0.824763,0.925284,0.73732


Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,[Logscore],0.00931,0.028026,0.011649,0.029784,0.029842,0.290801,0.185647,0.030756,0.338323
1,cifar10,svhn,[Logscore],0.023556,0.020672,0.018957,0.022499,0.022293,0.323475,0.201542,0.020016,0.366966
2,cifar10,blurred_cifar10,[Logscore],0.048905,0.050919,0.043401,0.052279,0.055085,0.282564,0.17792,0.050393,0.302889
3,cifar10,blurred_cifar100,[Logscore],0.019947,0.015424,0.013107,0.014417,0.019405,0.327348,0.211381,0.017286,0.371647
4,cifar100,cifar10,[Logscore],0.01354,0.172815,0.118419,0.187464,0.174005,0.227628,0.14954,0.160627,0.253636
5,cifar100,svhn,[Logscore],0.031599,0.211041,0.139608,0.227997,0.213334,0.27144,0.179948,0.205181,0.288262
6,cifar100,blurred_cifar100,[Logscore],0.033963,0.057734,0.030398,0.077674,0.050508,0.195899,0.121357,0.036948,0.225463
7,cifar100,blurred_cifar10,[Logscore],0.020402,0.18313,0.103956,0.212391,0.174228,0.307265,0.164065,0.106357,0.347617
8,cifar10,cifar100,[Brier],0.007595,0.024006,0.008039,0.02596,0.026271,0.319691,0.184296,0.026103,0.337247
9,cifar10,svhn,[Brier],0.025825,0.02927,0.026862,0.03116,0.031483,0.35105,0.201413,0.029183,0.36789


In [23]:
# One `[agg_func_]` entry per risk-type column, shared by both aggregations below.
risk_agg_spec_ = {
    risk_column_: [agg_func_]
    for risk_column_ in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

# Collapse the per-loss-function rows into one row per (InD, OOD) pair.
different_ = df_different_loss_and_rule_auc_ood_scores.groupby(by=['InD', 'OOD']).agg(risk_agg_spec_)

different_std_ = df_different_loss_and_rule_auc_ood_stds.groupby(by=['InD', 'OOD']).agg(risk_agg_spec_)

In [24]:
# No filtering: every scoring rule / loss function combination is pooled.
dataframes_list_ = [full_ood_rocauc_dataframe]

# The collected dicts are converted to tables straight away, so both names end up
# bound to DataFrames (means, then standard deviations).
all_loss_and_rule_auc_ood_scores, all_loss_and_rule_auc_ood_stds = (
    pd.DataFrame.from_dict(collected_)
    for collected_ in collect_scores_into_dict(dataframes_list_, ood_detection_pairs_)
)
display(all_loss_and_rule_auc_ood_scores)

Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,"[Brier, Logscore, Spherical]",0.909741,0.885812,0.907915,0.886837,0.884004,0.648434,0.818162,0.888744,0.755701
1,cifar10,svhn,"[Brier, Logscore, Spherical]",0.944739,0.92178,0.943283,0.922217,0.920411,0.661076,0.84447,0.925017,0.775065
2,cifar10,blurred_cifar10,"[Brier, Logscore, Spherical]",0.840028,0.868306,0.859813,0.86785,0.867112,0.66066,0.802464,0.866577,0.748029
3,cifar10,blurred_cifar100,"[Brier, Logscore, Spherical]",0.94296,0.950863,0.953562,0.951221,0.948995,0.674925,0.863725,0.951724,0.796075
4,cifar100,cifar10,"[Brier, Logscore, Spherical]",0.789664,0.658923,0.771684,0.664183,0.650016,0.482271,0.623093,0.683333,0.621224
5,cifar100,svhn,"[Brier, Logscore, Spherical]",0.849114,0.671667,0.82617,0.679712,0.660304,0.486484,0.625822,0.701315,0.623151
6,cifar100,blurred_cifar100,"[Brier, Logscore, Spherical]",0.710689,0.73775,0.736708,0.727106,0.739945,0.568505,0.699635,0.752579,0.674617
7,cifar100,blurred_cifar10,"[Brier, Logscore, Spherical]",0.877117,0.784151,0.88431,0.786013,0.769282,0.541619,0.730607,0.831519,0.713139


In [25]:
# One `[agg_func_]` entry per risk-type column.
risk_agg_spec_ = {
    risk_column_: [agg_func_]
    for risk_column_ in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

# One row per (InD, OOD) pair for the pooled table.
all_ = all_loss_and_rule_auc_ood_scores.groupby(by=['InD', 'OOD']).agg(risk_agg_spec_)

In [26]:
display(all_)
display(same_)
# display(same_std_)
display(different_)

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
cifar10,blurred_cifar10,0.840028,0.868306,0.859813,0.86785,0.867112,0.66066,0.802464,0.866577,0.748029
cifar10,blurred_cifar100,0.94296,0.950863,0.953562,0.951221,0.948995,0.674925,0.863725,0.951724,0.796075
cifar10,cifar100,0.909741,0.885812,0.907915,0.886837,0.884004,0.648434,0.818162,0.888744,0.755701
cifar10,svhn,0.944739,0.92178,0.943283,0.922217,0.920411,0.661076,0.84447,0.925017,0.775065
cifar100,blurred_cifar10,0.877117,0.784151,0.88431,0.786013,0.769282,0.541619,0.730607,0.831519,0.713139
cifar100,blurred_cifar100,0.710689,0.73775,0.736708,0.727106,0.739945,0.568505,0.699635,0.752579,0.674617
cifar100,cifar10,0.789664,0.658923,0.771684,0.664183,0.650016,0.482271,0.623093,0.683333,0.621224
cifar100,svhn,0.849114,0.671667,0.82617,0.679712,0.660304,0.486484,0.625822,0.701315,0.623151


Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
cifar10,blurred_cifar10,0.84376,0.878007,0.858962,0.8763,0.879336,0.742875,0.876733,0.877689,0.879062
cifar10,blurred_cifar100,0.944833,0.956472,0.954181,0.956019,0.956755,0.788508,0.954771,0.957421,0.957182
cifar10,cifar100,0.910498,0.90077,0.908697,0.901993,0.89963,0.754901,0.899628,0.90248,0.900815
cifar10,svhn,0.945444,0.933077,0.944439,0.933888,0.932275,0.776912,0.93176,0.934453,0.933303
cifar100,blurred_cifar10,0.879629,0.853572,0.903189,0.858332,0.844763,0.705564,0.84149,0.897856,0.860863
cifar100,blurred_cifar100,0.713974,0.775527,0.741477,0.767559,0.780346,0.683394,0.767353,0.783094,0.779179
cifar100,cifar10,0.793471,0.726959,0.792706,0.734864,0.718211,0.609855,0.722217,0.749536,0.72968
cifar100,svhn,0.849026,0.73994,0.845882,0.750673,0.728042,0.625403,0.729552,0.779153,0.746241


Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
cifar10,blurred_cifar10,0.839095,0.865881,0.860026,0.865738,0.864056,0.640106,0.783897,0.863799,0.715271
cifar10,blurred_cifar100,0.942491,0.94946,0.953408,0.950022,0.947056,0.64653,0.840964,0.9503,0.755798
cifar10,cifar100,0.909552,0.882072,0.90772,0.883048,0.880097,0.621818,0.797796,0.88531,0.719423
cifar10,svhn,0.944563,0.918956,0.942994,0.9193,0.917445,0.632117,0.822647,0.922658,0.735505
cifar100,blurred_cifar10,0.876489,0.766796,0.87959,0.767933,0.750412,0.500633,0.702887,0.814934,0.676208
cifar100,blurred_cifar100,0.709868,0.728306,0.735516,0.716993,0.729845,0.539783,0.682705,0.744951,0.648477
cifar100,cifar10,0.788713,0.641915,0.766429,0.646513,0.632968,0.450375,0.598313,0.666782,0.59411
cifar100,svhn,0.849136,0.654599,0.821242,0.661972,0.64337,0.451754,0.59989,0.681856,0.592378


In [30]:
((same_ - different_) / different_) * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,OOD,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
cifar10,blurred_cifar10,0.555968,1.400479,-0.123627,1.220094,1.768375,16.054943,11.842995,1.608018,22.899135
cifar10,blurred_cifar100,0.248431,0.738526,0.081061,0.631248,1.024183,21.960057,13.532907,0.749366,26.64523
cifar10,cifar100,0.10396,2.119807,0.107712,2.145375,2.219375,21.402198,12.764164,1.939416,25.213547
cifar10,svhn,0.093334,1.536685,0.153295,1.586851,1.616415,22.906476,13.26357,1.278345,26.892702
cifar100,blurred_cifar10,0.358193,11.31668,2.682884,11.771812,12.573196,40.934306,19.719178,10.175233,27.307374
cifar100,blurred_cifar100,0.578526,6.483645,0.810371,7.052482,6.919423,26.605389,12.398986,5.120277,20.155268
cifar100,cifar10,0.603263,13.248542,3.428479,13.665869,13.467205,35.410572,20.709056,12.41086,22.818988
cifar100,svhn,-0.012897,13.037129,3.000271,13.39947,13.160795,38.438683,21.614327,14.269409,25.973753


In [33]:
# Mean ROC AUC summaries for each single subset; the standard deviations are discarded.

# Measure built on the same scoring rule as the loss function.
only_spherical_ood_scores, _ = collect_scores_into_dict([df_spherical_spherical], ood_detection_pairs_)
only_logscore_ood_scores, _ = collect_scores_into_dict([df_logscore_logscore], ood_detection_pairs_)
only_brier_ood_scores, _ = collect_scores_into_dict([df_brier_brier], ood_detection_pairs_)

# Measure built on a different scoring rule than the loss function.
only_NOT_spherical_ood_scores, _ = collect_scores_into_dict([df_spherical_not_spherical], ood_detection_pairs_)
only_NOT_logscore_ood_scores, _ = collect_scores_into_dict([df_logscore_not_logscore], ood_detection_pairs_)

# `dataframes_list_` stays bound to the last subset that was processed.
dataframes_list_ = [df_brier_not_brier]
only_NOT_brier_ood_scores, _ = collect_scores_into_dict(dataframes_list_, ood_detection_pairs_)

In [36]:
display(pd.DataFrame.from_dict(only_spherical_ood_scores))
# display(pd.DataFrame.from_dict(only_logscore_ood_scores))
# display(pd.DataFrame.from_dict(only_brier_ood_scores))

Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,[Spherical],0.907448,0.896724,0.906239,0.897449,0.89576,0.891309,0.894663,0.896545,0.897621
1,cifar10,svhn,[Spherical],0.943557,0.925433,0.94133,0.92606,0.924628,0.923107,0.923754,0.925237,0.926236
2,cifar10,blurred_cifar10,[Spherical],0.832773,0.873367,0.847361,0.871614,0.87525,0.851277,0.87396,0.87263,0.871872
3,cifar10,blurred_cifar100,[Spherical],0.940389,0.95282,0.949292,0.952,0.953614,0.935984,0.952001,0.952211,0.952213
4,cifar100,cifar10,[Spherical],0.791713,0.717949,0.793172,0.734225,0.69853,0.679223,0.705154,0.725589,0.731656
5,cifar100,svhn,[Spherical],0.843926,0.730703,0.842822,0.752121,0.70538,0.705051,0.706518,0.738789,0.749055
6,cifar100,blurred_cifar100,[Spherical],0.699028,0.770412,0.719319,0.763793,0.772932,0.761915,0.75324,0.764293,0.771804
7,cifar100,blurred_cifar10,[Spherical],0.874251,0.844488,0.893772,0.864004,0.815922,0.784722,0.814025,0.854458,0.864251


In [37]:
display(pd.DataFrame.from_dict(only_NOT_spherical_ood_scores))
# display(pd.DataFrame.from_dict(only_NOT_logscore_ood_scores))
# display(pd.DataFrame.from_dict(only_NOT_brier_ood_scores))

Unnamed: 0,InD,OOD,ScoringRule,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
0,cifar10,cifar100,[Spherical],0.906966,0.880914,0.906862,0.882932,0.878763,0.599345,0.794806,0.88291,0.716531
1,cifar10,svhn,[Spherical],0.943123,0.912265,0.941426,0.913617,0.909973,0.603345,0.817165,0.91516,0.730258
2,cifar10,blurred_cifar10,[Spherical],0.827966,0.864819,0.84996,0.865716,0.863014,0.623909,0.7847,0.864421,0.718188
3,cifar10,blurred_cifar100,[Spherical],0.938337,0.948747,0.95076,0.950795,0.945294,0.625755,0.839752,0.949873,0.755761
4,cifar100,cifar10,[Spherical],0.783978,0.603616,0.784348,0.611969,0.593721,0.435117,0.550345,0.630582,0.544873
5,cifar100,svhn,[Spherical],0.845792,0.617834,0.843157,0.628538,0.606064,0.452459,0.557134,0.657892,0.537335
6,cifar100,blurred_cifar100,[Spherical],0.694427,0.71797,0.733989,0.713769,0.716441,0.547432,0.665756,0.735116,0.647505
7,cifar100,blurred_cifar10,[Spherical],0.871103,0.726729,0.89577,0.737728,0.708348,0.522125,0.642367,0.775578,0.626667


In [30]:
# std prediction on OOD vs InD
# pure noise ?
# 

In [31]:
full_mis_rocauc_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,RocAucScore,architecture,training_dataset,base_rule,RiskType
190,MVBI Neglog,Logscore,0.934249,vgg,cifar10,Neglog,MVBI
37,Bayes Logscore Inner,Logscore,0.918993,resnet18,missed_class_cifar10,Logscore,Bayes
140,MV Logscore,Spherical,0.939694,vgg,cifar10,Logscore,MV
21,Total Neglog Inner,Brier,0.803334,resnet18,noisy_cifar10,Neglog,Total
66,Excess Logscore Inner Outer,Brier,0.752933,resnet18,noisy_cifar10,Logscore,Excess
29,Total Spherical Inner,Spherical,0.808568,resnet18,noisy_cifar100,Spherical,Total
68,Excess Logscore Inner Outer,Spherical,0.946205,resnet18,cifar10,Logscore,Excess
118,Bregman Information Spherical,Logscore,0.95126,resnet18,cifar10,Spherical,Bregman Information
51,Bayes Neglog Outer,Brier,0.936227,vgg,cifar10,Neglog,Bayes
109,Bregman Information Logscore,Logscore,0.868259,vgg,cifar100,Logscore,Bregman Information


In [32]:
def collect_scores_into_dict_miss(
    dataframe_list_,
):
    """Collect mean and std of ``RocAucScore`` per training dataset and risk type.

    For every dataframe in ``dataframe_list_`` and every distinct value of its
    ``training_dataset`` column, the matching rows are grouped by ``RiskType``
    and the mean and sample standard deviation of ``RocAucScore`` are computed.
    One record is emitted per (dataframe, training dataset) pair that contains
    at least one of the known risk types; pairs without any are skipped.

    Parameters
    ----------
    dataframe_list_ : iterable of pandas.DataFrame
        Frames providing the columns ``training_dataset``, ``RiskType``,
        ``RocAucScore`` and ``LossFunction``.

    Returns
    -------
    tuple of (dict, dict)
        ``(scores_dict_, std_dict_)``: column-oriented dicts mapping a column
        name to a list with one entry per emitted record.  ``'InD'`` holds the
        training dataset value, ``'ScoringRule'`` the array returned by
        ``LossFunction.unique()`` for the group, and every other key is a risk
        type holding the mean (respectively std) of ``RocAucScore``.  A risk
        type that does not occur in a group is stored as NaN, so all lists
        always have the same length and the dicts can be handed directly to
        ``pd.DataFrame.from_dict``.
    """
    risk_types = (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
    column_names = ("InD", "ScoringRule") + risk_types
    scores_dict_ = {name: [] for name in column_names}
    std_dict_ = {name: [] for name in column_names}
    missing_ = float("nan")

    for dataframe_ in dataframe_list_:
        for ind in dataframe_["training_dataset"].unique():
            df_aux_ = dataframe_[dataframe_["training_dataset"] == ind]

            # A single groupby pass yields both statistics, indexed by RiskType.
            stats_ = df_aux_.groupby("RiskType")["RocAucScore"].agg(["mean", "std"])

            # Skip groups that contain none of the risk types of interest.
            if not any(risk in stats_.index for risk in risk_types):
                continue

            for risk in risk_types:
                if risk in stats_.index:
                    scores_dict_[risk].append(float(stats_.at[risk, "mean"]))
                    std_dict_[risk].append(float(stats_.at[risk, "std"]))
                else:
                    # Pad absent risk types so every column keeps one entry
                    # per record; otherwise the lists become ragged and the
                    # rows no longer line up with 'InD' / 'ScoringRule'.
                    scores_dict_[risk].append(missing_)
                    std_dict_[risk].append(missing_)

            scores_dict_["InD"].append(ind)
            scores_dict_["ScoringRule"].append(df_aux_["LossFunction"].unique())

            std_dict_["InD"].append(ind)
            std_dict_["ScoringRule"].append(df_aux_["LossFunction"].unique())
    return scores_dict_, std_dict_

In [33]:
# Collect per-risk-type means and stds over the whole table, passed as a
# one-element list because the collector iterates over a list of frames.
all_mis_scores_dict, all_mis_stds_dict = collect_scores_into_dict_miss([full_mis_rocauc_dataframe])

In [34]:
# Aggregation spec shared by both tables: each risk-type column is reduced
# with a plain mean inside every InD group.
risk_mean_spec_ = {
    risk_name: ['mean']
    for risk_name in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

all_miss_scores = pd.DataFrame.from_dict(all_mis_scores_dict)
all_miss_stds = pd.DataFrame.from_dict(all_mis_stds_dict)

# One row per InD value; columns are (risk type, 'mean').
all_miss_ = all_miss_scores.groupby(by=['InD']).agg(risk_mean_spec_)

all_miss_stds_ = all_miss_stds.groupby(by=['InD']).agg(risk_mean_spec_)

In [35]:
df = full_mis_rocauc_dataframe.copy()

# Rows whose base_rule and LossFunction are the same scoring rule.
df_logscore_logscore = df.loc[df['base_rule'].eq('Logscore') & df['LossFunction'].eq('Logscore')]
df_brier_brier = df.loc[df['base_rule'].eq('Brier') & df['LossFunction'].eq('Brier')]
df_spherical_spherical = df.loc[df['base_rule'].eq('Spherical') & df['LossFunction'].eq('Spherical')]

# Rows with the given base_rule but any other LossFunction.
df_logscore_not_logscore = df.loc[df['base_rule'].eq('Logscore') & df['LossFunction'].ne('Logscore')]
df_brier_not_brier = df.loc[df['base_rule'].eq('Brier') & df['LossFunction'].ne('Brier')]
df_spherical_not_spherical = df.loc[df['base_rule'].eq('Spherical') & df['LossFunction'].ne('Spherical')]

In [36]:
# Slices in which base_rule and LossFunction differ.
dataframes_list_ = [df_logscore_not_logscore, df_brier_not_brier, df_spherical_not_spherical]

different_miss_, different_miss_stds_ = collect_scores_into_dict_miss(dataframes_list_)

dif_miss_scores = pd.DataFrame.from_dict(different_miss_)
dif_miss_stds = pd.DataFrame.from_dict(different_miss_stds_)

# Each risk-type column is averaged within every InD group.
risk_mean_spec_ = {
    risk_name: ['mean']
    for risk_name in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

dif_miss_ = dif_miss_scores.groupby(by=['InD']).agg(risk_mean_spec_)

dif_miss_stds_ = dif_miss_stds.groupby(by=['InD']).agg(risk_mean_spec_)

In [37]:
# Slices in which base_rule and LossFunction coincide.
dataframes_list_ = [df_logscore_logscore, df_brier_brier, df_spherical_spherical]

same_miss_, same_miss_stds_ = collect_scores_into_dict_miss(dataframes_list_)

same_miss_scores = pd.DataFrame.from_dict(same_miss_)
same_miss_stds = pd.DataFrame.from_dict(same_miss_stds_)

# Each risk-type column is averaged within every InD group.
risk_mean_spec_ = {
    risk_name: ['mean']
    for risk_name in (
        "Bayes",
        "Excess",
        "Total",
        "Bregman Information",
        "Reverse Bregman Information",
        "Bias",
        "MV",
        "MVBI",
        "BiasBI",
    )
}

same_miss_ = same_miss_scores.groupby(by=['InD']).agg(risk_mean_spec_)

same_miss_stds_ = same_miss_stds.groupby(by=['InD']).agg(risk_mean_spec_)

In [48]:
# Render the three summary tables in order: all rows, same base rule / loss,
# different base rule / loss.  same_miss_stds_ is not shown (its display
# call was commented out in the original cell).
display(all_miss_, same_miss_, dif_miss_)

Unnamed: 0_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
cifar10,0.943484,0.931047,0.944691,0.92989,0.930042,0.662205,0.855112,0.938054,0.785894
cifar100,0.857517,0.738732,0.838695,0.747216,0.723869,0.47313,0.694561,0.77519,0.687763
missed_class_cifar10,0.935886,0.804109,0.900291,0.822846,0.782947,0.571222,0.735386,0.811123,0.701942
noisy_cifar10,0.797909,0.710099,0.790137,0.71766,0.701365,0.521974,0.665631,0.728768,0.66109
noisy_cifar100,0.817177,0.626465,0.790428,0.637416,0.613515,0.438118,0.586976,0.661751,0.6061


Unnamed: 0_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
cifar10,0.945336,0.946466,0.947785,0.94685,0.946002,0.771472,0.946749,0.947012,0.944343
cifar100,0.86467,0.829012,0.868342,0.837389,0.819206,0.641034,0.827994,0.849672,0.828769
missed_class_cifar10,0.937303,0.832276,0.911413,0.863369,0.808606,0.67082,0.826791,0.838529,0.817787
noisy_cifar10,0.809958,0.743538,0.809733,0.747681,0.73979,0.578071,0.747875,0.745838,0.735353
noisy_cifar100,0.826432,0.722989,0.824493,0.732837,0.712474,0.56219,0.723853,0.746175,0.725489


Unnamed: 0_level_0,Bayes,Excess,Total,Bregman Information,Reverse Bregman Information,Bias,MV,MVBI,BiasBI
Unnamed: 0_level_1,mean,mean,mean,mean,mean,mean,mean,mean,mean
InD,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
cifar10,0.945101,0.946168,0.947711,0.946325,0.945983,0.77469,0.946292,0.946487,0.945692
cifar100,0.864251,0.798898,0.868299,0.807496,0.789045,0.659415,0.791859,0.819773,0.802774
missed_class_cifar10,0.937562,0.836913,0.923151,0.853055,0.82399,0.680137,0.832792,0.836088,0.831157
noisy_cifar10,0.807179,0.734348,0.813703,0.737235,0.731251,0.625287,0.733178,0.732824,0.735901
noisy_cifar100,0.825088,0.677607,0.824163,0.686561,0.668176,0.592575,0.67206,0.707838,0.683091


In [39]:
from pprint import pprint
import scipy

In [47]:
# Per training dataset (resnet18 only): load the cached uncertainty results and
# print, for every loss function, the accuracy on the in-distribution split
# plus four summary statistics of the predictions on every extraction dataset.
architecture__ = 'resnet18'

for training_dataset_name__ in (
    'cifar100',
    'cifar10',
    'missed_class_cifar10',
    'noisy_cifar10',
    'noisy_cifar100',
):
    print('***' * 10)

    # The targets / predictions are looked up under the last '_'-separated
    # token of the training dataset name (e.g. 'noisy_cifar10' -> 'cifar10').
    ind_dataset_name__ = training_dataset_name__.split('_')[-1]

    uq_results, embeddings_per_dataset, targets_per_dataset = get_uncertainty_scores(
        loss_function_names=loss_function_names,
        training_dataset_name=training_dataset_name__,
        architecture=architecture__,
        model_ids=model_ids,
        list_extraction_datasets=list_extraction_datasets,
        temperature=temperature,
        use_cheating_approximation=use_cheating_approximation,
        use_cached=True
    )

    # Keep only the first 1/len(model_ids) of the target array
    # (presumably the targets are repeated once per model -- TODO confirm).
    ind_targets = targets_per_dataset[ind_dataset_name__]
    max_ind = int(ind_targets.shape[0] / len(model_ids))
    true_labels = ind_targets[:max_ind]

    pred_labels = get_predicted_labels(
        embeddings_per_dataset=embeddings_per_dataset,
        training_dataset_name=ind_dataset_name__,
    )

    for loss_name_ in embeddings_per_dataset:
        logits_by_dataset = embeddings_per_dataset[loss_name_]

        mean_label_std_dict = {}
        mean_probs_std_dict = {}
        mean_prob_max_dict = {}
        mean_prob_max_std_dict = {}
        for k in logits_by_dataset:
            logits = logits_by_dataset[k]

            # Std over axis 0 (assumed to index ensemble members -- TODO
            # confirm) of the arg-max class index, averaged over the rest.
            argmax_labels = np.argmax(logits, axis=-1)
            mean_label_std_dict[k] = np.mean(np.std(argmax_labels, axis=0))

            probs = scipy.special.softmax(logits, axis=-1)
            max_probs = np.max(probs, axis=-1)

            mean_probs_std_dict[k] = np.mean(np.std(probs, axis=0))
            mean_prob_max_dict[k] = np.mean(max_probs)
            mean_prob_max_std_dict[k] = np.mean(np.std(max_probs, axis=0))

        print(loss_name_)
        print()

        print('*' * 50)
        print(training_dataset_name__)
        # Fraction of predicted labels equal to the true labels.
        print(np.mean(pred_labels[loss_name_]==true_labels))

        # Section titles are user-facing output and stay in Russian; in order
        # they read: "mean std of labels", "mean std of probabilities",
        # "mean maximum predicted probability", "mean std of the maximum
        # probability".
        for section_title_, section_stats_ in (
            ('Средняя стд меток', mean_label_std_dict),
            ('Средняя стд вероятностей', mean_probs_std_dict),
            ('Средний максимум предсказанной вероятности', mean_prob_max_dict),
            ('Средний стд максимальной вероятности', mean_prob_max_std_dict),
        ):
            print(section_title_)
            pprint(section_stats_)
            print()

******************************
brier_score

**************************************************
cifar100
0.8011
Средняя стд меток
{'blurred_cifar10': 21.813289597404694,
 'blurred_cifar100': 16.73135320522602,
 'cifar10': 17.4652607304963,
 'cifar100': 7.855076023785706,
 'svhn': 19.039289996490712}

Средняя стд вероятностей
{'blurred_cifar10': 0.010390457,
 'blurred_cifar100': 0.008810887,
 'cifar10': 0.0077831345,
 'cifar100': 0.0044766455,
 'svhn': 0.008115297}

Средний максимум предсказанной вероятности
{'blurred_cifar10': 0.34225285,
 'blurred_cifar100': 0.5287537,
 'cifar10': 0.42845762,
 'cifar100': 0.73283696,
 'svhn': 0.36268753}

Средний стд максимальной вероятности
{'blurred_cifar10': 0.20330442,
 'blurred_cifar100': 0.22653645,
 'cifar10': 0.17721102,
 'cifar100': 0.12787572,
 'svhn': 0.17467557}

cross_entropy

**************************************************
cifar100
0.8034
Средняя стд меток
{'blurred_cifar10': 21.137793679380056,
 'blurred_cifar100': 16.171449069661296,

In [6]:
# Mean, std and row count of RocAucScore for every combination of training
# dataset, architecture, evaluated dataset and UQ metric, with the group keys
# turned back into ordinary columns.
grouped_df = (
    full_ood_rocauc_dataframe
    .groupby(by=['training_dataset', 'architecture', 'Dataset', 'UQMetric'])
    .agg({'RocAucScore': ['mean', 'std', 'count']})
    .reset_index()
)
grouped_df
# Unfinished idea left over from the original cell: strip a trailing
# " Inner" / " Outer" from UQMetric with re.sub(r'\s+(Inner|Outer)$', '', x)
# and sort the table by the mean RocAucScore in descending order.

Unnamed: 0_level_0,training_dataset,architecture,Dataset,UQMetric,RocAucScore,RocAucScore,RocAucScore
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,count
0,cifar10,resnet18,blurred_cifar10,Bayes Brier Inner,0.895658,0.005439098,3
1,cifar10,resnet18,blurred_cifar10,Bayes Brier Outer,0.879385,0.006993505,3
2,cifar10,resnet18,blurred_cifar10,Bayes Logscore Inner,0.897807,0.006073052,3
3,cifar10,resnet18,blurred_cifar10,Bayes Logscore Outer,0.88131,0.007207124,3
4,cifar10,resnet18,blurred_cifar10,Bayes Maxprob Inner,0.894268,0.005422858,3
5,cifar10,resnet18,blurred_cifar10,Bayes Maxprob Outer,0.881589,0.005368289,3
6,cifar10,resnet18,blurred_cifar10,Bayes Neglog Inner,0.900037,0.007520226,3
7,cifar10,resnet18,blurred_cifar10,Bayes Neglog Outer,0.8649,0.003683366,3
8,cifar10,resnet18,blurred_cifar10,Bayes Spherical Inner,0.895658,0.005439085,3
9,cifar10,resnet18,blurred_cifar10,Bayes Spherical Outer,0.881916,0.005849764,3


In [7]:
# Collapse every UQMetric name to its risk-type keyword: whatever follows the
# first "Bayes", "Excess", "Total" or "Bregman" is dropped, text in front of
# the keyword is kept ("Bayes Brier Inner" -> "Bayes"), and names without any
# of these keywords are left unchanged.
grouped_df['UQMetric'] = grouped_df['UQMetric'].map(
    lambda metric_name: re.sub(r'(Bayes|Excess|Total|Bregman)(.+)', r'\1', metric_name)
)

In [8]:
# Show the table again; UQMetric now holds the collapsed risk-type names.
grouped_df

Unnamed: 0_level_0,training_dataset,architecture,Dataset,UQMetric,RocAucScore,RocAucScore,RocAucScore
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,std,count
0,cifar10,resnet18,blurred_cifar10,Bayes,0.895658,0.005439098,3
1,cifar10,resnet18,blurred_cifar10,Bayes,0.879385,0.006993505,3
2,cifar10,resnet18,blurred_cifar10,Bayes,0.897807,0.006073052,3
3,cifar10,resnet18,blurred_cifar10,Bayes,0.88131,0.007207124,3
4,cifar10,resnet18,blurred_cifar10,Bayes,0.894268,0.005422858,3
5,cifar10,resnet18,blurred_cifar10,Bayes,0.881589,0.005368289,3
6,cifar10,resnet18,blurred_cifar10,Bayes,0.900037,0.007520226,3
7,cifar10,resnet18,blurred_cifar10,Bayes,0.8649,0.003683366,3
8,cifar10,resnet18,blurred_cifar10,Bayes,0.895658,0.005439085,3
9,cifar10,resnet18,blurred_cifar10,Bayes,0.881916,0.005849764,3


In [12]:
# Second-level aggregation: rows that now share the same collapsed UQMetric
# are combined by taking the mean, std and count of their per-row mean
# RocAucScore; group keys are restored as ordinary columns.
grouped_df_replaced = (
    grouped_df
    .groupby(by=['training_dataset', 'architecture', 'Dataset', 'UQMetric'])
    .agg({('RocAucScore', 'mean'): ['mean', 'std', 'count']})
    .reset_index()
)
grouped_df_replaced

Unnamed: 0_level_0,training_dataset,architecture,Dataset,UQMetric,RocAucScore,RocAucScore,RocAucScore
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,mean,mean,mean
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,mean,std,count
0,cifar10,resnet18,blurred_cifar10,Bayes,0.887253,0.01116923,10
1,cifar10,resnet18,blurred_cifar10,Bregman,0.900556,0.0436176,5
2,cifar10,resnet18,blurred_cifar10,Excess,0.901135,0.03979896,15
3,cifar10,resnet18,blurred_cifar10,Logscore Bias Term + MI,0.922276,,1
4,cifar10,resnet18,blurred_cifar10,Logscore Bias term,0.917337,,1
5,cifar10,resnet18,blurred_cifar10,Logscore Model Variance + MI,0.91609,,1
6,cifar10,resnet18,blurred_cifar10,Logscore Model Variance term,0.915004,,1
7,cifar10,resnet18,blurred_cifar10,Reverse Bregman,0.900284,0.04422344,5
8,cifar10,resnet18,blurred_cifar10,Total,0.902217,0.01134928,10
9,cifar10,resnet18,blurred_cifar100,Bayes,0.9598,0.00568609,10
