In [1]:
import sys
import os
import re
import numpy as np

sys.path.insert(0, "./src/")

import pandas as pd
from src.table_utils import (
    collect_scores_into_dict,
    extract_same_different_dataframes,
    ood_detection_pairs_,
    aggregate_over_measures,
)
from IPython.display import display

pd.set_option("display.max_rows", None)

  from .autonotebook import tqdm as notebook_tqdm
stty: 'standard input': Inappropriate ioctl for device


In [2]:
# Load the OOD ROC-AUC table; the first CSV column becomes the index.
ood_table_raw = pd.read_csv("./tables/full_ood_rocauc.csv", index_col=0)
# Keep only rows whose evaluation dataset differs from the training dataset.
differs_from_training = ood_table_raw["Dataset"].ne(ood_table_raw["training_dataset"])
full_ood_rocauc = ood_table_raw[differs_from_training]

In [3]:
# Discard every row whose metric name ends with "Inner Inner".
is_inner_inner = full_ood_rocauc["UQMetric"].str.endswith("Inner Inner")
full_ood_rocauc = full_ood_rocauc.loc[~is_inner_inner]

In [4]:
# Peek at ten random rows. A fixed random_state makes the preview identical on
# every "Restart & Run All" instead of showing a different sample each run.
full_ood_rocauc.sample(10, random_state=0)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScore,architecture,training_dataset,base_rule,RiskType
1087,MV Neglog,svhn,Logscore,0.944981,resnet18,cifar10,Neglog,MV
62,Total Spherical Outer,cifar10,Spherical,0.771593,vgg,cifar100,Spherical,Total
290,Bayes Spherical Outer,cifar100,Spherical,0.914115,resnet18,missed_class_cifar10,Spherical,Bayes
37,Total Neglog Outer,svhn,Logscore,0.156566,resnet18,noisy_cifar10,Neglog,Total
941,BiasBI Brier,blurred_cifar100,Spherical,0.913722,resnet18,noisy_cifar10,Brier,BiasBI
451,Excess Brier Outer Inner,cifar10,Logscore,0.717323,resnet18,cifar100,Brier,Excess
994,BiasBI Maxprob,cifar100,Logscore,0.5,resnet18,noisy_cifar100,Maxprob,BiasBI
105,Total Neglog Inner,cifar10,Brier,0.806482,resnet18,noisy_cifar100,Neglog,Total
235,Bayes Maxprob Outer,blurred_cifar100,Logscore,0.7435,resnet18,cifar100,Maxprob,Bayes
483,Excess Maxprob Outer Inner,cifar100,Brier,0.5,resnet18,noisy_cifar100,Maxprob,Excess


# How often is Excess better than Bayes in out-of-distribution detection tasks?

In [5]:
# List the distinct risk types present in the table.
full_ood_rocauc["RiskType"].unique()

array(['Total', 'Bayes', 'Excess', 'Bregman Information',
       'Reverse Bregman Information',
       'Expected Pairwise Bregman Information', 'Bias', 'MV', 'MVBI',
       'BiasBI'], dtype=object)

In [6]:
# Optional step (currently disabled): drop the rows whose base_rule is 'Neglog'.
# Uncomment the line below to apply it to every comparison that follows.

# full_ood_rocauc = full_ood_rocauc[full_ood_rocauc.base_rule != 'Neglog']

In [7]:
# RiskType value used as the "Excess" side of the comparisons below.
# Options: "Bregman Information", "Reverse Bregman Information",
# "Expected Pairwise Bregman Information".
EXCESS_APPROXIMATION = "Expected Pairwise Bregman Information"

In [8]:
# Rows of the selected Excess approximation, evaluated on a dataset other than
# the training one.
is_excess = full_ood_rocauc["RiskType"].eq(EXCESS_APPROXIMATION)
is_shifted = full_ood_rocauc["Dataset"].ne(full_ood_rocauc["training_dataset"])
# The metric-name and risk-type columns are removed from the result.
excess_ood_scores = full_ood_rocauc.loc[is_excess & is_shifted].drop(
    columns=["UQMetric", "RiskType"]
)

In [9]:
# Bayes-risk rows on shifted data, selected once and then split by whether the
# metric name ends in "Inner" or "Outer".
bayes_shifted = full_ood_rocauc[
    (full_ood_rocauc["RiskType"] == "Bayes")
    & (full_ood_rocauc["Dataset"] != full_ood_rocauc["training_dataset"])
]
bayes_metric_names = bayes_shifted["UQMetric"]

bayes_inner_ood_scores = bayes_shifted[bayes_metric_names.str.endswith("Inner")].drop(
    columns=["UQMetric", "RiskType"]
)

bayes_outer_ood_scores = bayes_shifted[bayes_metric_names.str.endswith("Outer")].drop(
    columns=["UQMetric", "RiskType"]
)

In [10]:
# Join keys: every column except the score itself.
not_score = bayes_outer_ood_scores.columns != "RocAucScore"
merge_columns = bayes_outer_ood_scores.columns[not_score].tolist()

In [11]:
# Display the join keys computed in the previous cell.
merge_columns

['Dataset', 'LossFunction', 'architecture', 'training_dataset', 'base_rule']

### Excess is better than Bayes Inner:

In [12]:
# Pair each Excess row with the Bayes Inner row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Inner.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_inner_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Inner"],
)

In [13]:
# Flag the rows where the Excess score is strictly higher than the Bayes Inner
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Inner"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.27956989247311825


### Excess is better than Bayes Outer:

In [14]:
# Pair each Excess row with the Bayes Outer row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Outer.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_outer_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Outer"],
)

In [15]:
# Flag the rows where the Excess score is strictly higher than the Bayes Outer
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Outer"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.34408602150537637


## Only soft-OOD scenario

In [16]:
# Soft-OOD rows: trained on "cifar10" and evaluated on "blurred_cifar10", or
# trained on "cifar100" and evaluated on "blurred_cifar100".
train_names = full_ood_rocauc["training_dataset"]
eval_names = full_ood_rocauc["Dataset"]
soft_pair_c10 = train_names.str.fullmatch("cifar10") & eval_names.str.fullmatch(
    "blurred_cifar10"
)
soft_pair_c100 = train_names.str.fullmatch("cifar100") & eval_names.str.fullmatch(
    "blurred_cifar100"
)
soft_ood_rocauc = full_ood_rocauc[soft_pair_c10 | soft_pair_c100]

In [17]:
# Soft-OOD rows of the selected Excess approximation, evaluated on a dataset
# other than the training one.
soft_is_excess = soft_ood_rocauc["RiskType"].eq(EXCESS_APPROXIMATION)
soft_is_shifted = soft_ood_rocauc["Dataset"].ne(soft_ood_rocauc["training_dataset"])
# The metric-name and risk-type columns are removed from the result.
excess_ood_scores = soft_ood_rocauc.loc[soft_is_excess & soft_is_shifted].drop(
    columns=["UQMetric", "RiskType"]
)

In [18]:
# Soft-OOD Bayes-risk rows on shifted data, selected once and then split by
# whether the metric name ends in "Inner" or "Outer".
soft_bayes_shifted = soft_ood_rocauc[
    (soft_ood_rocauc["RiskType"] == "Bayes")
    & (soft_ood_rocauc["Dataset"] != soft_ood_rocauc["training_dataset"])
]
soft_bayes_metric_names = soft_bayes_shifted["UQMetric"]

bayes_inner_ood_scores = soft_bayes_shifted[
    soft_bayes_metric_names.str.endswith("Inner")
].drop(columns=["UQMetric", "RiskType"])

bayes_outer_ood_scores = soft_bayes_shifted[
    soft_bayes_metric_names.str.endswith("Outer")
].drop(columns=["UQMetric", "RiskType"])

In [19]:
# Join keys: every column except the score itself.
not_score = bayes_outer_ood_scores.columns != "RocAucScore"
merge_columns = bayes_outer_ood_scores.columns[not_score].tolist()

In [20]:
# Display the join keys computed in the previous cell.
merge_columns

['Dataset', 'LossFunction', 'architecture', 'training_dataset', 'base_rule']

### Excess is better than Bayes Inner:

In [21]:
# Pair each Excess row with the Bayes Inner row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Inner.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_inner_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Inner"],
)

In [22]:
# Flag the rows where the Excess score is strictly higher than the Bayes Inner
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Inner"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.8333333333333334


### Excess is better than Bayes Outer:

In [23]:
# Pair each Excess row with the Bayes Outer row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Outer.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_outer_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Outer"],
)

In [24]:
# Flag the rows where the Excess score is strictly higher than the Bayes Outer
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Outer"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.8833333333333333


## Only hard-OOD scenario

In [25]:
# Hard-OOD rows: everything except the two soft pairs
# ("cifar10" -> "blurred_cifar10" and "cifar100" -> "blurred_cifar100").
train_names = full_ood_rocauc["training_dataset"]
eval_names = full_ood_rocauc["Dataset"]
soft_pair_c10 = train_names.str.fullmatch("cifar10") & eval_names.str.fullmatch(
    "blurred_cifar10"
)
soft_pair_c100 = train_names.str.fullmatch("cifar100") & eval_names.str.fullmatch(
    "blurred_cifar100"
)
is_soft_pair = soft_pair_c10 | soft_pair_c100
hard_ood_rocauc = full_ood_rocauc[~is_soft_pair]

In [26]:
# Hard-OOD rows of the selected Excess approximation, evaluated on a dataset
# other than the training one.
hard_is_excess = hard_ood_rocauc["RiskType"].eq(EXCESS_APPROXIMATION)
hard_is_shifted = hard_ood_rocauc["Dataset"].ne(hard_ood_rocauc["training_dataset"])
# The metric-name and risk-type columns are removed from the result.
excess_ood_scores = hard_ood_rocauc.loc[hard_is_excess & hard_is_shifted].drop(
    columns=["UQMetric", "RiskType"]
)

In [27]:
# Hard-OOD Bayes-risk rows on shifted data, selected once and then split by
# whether the metric name ends in "Inner" or "Outer".
hard_bayes_shifted = hard_ood_rocauc[
    (hard_ood_rocauc["RiskType"] == "Bayes")
    & (hard_ood_rocauc["Dataset"] != hard_ood_rocauc["training_dataset"])
]
hard_bayes_metric_names = hard_bayes_shifted["UQMetric"]

bayes_inner_ood_scores = hard_bayes_shifted[
    hard_bayes_metric_names.str.endswith("Inner")
].drop(columns=["UQMetric", "RiskType"])

bayes_outer_ood_scores = hard_bayes_shifted[
    hard_bayes_metric_names.str.endswith("Outer")
].drop(columns=["UQMetric", "RiskType"])

In [28]:
# Join keys: every column except the score itself. The bare name on the last
# line displays them.
not_score = bayes_outer_ood_scores.columns != "RocAucScore"
merge_columns = bayes_outer_ood_scores.columns[not_score].tolist()
merge_columns

['Dataset', 'LossFunction', 'architecture', 'training_dataset', 'base_rule']

### Excess is better than Bayes Inner:

In [29]:
# Pair each Excess row with the Bayes Inner row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Inner.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_inner_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Inner"],
)

In [30]:
# Flag the rows where the Excess score is strictly higher than the Bayes Inner
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Inner"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.19753086419753085


### Excess is better than Bayes Outer:

In [31]:
# Pair each Excess row with the Bayes Outer row sharing the same join keys;
# the two score columns become RocAucScoreExcess and RocAucScoreBayes_Outer.
merged_tab_ = pd.merge(
    excess_ood_scores,
    bayes_outer_ood_scores,
    how="inner",
    on=merge_columns,
    suffixes=["Excess", "Bayes_Outer"],
)

In [32]:
# Flag the rows where the Excess score is strictly higher than the Bayes Outer
# score, then print the share of such rows.
excess_wins = merged_tab_["RocAucScoreExcess"].gt(merged_tab_["RocAucScoreBayes_Outer"])
merged_tab_["compare_res"] = excess_wins
print(merged_tab_["compare_res"].mean())

0.2641975308641975


In [42]:
# For each candidate Excess approximation, compute how often it beats
# Bayes Inner on out-of-distribution detection over the full table.
# `results` maps the approximation name to that fraction.
results = {}

# The Bayes-side frames and the join keys do not depend on the approximation,
# so they are built once instead of on every loop iteration.
is_shifted = full_ood_rocauc.Dataset != full_ood_rocauc.training_dataset
is_bayes = full_ood_rocauc.RiskType == "Bayes"

bayes_inner_ood_scores = full_ood_rocauc[
    is_bayes & is_shifted & full_ood_rocauc.UQMetric.str.endswith("Inner")
].drop(columns=["UQMetric", "RiskType"])

# Not used in the comparison below; it supplies the join keys and is kept as a
# global, as before.
bayes_outer_ood_scores = full_ood_rocauc[
    is_bayes & is_shifted & full_ood_rocauc.UQMetric.str.endswith("Outer")
].drop(columns=["UQMetric", "RiskType"])

merge_columns = [el for el in bayes_outer_ood_scores.columns if el != "RocAucScore"]

# The loop variable is deliberately NOT named EXCESS_APPROXIMATION: reusing that
# name would overwrite the notebook-level setting and silently change the result
# of any earlier cell that is re-run afterwards.
for excess_approximation in [
    "Bregman Information",
    "Reverse Bregman Information",
    "Expected Pairwise Bregman Information",
]:
    excess_ood_scores = full_ood_rocauc[
        (full_ood_rocauc.RiskType == excess_approximation) & is_shifted
    ].drop(columns=["UQMetric", "RiskType"])

    # Excess is better than Bayes Inner:
    merged_tab_ = excess_ood_scores.merge(
        bayes_inner_ood_scores, on=merge_columns, suffixes=["Excess", "Bayes_Inner"]
    )
    merged_tab_["compare_res"] = (
        merged_tab_["RocAucScoreExcess"] > merged_tab_["RocAucScoreBayes_Inner"]
    )

    # Fraction of matched rows where the approximation scores strictly higher.
    win_rate = merged_tab_["compare_res"].mean()
    print(win_rate)
    results[excess_approximation] = win_rate

0.2645161290322581
0.26021505376344084
0.27956989247311825


In [43]:
# Display the per-approximation fractions collected in the previous cell.
results

{'Bregman Information': 0.2645161290322581,
 'Reverse Bregman Information': 0.26021505376344084,
 'Expected Pairwise Bregman Information': 0.27956989247311825}