In [1]:
from psruq.source.postprocessing_utils import (
    get_sampled_combinations_uncertainty_scores,
    get_predicted_labels,
    get_missclassification_dataframe,
    get_ood_detection_dataframe,
    get_raw_scores_dataframe,
)
from source.datasets.constants import DatasetName
from source.losses.constants import LossName
from source.models.constants import ModelName, ModelSource
from source.metrics import (
    ApproximationType,
    GName,
    RiskType,
)

import pandas as pd
import numpy as np
import pickle
from psruq.source.path_config import REPOSITORY_ROOT
import os
from psruq.source.postprocessing_utils import remove_and_expand_list

pd.set_option("display.max_rows", None)

stty: 'standard input': Inappropriate ioctl for device


In [2]:
# Experiment configuration: where the trained models come from and which
# training setups the cells below iterate over.
model_source = ModelSource.OUR_MODELS.value

# Only the uncommented entries are processed; toggle the comments to switch.
training_dataset_names = [
    # DatasetName.CIFAR10.value,
    # DatasetName.TINY_IMAGENET.value,
    # DatasetName.CIFAR100.value,
    # DatasetName.CIFAR10_NOISY_LABEL.value,
    DatasetName.CIFAR100_NOISY_LABEL.value,
]
temperature = 1.0
model_ids = np.arange(20)
architectures = [ModelName.RESNET18]

# For the TORCH_UNCERTAINTY source only the cross-entropy loss is requested;
# for any other source every member of LossName is used.
loss_function_names = (
    [LossName.CROSS_ENTROPY]
    if model_source == ModelSource.TORCH_UNCERTAINTY.value
    else list(LossName)
)

In [3]:
def get_lists_of_extracted_datasets(training_dataset_name: str) -> tuple[list, list]:
    """Return the datasets to extract and the OOD datasets for a training dataset.

    Args:
        training_dataset_name: ``.value`` of one of the supported ``DatasetName``
            members: CIFAR10, TINY_IMAGENET, CIFAR100, CIFAR100_NOISY_LABEL or
            CIFAR10_NOISY_LABEL.

    Returns:
        A pair ``(list_extraction_datasets, list_ood_datasets)``. The first is
        the hard-coded dataset list for the training dataset after it has been
        passed through ``remove_and_expand_list``; the second is a shallow copy
        of the first.

    Raises:
        ValueError: if ``training_dataset_name`` is not one of the supported
            names (previously this surfaced as an ``UnboundLocalError``).
    """
    if training_dataset_name == DatasetName.CIFAR10.value:
        list_extraction_datasets = [
            DatasetName.CIFAR10_NOISY_LABEL.value,
            DatasetName.CIFAR10.value,
            DatasetName.CIFAR100.value,
            DatasetName.SVHN.value,
            DatasetName.TINY_IMAGENET.value,
            DatasetName.CIFAR10C.value,
        ]
    elif training_dataset_name == DatasetName.TINY_IMAGENET.value:
        list_extraction_datasets = [
            DatasetName.TINY_IMAGENET.value,
            DatasetName.IMAGENET_A.value,
            DatasetName.IMAGENET_R.value,
            DatasetName.IMAGENET_O.value,
        ]
    elif training_dataset_name == DatasetName.CIFAR100.value:
        list_extraction_datasets = [
            DatasetName.CIFAR10.value,
            DatasetName.CIFAR100.value,
            DatasetName.SVHN.value,
            # DatasetName.CIFAR100C.value,
        ]
    elif training_dataset_name == DatasetName.CIFAR100_NOISY_LABEL.value:
        list_extraction_datasets = [
            DatasetName.CIFAR100.value,
        ]
    elif training_dataset_name == DatasetName.CIFAR10_NOISY_LABEL.value:
        list_extraction_datasets = [
            DatasetName.CIFAR10.value,
        ]
    else:
        # Fail loudly instead of falling through with an unbound local.
        raise ValueError(
            f"Unsupported training dataset name: {training_dataset_name!r}"
        )

    # NOTE(review): remove_and_expand_list is defined elsewhere; presumably it
    # drops/expands composite dataset entries - confirm against its source.
    list_extraction_datasets = remove_and_expand_list(list_extraction_datasets)
    list_ood_datasets = list(list_extraction_datasets)
    return list_extraction_datasets, list_ood_datasets

In [4]:
def postprocess_tables(
    full_ood_rocauc_dataframe_,
    full_mis_rocauc_dataframe_,
    full_dataframe_,
):
    """Add ``base_rule`` and ``RiskType`` columns parsed from ``UQMetric``.

    Every frame must provide a string ``UQMetric`` column. The frames are
    modified in place and the very same objects are returned, in argument
    order. Rows whose ``UQMetric`` matches neither pattern get a missing value.
    """
    # Target column -> regular expression whose single capture group is kept.
    # In the risk pattern the two-word alternatives are listed before the
    # single words so that e.g. "outer inner" is not cut down to "outer".
    extraction_patterns = {
        "base_rule": r"(LogScore|BrierScore|ZeroOneScore|SphericalScore)",
        "RiskType": r"(outer outer|outer inner|outer central|inner outer|inner inner|inner central|central outer|central inner|central central|energy inner|energy outer|outer|inner|central)",
    }

    tables = (
        full_ood_rocauc_dataframe_,
        full_mis_rocauc_dataframe_,
        full_dataframe_,
    )
    for table in tables:
        for column, pattern in extraction_patterns.items():
            table[column] = table["UQMetric"].str.extract(pattern)

    return full_ood_rocauc_dataframe_, full_mis_rocauc_dataframe_, full_dataframe_

In [5]:
# Build, post-process and persist the score tables for every training dataset.
# After the loop the module-level names (full_dataframe, uq_results,
# training_dataset_name_aux, ...) hold the results of the LAST dataset only.
for training_dataset_name in training_dataset_names:
    list_extraction_datasets, list_ood_datasets = get_lists_of_extracted_datasets(
        training_dataset_name
    )

    # For the noisy-label variants the results are looked up under the part of
    # the name before the first underscore (e.g. "cifar100").
    if training_dataset_name in (
        DatasetName.CIFAR10_NOISY_LABEL.value,
        DatasetName.CIFAR100_NOISY_LABEL.value,
    ):
        training_dataset_name_aux = training_dataset_name.split("_")[0]
    else:
        training_dataset_name_aux = training_dataset_name

    # Per-architecture frames are collected and concatenated once after the
    # loop instead of growing a frame with repeated pd.concat calls.
    raw_score_frames = []
    ood_rocauc_frames = []
    mis_rocauc_frames = []

    for architecture in architectures:
        uq_results, embeddings_per_dataset, targets_per_dataset = (
            get_sampled_combinations_uncertainty_scores(
                loss_function_names=loss_function_names,
                training_dataset_name=training_dataset_name,
                architecture=architecture,
                model_ids=model_ids,
                list_extraction_datasets=list_extraction_datasets,
                temperature=temperature,
                model_source=model_source,
                use_cached=False,
            )
        )

        df_ood = get_ood_detection_dataframe(
            ind_dataset=training_dataset_name_aux,
            uq_results=uq_results,
            list_ood_datasets=list_ood_datasets,
        )

        # NOTE(review): presumably the targets are stacked once per model, so
        # the first shape[0] / len(model_ids) entries are one copy - confirm.
        max_ind = targets_per_dataset[training_dataset_name_aux].shape[0] // len(
            model_ids
        )
        true_labels = targets_per_dataset[training_dataset_name_aux][:max_ind]

        pred_labels = get_predicted_labels(
            embeddings_per_dataset=embeddings_per_dataset,
            training_dataset_name=training_dataset_name_aux,
        )

        df_misclassification = get_missclassification_dataframe(
            ind_dataset=training_dataset_name_aux,
            uq_results=uq_results,
            true_labels=true_labels,
            pred_labels=pred_labels,
        )

        scores_df_unravel = get_raw_scores_dataframe(uq_results=uq_results)

        # Tag every table with the setup that produced it.
        for frame in (scores_df_unravel, df_ood, df_misclassification):
            frame["architecture"] = architecture.value
            frame["training_dataset"] = training_dataset_name

        raw_score_frames.append(scores_df_unravel)
        ood_rocauc_frames.append(df_ood)
        mis_rocauc_frames.append(df_misclassification)

    full_dataframe = pd.concat(raw_score_frames)
    full_ood_rocauc_dataframe = pd.concat(ood_rocauc_frames)
    full_mis_rocauc_dataframe = pd.concat(mis_rocauc_frames)

    # Rebind the canonical names to the post-processed frames so the tables
    # saved below do not depend on postprocess_tables mutating in place.
    full_ood_rocauc_dataframe, full_mis_rocauc_dataframe, full_dataframe = (
        postprocess_tables(
            full_ood_rocauc_dataframe_=full_ood_rocauc_dataframe,
            full_mis_rocauc_dataframe_=full_mis_rocauc_dataframe,
            full_dataframe_=full_dataframe,
        )
    )

    # to_pickle does not create missing parent directories, so make sure the
    # output directory exists before writing.
    relative_output_dir = "tables/central_tables/final"
    os.makedirs(os.path.join(REPOSITORY_ROOT, relative_output_dir), exist_ok=True)

    prefix = training_dataset_name + "_" + model_source + "_"
    for table, stem in (
        (full_dataframe, "full_dataframe"),
        (full_ood_rocauc_dataframe, "full_ood_rocauc"),
        (full_mis_rocauc_dataframe, "full_mis_rocauc"),
    ):
        table.to_pickle(
            os.path.join(
                REPOSITORY_ROOT, f"{relative_output_dir}/{prefix}{stem}.pkl"
            )
        )

  0%|          | 0/110 [00:00<?, ?it/s]

In [6]:
# Show the in-distribution dataset name left over from the last loop iteration.
training_dataset_name_aux

'cifar100'

In [5]:
# List the dataset keys stored for the "LogScore energy outer" metric under the
# CrossEntropy loss; uq_results comes from the last iteration of the loop above.
# NOTE(review): the saved output lists ImageNet datasets although the
# configuration selects CIFAR100 noisy label - looks stale, re-run to confirm.
uq_results["LogScore energy outer"]["CrossEntropy"].keys()

dict_keys(['tiny_imagenet', 'imagenet_a', 'imagenet_r', 'imagenet_o'])

In [6]:
# Preview ten randomly sampled rows of the raw-score table.
full_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,Dataset,Scores,architecture,training_dataset
980,SphericalScore BayesRisk outer,SphericalScore,tiny_imagenet,"[[0.17789865, 0.7583793, 0.5098355, 0.02870331...",resnet18,tiny_imagenet
385,BrierScore ExcessRisk outer central,CrossEntropy,imagenet_a,"[[0.23723434, 0.03039942, 0.054669026, 0.09136...",resnet18,tiny_imagenet
489,BrierScore BayesRisk inner,SphericalScore,imagenet_a,"[[0.65724003, 0.9308876, 0.912091, 0.93532413,...",resnet18,tiny_imagenet
10,LogScore TotalRisk outer outer,SphericalScore,imagenet_r,"[[4.902204, 2.9508655, 1.2938819, 3.739118, 4....",resnet18,tiny_imagenet
323,BrierScore TotalRisk inner central,SphericalScore,imagenet_o,"[[0.6010382, 0.431612, 0.1471363, 0.49661836, ...",resnet18,tiny_imagenet
132,LogScore ExcessRisk outer central,CrossEntropy,tiny_imagenet,"[[0.23100778, 0.56910986, 0.4589969, 0.0679755...",resnet18,tiny_imagenet
319,BrierScore TotalRisk inner central,BrierScore,imagenet_o,"[[0.6010382, 0.431612, 0.1471363, 0.49661836, ...",resnet18,tiny_imagenet
418,BrierScore ExcessRisk inner inner,SphericalScore,imagenet_r,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",resnet18,tiny_imagenet
838,SphericalScore TotalRisk central outer,SphericalScore,imagenet_r,"[[0.9545533436269606, 0.8859911670965148, 0.38...",resnet18,tiny_imagenet
919,SphericalScore ExcessRisk inner inner,BrierScore,imagenet_o,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",resnet18,tiny_imagenet


In [7]:
# Distinct values of the Dataset column in the OOD ROC AUC table.
full_ood_rocauc_dataframe.Dataset.unique()

array(['tiny_imagenet', 'imagenet_a', 'imagenet_r', 'imagenet_o'],
      dtype=object)

In [8]:
# Derive the "base_rule" and "RiskType" columns through the shared helper
# rather than repeating its regular expressions here. postprocess_tables
# updates the three frames in place and returns them in the same order.
(
    full_ood_rocauc_dataframe,
    full_mis_rocauc_dataframe,
    full_dataframe,
) = postprocess_tables(
    full_ood_rocauc_dataframe_=full_ood_rocauc_dataframe,
    full_mis_rocauc_dataframe_=full_mis_rocauc_dataframe,
    full_dataframe_=full_dataframe,
)

In [9]:
# Preview again to check that the "base_rule" and "RiskType" columns are present.
full_dataframe.sample(10)

Unnamed: 0,UQMetric,LossFunction,Dataset,Scores,architecture,training_dataset,base_rule,RiskType
896,SphericalScore ExcessRisk outer central,SphericalScore,tiny_imagenet,"[[0.008429126962601258, 0.08840444922807775, 0...",resnet18,tiny_imagenet,SphericalScore,outer central
433,BrierScore ExcessRisk central outer,CrossEntropy,imagenet_a,"[[0.23723432, 0.030399434, 0.054669186, 0.0913...",resnet18,tiny_imagenet,BrierScore,central outer
816,SphericalScore TotalRisk inner central,CrossEntropy,tiny_imagenet,"[[0.18632775730031337, 0.8467837293621233, 0.5...",resnet18,tiny_imagenet,SphericalScore,inner central
237,LogScore BayesRisk inner,SphericalScore,imagenet_a,"[[1.6223768, 3.8559604, 3.3577938, 3.7510993, ...",resnet18,tiny_imagenet,LogScore,inner
400,BrierScore ExcessRisk inner outer,BrierScore,tiny_imagenet,"[[0.026536442, 0.029567286, 0.060794458, 0.001...",resnet18,tiny_imagenet,BrierScore,inner outer
975,SphericalScore BayesRisk outer,CrossEntropy,imagenet_o,"[[0.32719886, 0.22051707, 0.0755226, 0.2691950...",resnet18,tiny_imagenet,SphericalScore,outer
227,LogScore BayesRisk outer,SphericalScore,imagenet_o,"[[1.6531043, 0.9266964, 0.34597713, 1.156508, ...",resnet18,tiny_imagenet,LogScore,outer
626,ZeroOneScore ExcessRisk outer inner,CrossEntropy,imagenet_r,"[[0.17329028, 0.091238156, 0.0, 0.073936634, 0...",resnet18,tiny_imagenet,ZeroOneScore,outer inner
127,LogScore ExcessRisk outer inner,BrierScore,imagenet_o,"[[0.3859374, 0.15969387, 0.021598311, 0.128690...",resnet18,tiny_imagenet,LogScore,outer inner
693,ZeroOneScore ExcessRisk central outer,SphericalScore,imagenet_a,"[[0.0090661645, 0.08915358, 0.0853706, 0.07697...",resnet18,tiny_imagenet,ZeroOneScore,central outer


In [10]:
# Spot-check ten randomly sampled rows of the OOD ROC AUC table.
full_ood_rocauc_dataframe.sample(10)

Unnamed: 0,UQMetric,Dataset,LossFunction,RocAucScores_array,architecture,training_dataset,base_rule,RiskType
787,SphericalScore TotalRisk outer central,imagenet_r,BrierScore,"[0.8217763066666666, 0.82397291, 0.8284398, 0....",resnet18,tiny_imagenet,SphericalScore,outer central
303,BrierScore TotalRisk inner inner,imagenet_a,CrossEntropy,"[0.8298071066666667, 0.8325714399999999, 0.835...",resnet18,tiny_imagenet,BrierScore,inner inner
475,BrierScore BayesRisk outer,imagenet_r,BrierScore,"[0.812054965, 0.8143152816666668, 0.8197883766...",resnet18,tiny_imagenet,BrierScore,outer
526,ZeroOneScore TotalRisk outer inner,imagenet_o,BrierScore,"[0.729591725, 0.73271675, 0.7338373250000001, ...",resnet18,tiny_imagenet,ZeroOneScore,outer inner
887,SphericalScore ExcessRisk outer inner,imagenet_o,SphericalScore,"[0.717291325, 0.7167095500000001, 0.7198998750...",resnet18,tiny_imagenet,SphericalScore,outer inner
411,BrierScore ExcessRisk inner inner,imagenet_a,CrossEntropy,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,tiny_imagenet,BrierScore,inner inner
255,BrierScore TotalRisk outer outer,imagenet_a,CrossEntropy,"[0.8154712466666668, 0.8150871133333334, 0.821...",resnet18,tiny_imagenet,BrierScore,outer outer
421,BrierScore ExcessRisk inner central,tiny_imagenet,BrierScore,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,tiny_imagenet,BrierScore,inner central
563,ZeroOneScore TotalRisk inner inner,imagenet_o,SphericalScore,"[0.729591675, 0.732716775, 0.733837125, 0.7326...",resnet18,tiny_imagenet,ZeroOneScore,inner inner
164,LogScore ExcessRisk inner inner,imagenet_r,SphericalScore,"[0.5, 0.5, 0.5, 0.5, 0.5]",resnet18,tiny_imagenet,LogScore,inner inner


In [11]:
# full_dataframe.to_csv('../../tables/central_tables/full_dataframe.csv')
# full_ood_rocauc_dataframe.to_csv('../../tables/central_tables/full_ood_rocauc.csv')
# full_mis_rocauc_dataframe.to_csv('../../tables/central_tables/full_mis_rocauc.csv')

In [18]:
# Number of training datasets selected in the configuration cell.
len(training_dataset_names)

1

In [21]:
# Legacy file-name prefix: "imagenet_" only when TinyImageNet is the single
# selected training dataset, otherwise no prefix at all.
is_tiny_imagenet_only = (
    len(training_dataset_names) == 1
    and training_dataset_names[0] == DatasetName.TINY_IMAGENET.value
)
prefix = "imagenet_" if is_tiny_imagenet_only else ""

In [22]:
# Show the file-name prefix currently in effect.
prefix

'imagenet_'

In [23]:
# Write the three tables currently in memory to tables/central_tables/ under
# the repository root, using whatever `prefix` currently holds.
for table, stem in (
    (full_dataframe, "full_dataframe"),
    (full_ood_rocauc_dataframe, "full_ood_rocauc"),
    (full_mis_rocauc_dataframe, "full_mis_rocauc"),
):
    table.to_pickle(
        os.path.join(REPOSITORY_ROOT, f"tables/central_tables/{prefix}{stem}.pkl")
    )

In [13]:
# full_dataframe.to_pickle(os.path.join(REPOSITORY_ROOT, "tables/central_tables/full_dataframe.csv"))
# full_ood_rocauc_dataframe.to_pickle(os.path.join(REPOSITORY_ROOT, "tables/central_tables/full_ood_rocauc.csv"))
# full_mis_rocauc_dataframe.to_pickle(os.path.join(REPOSITORY_ROOT, "tables/central_tables/full_mis_rocauc.csv"))