In [1]:
import os
import csv
import numpy as np

import torch

from dataset import Dataset
from feature_extractor import FeatureExtractor
from evaluation import get_auroc, get_average_precision, get_tnr_frac_tpr
from monitors import OutsideTheBoxMonitor, GaussianMixtureMonitor, MaxSoftmaxProbabilityMonitor, MaxLogitMonitor, \
    EnergyMonitor, ReActMonitor, MahalanobisMonitor

from sklearn.mixture import GaussianMixture
from sklearn.metrics import average_precision_score

In [8]:
# --- Runtime settings -------------------------------------------------------
batch_size = 10
if torch.cuda.is_available():
    device_name = 'cuda:0'
else:
    device_name = 'cpu'

# --- Network under test -----------------------------------------------------
# `layer` is the single layer handed to FeatureExtractor (as `[layer]`).
model = "densenet"
layer = 98

# --- Datasets ---------------------------------------------------------------
# all_ood_datasets[i] lists the novelty datasets paired with all_id_datasets[i].
all_id_datasets = ["cifar10", "svhn", "cifar100"]
all_ood_datasets = [
    ["cifar100", "svhn", "lsun"],
    ["cifar10", "tiny_imagenet", "lsun"],
    ["cifar10", "svhn", "lsun"],
]

# Transformations / attacks applied to the in-distribution test split.
perturbations = ["brightness", "blur", "pixelization"]
attacks = ["fgsm", "deepfool", "pgd"]

# --- GMM hyperparameter grid ------------------------------------------------
cov_constraints = ["full", "diag", "tied", "spherical"]
n_clusters = [1, 2, 3, 5]

# One flag per OOD source, in the order novelties, perturbations, attacks:
# 1 for the three novelty datasets, 0 for the three perturbations and three attacks.
is_novel = [1] * 3 + [0] * 6

In [5]:
def _resolve_selection_metric(selection_metric):
    """Return the evaluation function matching a selection-metric name.

    Raises:
        ValueError: if `selection_metric` is not 'aupr', 'auroc' or 'tnr95tpr'.
    """
    if selection_metric == "aupr":
        return get_average_precision
    if selection_metric == "auroc":
        return get_auroc
    if selection_metric == "tnr95tpr":
        return get_tnr_frac_tpr
    raise ValueError("Error, unsupported selection metric. "
                     "Valid choices are 'aupr', 'auroc' and 'tnr95tpr'")


def _average_ranks(res_config):
    """Rank configurations (rows) within each selection dataset (column) and
    return the mean rank of every configuration; a higher rank is a better metric.

    NaN entries (configurations that could not be scored) are ranked as worst.
    """
    res_config = np.asarray(res_config, dtype=float)
    ranks = np.zeros(res_config.shape)
    for i in range(res_config.shape[1]):
        # argsort would place NaN last (i.e. best); map it to -inf so it ranks worst.
        column = np.where(np.isnan(res_config[:, i]), -np.inf, res_config[:, i])
        order = column.argsort()
        ranks[order, i] = np.arange(len(res_config))
    return np.mean(ranks, axis=1)


def select_parameters_gmm(features_train, pred_train, lab_train,
                          features_test, pred_test, lab_test,
                          features_ood, pred_ood, lab_ood,
                          is_novel_ood, eval_idx,
                          cov_constraints_list, n_clusters_list, 
                          selection_metric = "aupr",
                          random_state = None):
    """Select per-class GMM hyperparameters on held-out OOD sets, then evaluate.

    Procedure:
      1. Keep only training samples whose prediction equals their label.
      2. Split the test set in two halves: the second half is used for
         hyperparameter selection, the first half for the final evaluation.
      3. For every class and every (covariance type, number of components)
         pair, fit a `GaussianMixture` on that class's training features and
         score it with `selection_metric` on every OOD dataset except
         `eval_idx` (only samples predicted as that class are scored).
      4. Per class, pick the configuration with the highest average rank
         across the selection datasets.
      5. Fit a `GaussianMixtureMonitor` with the selected per-class settings
         and evaluate it on the first test half plus OOD dataset `eval_idx`.

    Args:
        features_train, pred_train, lab_train: training features, predicted
            classes and labels (indexed with boolean masks, so presumably
            NumPy arrays aligned on their first axis -- TODO confirm).
        features_test, pred_test, lab_test: same for the in-distribution test set.
        features_ood, pred_ood, lab_ood: lists with one entry per OOD dataset.
        is_novel_ood: one truthy/falsy flag per OOD dataset; when truthy the
            OOD labels are replaced by -1 before being passed to the metric.
        eval_idx: index of the OOD dataset held out for the final evaluation.
        cov_constraints_list: candidate `covariance_type` values.
        n_clusters_list: candidate `n_components` values.
        selection_metric: 'aupr', 'auroc' or 'tnr95tpr'.
        random_state: forwarded to `GaussianMixture` during the search so the
            selection can be made reproducible. It is not forwarded to
            `GaussianMixtureMonitor`.

    Returns:
        (constraints, n_components, ap, auroc, tnr): the selected covariance
        type and component count per class (in sorted class order), followed
        by the three evaluation metrics.

    Raises:
        ValueError: if `selection_metric` is not supported.

    NOTE(review): the final monitor is built from the notebook-level globals
    `id_dataset`, `model` and `layer`; they must be defined before the call.
    """
    # Fail fast on a bad metric name instead of hitting an undefined `metric` later.
    metric = _resolve_selection_metric(selection_metric)

    print("Tuning GMM hyperparameters for dataset ", eval_idx)
    
    correct_indices_train = (lab_train == pred_train)
    
    features_train = features_train[correct_indices_train]
    lab_train = lab_train[correct_indices_train]
    pred_train = pred_train[correct_indices_train]
    
    # Second half of the test set drives selection, first half is kept for evaluation.
    N = features_test.shape[0] // 2
    features_test_selec = features_test[N:]
    pred_test_selec = pred_test[N:]
    lab_test_selec = lab_test[N:]

    features_test_eval = features_test[:N]
    pred_test_eval = pred_test[:N]
    lab_test_eval = lab_test[:N]
    
    n_ood_datasets = len(lab_ood)
    # Value comparison: `is not` on ints only works by accident of int caching.
    select_indices = [k for k in range(n_ood_datasets) if k != eval_idx]
    
    features_ood_selec = [features_ood[i][:N] for i in select_indices]
    pred_ood_selec = [pred_ood[i][:N] for i in select_indices]
    lab_ood_selec = [lab_ood[i][:N] for i in select_indices]
    is_novel_ood_selec = [is_novel_ood[i] for i in select_indices]
        
    features_ood_eval = features_ood[eval_idx][:N]
    pred_ood_eval = pred_ood[eval_idx][:N]
    lab_ood_eval = lab_ood[eval_idx][:N]
    is_novel_ood_eval = is_novel_ood[eval_idx]
        
    constraints, n_components = [], []
    # Sorted class order so that position k in the result lists is deterministic
    # (presumably the monitor indexes these lists by class id -- TODO confirm).
    for cat in np.unique(lab_train):
        print("... Selecting hyperparameters for class ", cat)
        
        f_train = features_train[pred_train == cat]
        test_mask = (pred_test_selec == cat)
        
        res_config = []
        # Config index = i_cov * len(n_clusters_list) + i_n (decoded below).
        for cc in cov_constraints_list:
            for n in n_clusters_list:
                gmm = GaussianMixture(n_components=n, covariance_type=cc,
                                      random_state=random_state)
                gmm.fit(f_train)
                
                res = []
                for i in range(len(features_ood_selec)):
                    ood_mask = (pred_ood_selec[i] == cat)
                    f = np.concatenate([features_test_selec[test_mask], 
                                        features_ood_selec[i][ood_mask]])
                    p = np.concatenate([pred_test_selec[test_mask], 
                                        pred_ood_selec[i][ood_mask]])
                    if is_novel_ood_selec[i]:
                        l = np.concatenate([lab_test_selec[test_mask], 
                                            -np.ones(lab_ood_selec[i][ood_mask].shape)])
                    else:
                        l = np.concatenate([lab_test_selec[test_mask], 
                                            lab_ood_selec[i][ood_mask]])
                    try:
                        # Negative log-likelihood: larger means less likely under the GMM.
                        scores = -gmm.score_samples(f)
                    except ValueError as err:
                        # E.g. no sample predicted as this class. Record the
                        # config as unscored rather than reusing stale scores.
                        print("...... could not score config", cc, n, "on selection set", i, ":", err)
                        res.append(np.nan)
                        continue
                    
                    res.append(metric(scores, l, p))
                
                res_config.append(np.array(res))
                
        avg_ranks = _average_ranks(np.array(res_config, dtype=float))
        best = int(np.argmax(avg_ranks))

        constraints.append(cov_constraints_list[best // len(n_clusters_list)])
        n_components.append(n_clusters_list[best % len(n_clusters_list)])
                
    f_eval = np.concatenate([features_test_eval, features_ood_eval])
    p_eval = np.concatenate([pred_test_eval, pred_ood_eval])
    if is_novel_ood_eval:
        l_eval = np.concatenate([lab_test_eval, -np.ones(lab_ood_eval.shape)])
    else:
        l_eval = np.concatenate([lab_test_eval, lab_ood_eval])
        
    # `id_dataset`, `model` and `layer` are read from the notebook's global scope.
    monitor = GaussianMixtureMonitor(id_dataset, model, layer, n_components=n_components, constraint=constraints)
    monitor.fit(features_train, pred_train, lab_train, save=False)
    
    scores = -monitor.predict(f_eval, p_eval)
    
    ap = get_average_precision(scores, l_eval, p_eval)
    auroc = get_auroc(scores, l_eval, p_eval)
    tnr = get_tnr_frac_tpr(scores, l_eval, p_eval)
    
    print("AP       = ", ap)
    print("AUROC    = ", auroc)
    print("TNR95TPR = ", tnr)
    
    return constraints, n_components, ap, auroc, tnr

In [None]:
# For every in-distribution dataset: extract features once, then run the GMM
# hyperparameter selection with each of the nine OOD sources held out in turn.
for i_id, id_dataset in enumerate(all_id_datasets):
    print("ID dataset: ", id_dataset)

    train_dataset = Dataset(id_dataset, "train", model, batch_size=batch_size)
    test_dataset = Dataset(id_dataset, "test", model, batch_size=batch_size)

    feature_extractor = FeatureExtractor(model, id_dataset, [layer], device_name)

    novelties = all_ood_datasets[i_id]

    # Display names, in the same order as `ood_datasets` and `is_novel`.
    ood_names = ([novelties[j] for j in range(3)]
                 + [perturbations[j] for j in range(3)]
                 + [attacks[j] for j in range(3)])

    # Three novelty datasets, then the ID test split under three perturbations,
    # then the ID test split under three adversarial attacks.
    ood_datasets = [Dataset(novelties[j], "test", model, batch_size=batch_size)
                    for j in range(3)]
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, additional_transform=perturbations[j], batch_size=batch_size)
        for j in range(3))
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, adversarial_attack=attacks[j], batch_size=batch_size)
        for j in range(3))

    # get_features returns the features as a sequence; element 0 is kept since
    # a single layer was requested.
    (features_train, logits_train, softmax_train,
     pred_train, lab_train) = feature_extractor.get_features(train_dataset)
    features_train = features_train[0]
    (features_test, logits_test, softmax_test,
     pred_test, lab_test) = feature_extractor.get_features(test_dataset)
    features_test = features_test[0]

    features_ood = []
    logits_ood = []
    softmax_ood = []
    pred_ood = []
    lab_ood = []
    for k, ood_dataset in enumerate(ood_datasets):
        features, logits, softmax, pred, lab = feature_extractor.get_features(ood_dataset)
        features_ood.append(features[0])
        logits_ood.append(logits)
        softmax_ood.append(softmax)
        pred_ood.append(pred)
        lab_ood.append(lab)

    for eval_idx, ood_name in enumerate(ood_names):
        print("... OOD dataset: ", ood_name)
        constraints, n_components, ap, auroc, tnr = select_parameters_gmm(
            features_train, pred_train, lab_train,
            features_test, pred_test, lab_test,
            features_ood, pred_ood, lab_ood,
            is_novel, eval_idx,
            cov_constraints, n_clusters,
            selection_metric="tnr95tpr")
        print(constraints, n_components, ap, auroc, tnr)
        print()

ID dataset:  cifar10
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: ./Data/test_32x32.mat
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
... OOD dataset:  cifar100
Tuning GMM hyperparameters for dataset  0
... Selecting hyperparameters for class  0
... Selecting hyperparameters for class  1
... Selecting hyperparameters for class  2
... Selecting hyperparameters for class  3
... Selecting hyperparameters for class  4
... Selecting hyperparameters for class  5




... Selecting hyperparameters for class  6
... Selecting hyperparameters for class  7
... Selecting hyperparameters for class  8
... Selecting hyperparameters for class  9
AP       =  0.8971359573284498
AUROC    =  0.8917531399829207
TNR95TPR =  0.6020189003436427
['tied', 'full', 'tied', 'tied', 'tied', 'tied', 'tied', 'tied', 'tied', 'diag'] [2, 5, 5, 3, 5, 2, 3, 5, 5, 3] 0.8971359573284498 0.8917531399829207 0.6020189003436427

... OOD dataset:  svhn
Tuning GMM hyperparameters for dataset  1
... Selecting hyperparameters for class  0
... Selecting hyperparameters for class  1
... Selecting hyperparameters for class  2
... Selecting hyperparameters for class  3
... Selecting hyperparameters for class  4
... Selecting hyperparameters for class  5
... Selecting hyperparameters for class  6
... Selecting hyperparameters for class  7
... Selecting hyperparameters for class  8
... Selecting hyperparameters for class  9
AP       =  0.9301235201518434
AUROC    =  0.9507453360032513
TNR95TPR



... Selecting hyperparameters for class  6
... Selecting hyperparameters for class  7
... Selecting hyperparameters for class  8
... Selecting hyperparameters for class  9
AP       =  0.9458366957912194
AUROC    =  0.9397508140934626
TNR95TPR =  0.7167096219931272
['full', 'spherical', 'tied', 'tied', 'tied', 'tied', 'tied', 'tied', 'tied', 'diag'] [2, 1, 3, 5, 5, 2, 5, 3, 5, 3] 0.9458366957912194 0.9397508140934626 0.7167096219931272

... OOD dataset:  brightness
Tuning GMM hyperparameters for dataset  3
... Selecting hyperparameters for class  0
... Selecting hyperparameters for class  1


In [15]:
def eval_mahalanobis(features_train, pred_train, lab_train,
                     features_test, pred_test, lab_test,
                     features_ood, pred_ood, lab_ood,
                     is_novel_ood, eval_idx):
    """Fit a tied Mahalanobis monitor and print its metrics on one OOD dataset.

    The monitor is fitted on the correctly classified training samples and
    evaluated on the first half of the test set concatenated with the first
    samples (same count) of OOD dataset `eval_idx`. When `is_novel_ood[eval_idx]`
    is truthy, the OOD labels are replaced by -1.

    Prints "ap , auroc , tnr" on one line and returns None.

    NOTE(review): reads `id_dataset`, `model` and `layer` from the notebook's
    global scope.
    """
    # Restrict the fitting data to samples the classifier got right.
    keep = (lab_train == pred_train)
    fit_features = features_train[keep]
    fit_labels = lab_train[keep]
    fit_preds = pred_train[keep]

    # First half of the test set, and as many leading samples of the OOD set.
    half = features_test.shape[0] // 2
    head = slice(None, half)

    id_features, id_preds, id_labels = features_test[head], pred_test[head], lab_test[head]

    ood_features = features_ood[eval_idx][head]
    ood_preds = pred_ood[eval_idx][head]
    ood_labels = lab_ood[eval_idx][head]
    treat_as_novel = is_novel_ood[eval_idx]

    f_eval = np.concatenate([id_features, ood_features])
    p_eval = np.concatenate([id_preds, ood_preds])
    # Novel samples get the label -1; otherwise the original labels are kept.
    ood_targets = -np.ones(ood_labels.shape) if treat_as_novel else ood_labels
    l_eval = np.concatenate([id_labels, ood_targets])

    monitor = MahalanobisMonitor(id_dataset, model, layer, is_tied=True)
    monitor.fit(fit_features, fit_preds, fit_labels, save=False)

    scores = -monitor.predict(f_eval, p_eval)

    ap = get_average_precision(scores, l_eval, p_eval)
    auroc = get_auroc(scores, l_eval, p_eval)
    tnr = get_tnr_frac_tpr(scores, l_eval, p_eval)

    print(ap, ",", auroc, ",", tnr)

In [16]:
# For every in-distribution dataset: extract features once, then print the
# Mahalanobis monitor metrics for each of the nine OOD sources.
for i_id, id_dataset in enumerate(all_id_datasets):
    print("ID dataset: ", id_dataset)

    train_dataset = Dataset(id_dataset, "train", model, batch_size=batch_size)
    test_dataset = Dataset(id_dataset, "test", model, batch_size=batch_size)

    feature_extractor = FeatureExtractor(model, id_dataset, [layer], device_name)

    novelties = all_ood_datasets[i_id]

    # Names in the same order as `ood_datasets` and `is_novel`.
    ood_names = ([novelties[j] for j in range(3)]
                 + [perturbations[j] for j in range(3)]
                 + [attacks[j] for j in range(3)])

    # Novelty datasets first, then perturbed and attacked ID test splits.
    ood_datasets = [Dataset(novelties[j], "test", model, batch_size=batch_size)
                    for j in range(3)]
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, additional_transform=perturbations[j], batch_size=batch_size)
        for j in range(3))
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, adversarial_attack=attacks[j], batch_size=batch_size)
        for j in range(3))

    # Only one layer is requested, so element 0 of the returned features is kept.
    (features_train, logits_train, softmax_train,
     pred_train, lab_train) = feature_extractor.get_features(train_dataset)
    features_train = features_train[0]
    (features_test, logits_test, softmax_test,
     pred_test, lab_test) = feature_extractor.get_features(test_dataset)
    features_test = features_test[0]

    features_ood = []
    logits_ood = []
    softmax_ood = []
    pred_ood = []
    lab_ood = []
    for k, ood_dataset in enumerate(ood_datasets):
        features, logits, softmax, pred, lab = feature_extractor.get_features(ood_dataset)
        features_ood.append(features[0])
        logits_ood.append(logits)
        softmax_ood.append(softmax)
        pred_ood.append(pred)
        lab_ood.append(lab)

    # One "ap , auroc , tnr" line is printed per OOD source, in `ood_names` order.
    for eval_idx in range(len(ood_names)):
        eval_mahalanobis(features_train, pred_train, lab_train,
                         features_test, pred_test, lab_test,
                         features_ood, pred_ood, lab_ood,
                         is_novel, eval_idx)

ID dataset:  cifar10
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: ./Data/test_32x32.mat
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
0.6588314100290246 , 0.6202829072681704 , 0.10821052631578942
0.958238852315053 , 0.9457901152882205 , 0.6336842105263158
0.9455471620125726 , 0.9309913182957394 , 0.5741052631578947
0.44688559132855415 , 0.6913949650878644 , 0.18148921363952675
0.9274594621999217 , 0.9447058367028759 , 0.8672599886385155
0.5995855138595496 , 0.8163230958714781 , 0.4407281337871837
0.4613397496227938 , 0.7156830296054418 , 0.21087588210875885
0.4485499300626769 , 0.6913617895518684 , 0.16822691071678075
0.8281884302239098 , 0.8408507649878907 , 0.3953254080193431
ID dataset:  svhn
Using do

In [11]:
def eval_MSP(softmax_test, pred_test, lab_test,
             softmax_ood, pred_ood, lab_ood,
             is_novel_ood, eval_idx):
    """Print the max-softmax-probability monitor metrics on one OOD dataset.

    The evaluation set is the first half of the test set concatenated with the
    first samples (same count) of OOD dataset `eval_idx`. When
    `is_novel_ood[eval_idx]` is truthy, the OOD labels are replaced by -1.

    Prints "ap , auroc , tnr" on one line and returns None.
    """
    # First half of the test set, and as many leading samples of the OOD set.
    half = softmax_test.shape[0] // 2
    head = slice(None, half)

    id_softmax, id_preds, id_labels = softmax_test[head], pred_test[head], lab_test[head]

    ood_softmax = softmax_ood[eval_idx][head]
    ood_preds = pred_ood[eval_idx][head]
    ood_labels = lab_ood[eval_idx][head]
    treat_as_novel = is_novel_ood[eval_idx]

    s_eval = np.concatenate([id_softmax, ood_softmax])
    p_eval = np.concatenate([id_preds, ood_preds])
    # Novel samples get the label -1; otherwise the original labels are kept.
    ood_targets = -np.ones(ood_labels.shape) if treat_as_novel else ood_labels
    l_eval = np.concatenate([id_labels, ood_targets])

    monitor = MaxSoftmaxProbabilityMonitor()
    monitor.fit()

    scores = -monitor.predict(s_eval)

    ap = get_average_precision(scores, l_eval, p_eval)
    auroc = get_auroc(scores, l_eval, p_eval)
    tnr = get_tnr_frac_tpr(scores, l_eval, p_eval)

    print(ap, ",", auroc, ",", tnr)

In [12]:
# For every in-distribution dataset: run the network once over all splits,
# then print the max-softmax-probability metrics for each of the nine OOD sources.
for i_id, id_dataset in enumerate(all_id_datasets):
    print("ID dataset: ", id_dataset)

    train_dataset = Dataset(id_dataset, "train", model, batch_size=batch_size)
    test_dataset = Dataset(id_dataset, "test", model, batch_size=batch_size)

    feature_extractor = FeatureExtractor(model, id_dataset, [layer], device_name)

    novelties = all_ood_datasets[i_id]

    # Names in the same order as `ood_datasets` and `is_novel`.
    ood_names = ([novelties[j] for j in range(3)]
                 + [perturbations[j] for j in range(3)]
                 + [attacks[j] for j in range(3)])

    # Novelty datasets first, then perturbed and attacked ID test splits.
    ood_datasets = [Dataset(novelties[j], "test", model, batch_size=batch_size)
                    for j in range(3)]
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, additional_transform=perturbations[j], batch_size=batch_size)
        for j in range(3))
    ood_datasets.extend(
        Dataset(id_dataset, "test", model, adversarial_attack=attacks[j], batch_size=batch_size)
        for j in range(3))

    # NOTE(review): eval_MSP only consumes the softmax outputs, predictions and
    # labels of the test and OOD splits; the training pass is kept for parity
    # with the other evaluation cells.
    (features_train, logits_train, softmax_train,
     pred_train, lab_train) = feature_extractor.get_features(train_dataset)
    features_train = features_train[0]
    (features_test, logits_test, softmax_test,
     pred_test, lab_test) = feature_extractor.get_features(test_dataset)
    features_test = features_test[0]

    features_ood = []
    logits_ood = []
    softmax_ood = []
    pred_ood = []
    lab_ood = []
    for k, ood_dataset in enumerate(ood_datasets):
        features, logits, softmax, pred, lab = feature_extractor.get_features(ood_dataset)
        features_ood.append(features[0])
        logits_ood.append(logits)
        softmax_ood.append(softmax)
        pred_ood.append(pred)
        lab_ood.append(lab)

    # One "ap , auroc , tnr" line is printed per OOD source, in `ood_names` order.
    for eval_idx in range(len(ood_names)):
        eval_MSP(softmax_test, pred_test, lab_test,
                 softmax_ood, pred_ood, lab_ood,
                 is_novel, eval_idx)

ID dataset:  cifar10
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: ./Data/test_32x32.mat
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
0.9064580782796519 , 0.9143132230576441 , 0.6812631578947368
0.910390839581521 , 0.9220553784461153 , 0.7294736842105263
0.9621038076120779 , 0.9671329323308269 , 0.8846315789473684
0.5867067666681072 , 0.8182140610186756 , 0.4629088378566458
0.7597432344061327 , 0.8190328661658127 , 0.4090134444234047
0.6642986472353485 , 0.8435565113982139 , 0.5588278821962409
0.6664938089597721 , 0.8748798618555421 , 0.6305520963055209
0.34841115069457085 , 0.654007713338213 , 0.5365376554422243
0.5515102274587463 , 0.575356646529964 , 0.0
ID dataset:  svhn
Using downloaded and verified