In [8]:
from dataset import Dataset
from feature_extractor import FeatureExtractor
from monitors import MahalanobisMonitor, GaussianMixtureMonitor, OutsideTheBoxMonitor, MaxSoftmaxProbabilityMonitor,\
                    MaxLogitMonitor, EnergyMonitor, ReActMonitor
from evaluation import Evaluator

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

import numpy as np
import matplotlib.pyplot as plt

In [10]:
# ---- Experiment configuration -------------------------------------------

# Batch size passed to every Dataset loader.
batch_size = 10

# Network under test and the layer ids handed to the FeatureExtractor.
# Alternative pairing kept from the original cell (presumably the densenet
# counterpart of the layer list -- confirm before switching):
#   model = "densenet"
#   layers_ids = [9, 19, 29, 40, 50, 60, 69, 79, 89, 98]
model = "resnet"
layers_ids = [2, 5, 8, 12, 15, 19, 22, 26, 29, 32]

# In-distribution and OOD data sources. They are the same dataset here, so the
# OOD split differs from the ID test split only through the options below.
id_dataset = "svhn"
ood_dataset = "svhn"

# Options that are passed only to the OOD Dataset.
additional_transform = None  # alternative: "pixelization"
adversarial_attack = "fgsm"

In [11]:
# The train and test splits of the in-distribution dataset use identical
# settings, so both are built from one expression (train first, then test).
dataset_train, dataset_test = (
    Dataset(id_dataset, split, model, batch_size=batch_size)
    for split in ("train", "test")
)

# The OOD loader reads the "test" split and is the only one that receives the
# additional transform and the adversarial attack.
dataset_ood = Dataset(
    ood_dataset, "test", model, additional_transform, adversarial_attack,
    batch_size=batch_size,
)

Using downloaded and verified file: ./Data/train_32x32.mat
Using downloaded and verified file: ./Data/test_32x32.mat
Using downloaded and verified file: ./Data/test_32x32.mat


In [12]:
# Build the extractor for the chosen model and in-distribution dataset;
# layers_ids is the list of layer ids it is asked to expose.
feature_extractor = FeatureExtractor(model, id_dataset, layers_ids)

In [13]:
# Run the extractor over the train, test and OOD datasets, in that order.
# Each call is unpacked into five values, named here features, logits,
# softmax, predictions and labels.
(features_train, logits_train, softmax_train,
 pred_train, lab_train) = feature_extractor.get_features(dataset_train)

(features_test, logits_test, softmax_test,
 pred_test, lab_test) = feature_extractor.get_features(dataset_test)

(features_ood, logits_ood, softmax_ood,
 pred_ood, lab_ood) = feature_extractor.get_features(dataset_ood)

Extracting layers: 'layer1.0.relu_1', 'layer1.2.relu', 'layer2.0.relu_1', 'layer2.2.relu_1', 'layer3.0.relu', 'layer3.2.relu', 'layer3.3.relu_1', 'layer3.5.relu_1', 'layer4.1.relu', 'layer4.2.relu_1'


100%|███████████████████████████████████████████████████████████████████| 2604/2604 [01:09<00:00, 37.36it/s]


In [14]:
# Classification accuracy on the in-distribution test split.
id_accuracy = accuracy_score(lab_test, pred_test)

# Accuracy on the OOD split is only computed when it comes from the same
# dataset as the ID data; otherwise 0 is kept as a placeholder (it is not a
# measured accuracy).
ood_accuracy = accuracy_score(lab_ood, pred_ood) if id_dataset == ood_dataset else 0

print("Accuracy")
print("ID:  ", id_accuracy)
print("OOD: ", ood_accuracy)

Accuracy
ID:   0.9668484941610326
OOD:  0.6509680393362016


In [15]:
# One evaluator per task ("oms" first, then "ood"). is_novelty is True only
# when the OOD data comes from a different dataset than the ID data.
eval_oms, eval_ood = (
    Evaluator(task, is_novelty=(id_dataset != ood_dataset))
    for task in ("oms", "ood")
)

# Both evaluators are fitted on the same labels and predictions.
eval_oms.fit_ground_truth(lab_test, lab_ood, pred_test, pred_ood)
eval_ood.fit_ground_truth(lab_test, lab_ood, pred_test, pred_ood)

# Reference point: feed the "ood" evaluator's ground truth (cast to bool) to the
# "oms" evaluator as if it were a monitor's output. The first len(lab_test)
# entries are used as the test part, the remainder as the OOD part.
n_id_test = lab_test.shape[0]
perfect_scores_test = eval_ood.y_true[:n_id_test].astype(bool)
perfect_scores_ood = eval_ood.y_true[n_id_test:].astype(bool)
precision_star, recall_star, f1_star = eval_oms.get_metrics(perfect_scores_test,
                                                            perfect_scores_ood)

fmt4 = "{:.4f}".format  # four-decimal formatter for the result row
print("OMS results for perfect OOD detector")
print("Precision ", " Recall ", " F1")
print(fmt4(precision_star), "    ", fmt4(recall_star), " ", fmt4(f1_star))

OMS results for perfect OOD detector
Precision   Recall   F1
0.3490      0.9133   0.5050


In [16]:
# Index into features_train / features_test / features_ood selecting the layer
# whose activations the feature-based monitors below are fitted and scored on
# (it is also passed to the MahalanobisMonitor constructor).
id_layer_monitored = 3

In [18]:
# Fit the Mahalanobis monitor on the training features of the monitored layer,
# using the training labels.
monitor_mahalanobis = MahalanobisMonitor(id_dataset, model, id_layer_monitored, is_tied=True)
monitor_mahalanobis.fit(features_train[id_layer_monitored], lab_train)

# Score the ID test set first, then the OOD set; predict() receives the
# monitored-layer features together with the network's predictions.
scores_test_mahalanobis, scores_ood_mahalanobis = (
    monitor_mahalanobis.predict(feats[id_layer_monitored], preds)
    for feats, preds in ((features_test, pred_test), (features_ood, pred_ood))
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_maha, recall_ood_maha, f1_ood_maha = eval_ood.get_metrics(
    scores_test_mahalanobis, scores_ood_mahalanobis)
precision_oms_maha, recall_oms_maha, f1_oms_maha = eval_oms.get_metrics(
    scores_test_mahalanobis, scores_ood_mahalanobis)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("Mahalanobis")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_maha), "    ", fmt4(recall_ood_maha), " ", fmt4(f1_ood_maha))
print("OMS:   ", fmt4(precision_oms_maha), "    ", fmt4(recall_oms_maha), " ", fmt4(f1_oms_maha))

Mahalanobis
        Precision   Recall   F1
OOD:    0.5623      0.8946   0.6905
OMS:    0.7498      0.6119   0.6739


In [19]:
# Fit the outside-the-box monitor (10 clusters) on the training features of the
# monitored layer, using the training labels.
monitor_oob = OutsideTheBoxMonitor(n_clusters=10)
monitor_oob.fit(features_train[id_layer_monitored], lab_train)

# Score the ID test set first, then the OOD set, passing the network's
# predictions alongside the monitored-layer features.
scores_oob_test, scores_oob_ood = (
    monitor_oob.predict(feats[id_layer_monitored], preds)
    for feats, preds in ((features_test, pred_test), (features_ood, pred_ood))
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_oob, recall_ood_oob, f1_ood_oob = eval_ood.get_metrics(
    scores_oob_test, scores_oob_ood)
precision_oms_oob, recall_oms_oob, f1_oms_oob = eval_oms.get_metrics(
    scores_oob_test, scores_oob_ood)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("Outside the box")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_oob), "    ", fmt4(recall_ood_oob), " ", fmt4(f1_ood_oob))
print("OMS:   ", fmt4(precision_oms_oob), "    ", fmt4(recall_oms_oob), " ", fmt4(f1_oms_oob))

Outside the box
        Precision   Recall   F1
OOD:    0.7860      0.5971   0.6787
OMS:    0.4255      0.8458   0.5662


In [20]:
# The max-softmax-probability monitor is fitted without any data.
monitor_msp = MaxSoftmaxProbabilityMonitor()
monitor_msp.fit()

# Score the softmax outputs of the ID test set first, then of the OOD set.
scores_test_msp, scores_ood_msp = (
    monitor_msp.predict(probabilities)
    for probabilities in (softmax_test, softmax_ood)
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_msp, recall_ood_msp, f1_ood_msp = eval_ood.get_metrics(
    scores_test_msp, scores_ood_msp)
precision_oms_msp, recall_oms_msp, f1_oms_msp = eval_oms.get_metrics(
    scores_test_msp, scores_ood_msp)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("Max Softmax Probability")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_msp), "    ", fmt4(recall_ood_msp), " ", fmt4(f1_ood_msp))
print("OMS:   ", fmt4(precision_oms_msp), "    ", fmt4(recall_oms_msp), " ", fmt4(f1_oms_msp))

Max Softmax Probability
        Precision   Recall   F1
OOD:    0.6183      0.7650   0.6839
OMS:    0.6867      0.8345   0.7534


In [21]:
# ReAct monitor in "msp" mode with a 0.99 quantile; it is fitted with the
# feature extractor and the last entry of the training features.
monitor_react = ReActMonitor(quantile_value=0.99, mode="msp")
monitor_react.fit(feature_extractor, features_train[-1])

# Score the last feature entry of the ID test set first, then of the OOD set.
scores_test_react, scores_ood_react = (
    monitor_react.predict(feats[-1])
    for feats in (features_test, features_ood)
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_react_msp, recall_ood_react_msp, f1_ood_react_msp = eval_ood.get_metrics(
    scores_test_react, scores_ood_react)
precision_oms_react_msp, recall_oms_react_msp, f1_oms_react_msp = eval_oms.get_metrics(
    scores_test_react, scores_ood_react)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("ReAct MSP")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_react_msp), "    ", fmt4(recall_ood_react_msp),
      " ", fmt4(f1_ood_react_msp))
print("OMS:   ", fmt4(precision_oms_react_msp), "    ", fmt4(recall_oms_react_msp),
      " ", fmt4(f1_oms_react_msp))

ReAct MSP
        Precision   Recall   F1
OOD:    0.6015      0.7833   0.6804
OMS:    0.6878      0.8390   0.7559


In [22]:
# The max-logit monitor is fitted without any data.
monitor_maxlogits = MaxLogitMonitor()
monitor_maxlogits.fit()

# Score the logits of the ID test set first, then of the OOD set.
scores_test_maxlogits, scores_ood_maxlogits = (
    monitor_maxlogits.predict(logits)
    for logits in (logits_test, logits_ood)
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
# NOTE: unlike the other monitor cells, the metric names here carry no
# monitor suffix; they are kept as-is so existing references keep working.
precision_ood, recall_ood, f1_ood = eval_ood.get_metrics(
    scores_test_maxlogits, scores_ood_maxlogits)
precision_oms, recall_oms, f1_oms = eval_oms.get_metrics(
    scores_test_maxlogits, scores_ood_maxlogits)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("Max Logit")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood), "    ", fmt4(recall_ood), " ", fmt4(f1_ood))
print("OMS:   ", fmt4(precision_oms), "    ", fmt4(recall_oms), " ", fmt4(f1_oms))

Max Logit
        Precision   Recall   F1
OOD:    0.6220      0.7450   0.6780
OMS:    0.6982      0.8156   0.7523


In [23]:
# Temperature handed to the energy monitor.
T = 1

# The energy monitor is fitted without any data.
monitor_energy = EnergyMonitor(temperature=T)
monitor_energy.fit()

# Score the logits of the ID test set first, then of the OOD set.
scores_test_energy, scores_ood_energy = (
    monitor_energy.predict(logits)
    for logits in (logits_test, logits_ood)
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_energy, recall_ood_energy, f1_ood_energy = eval_ood.get_metrics(
    scores_test_energy, scores_ood_energy)
precision_oms_energy, recall_oms_energy, f1_oms_energy = eval_oms.get_metrics(
    scores_test_energy, scores_ood_energy)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("Energy")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_energy), "    ", fmt4(recall_ood_energy), " ", fmt4(f1_ood_energy))
print("OMS:   ", fmt4(precision_oms_energy), "    ", fmt4(recall_oms_energy), " ", fmt4(f1_oms_energy))

Energy
        Precision   Recall   F1
OOD:    0.6219      0.7451   0.6780
OMS:    0.7000      0.8133   0.7524


In [24]:
# ReAct monitor with a 0.99 quantile and no explicit mode (the results are
# reported as "ReAct Energy", so the default mode is presumably energy-based
# -- confirm in ReActMonitor).
# NOTE: this rebinds monitor_react, scores_test_react and scores_ood_react,
# which the "ReAct MSP" cell also assigns.
monitor_react = ReActMonitor(quantile_value=0.99)
monitor_react.fit(feature_extractor, features_train[-1])

# Score the last feature entry of the ID test set first, then of the OOD set.
scores_test_react, scores_ood_react = (
    monitor_react.predict(feats[-1])
    for feats in (features_test, features_ood)
)

# Same scores evaluated by both evaluators ("ood" first, then "oms").
precision_ood_react_ene, recall_ood_react_ene, f1_ood_react_ene = eval_ood.get_metrics(
    scores_test_react, scores_ood_react)
precision_oms_react_ene, recall_oms_react_ene, f1_oms_react_ene = eval_oms.get_metrics(
    scores_test_react, scores_ood_react)

fmt4 = "{:.4f}".format  # four-decimal formatter for the table rows
print("ReAct Energy")
print("       ", "Precision ", " Recall ", " F1")
print("OOD:   ", fmt4(precision_ood_react_ene), "    ", fmt4(recall_ood_react_ene),
      " ", fmt4(f1_ood_react_ene))
print("OMS:   ", fmt4(precision_oms_react_ene), "    ", fmt4(recall_oms_react_ene),
      " ", fmt4(f1_oms_react_ene))

ReAct Energy
        Precision   Recall   F1
OOD:    0.6059      0.7678   0.6773
OMS:    0.7028      0.8208   0.7572
