In [1]:
#imports
from dataset import Dataset
from feature_extractor import FeatureExtractor
from monitors_internals import MahalanobisMonitor, GaussianMixtureMonitor, OutsideTheBoxMonitor, MaxSoftmaxProbabilityMonitor,\
                    MaxLogitMonitor, EnergyMonitor, ReActMonitor
from monitors_input import SHINE_monitor
from evaluation import Evaluator

from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, classification_report

import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
import torch
import utils

ModuleNotFoundError: No module named 'torchvision.models.feature_extraction'

In [3]:
#model params
batch_size = 10
model = "resnet"

#monitor params
layer_relu_ids = [32]

#dataset params
id_dataset = "cifar10"
ood_dataset = "svhn"

additional_transform = None
adversarial_attack = None#

In [4]:
#loading data
dataset_train = Dataset(id_dataset, "train", model, batch_size=batch_size)
dataset_test = Dataset(id_dataset, "test", model, batch_size=batch_size)
dataset_ood = Dataset(ood_dataset, "test", model, additional_transform, adversarial_attack, batch_size=batch_size)

Files already downloaded and verified
Files already downloaded and verified
Using downloaded and verified file: ./Data/test_32x32.mat


In [5]:
#extracting and storing features from ID and OOD data using an ML model trained on ID data
feature_extractor = FeatureExtractor(model, id_dataset, layer_relu_ids)

features_train, logits_train, softmax_train, pred_train, lab_train = feature_extractor.get_features(dataset_train)
features_test, logits_test, softmax_test, pred_test, lab_test = feature_extractor.get_features(dataset_test)
features_ood, logits_ood, softmax_ood, pred_ood, lab_ood = feature_extractor.get_features(dataset_ood)

In [6]:
#accuracy of the ML model 
id_accuracy = accuracy_score(lab_test, pred_test)
ood_accuracy = 0
if id_dataset == ood_dataset:
    ood_accuracy = accuracy_score(lab_ood, pred_ood)

print("Accuracy")
print("ID:  ", id_accuracy)
print("OOD: ", ood_accuracy)

Accuracy
ID:   0.9367
OOD:  0


In [7]:
# Converting training dataset (ID) from pytorch to numpy format
num_samples = None

X, y = [],[]

with torch.no_grad():
    for data in tqdm(dataset_train.dataloader):
        X.append(data[0].numpy())
        y.append(data[1].numpy())

X = np.array(X)
y = np.array(y)
X_train = np.reshape(X, (batch_size*X.shape[0], X.shape[3], X.shape[4], X.shape[2]))
y_train = np.reshape(y, batch_size*y.shape[0])

print('np.shape(X), np.shape(y):',np.shape(X_train), np.shape(y_train))

100%|██████████████████████████████████████| 5000/5000 [00:05<00:00, 851.12it/s]


np.shape(X), np.shape(y): (50000, 32, 32, 3) (50000,)


In [7]:
#building SHINE monitor with ID data
monitor_shine = SHINE_monitor(id_dataset)
#monitor_shine.fit_by_class_parallel(X_train, y_train)
monitor_shine.fit_by_class(X_train, y_train)

print('number of monitors',len(monitor_shine.arr_density))

{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
{'bandwidth': 1.0}
number of monitors 10


In [18]:
## Converting test dataset (ID) from pytorch to numpy format
X, y = [],[]

for data in tqdm(dataset_test.dataloader):
    X.append(data[0].numpy())
    y.append(data[1].numpy())
    
X = np.array(X)
y = np.array(y)
print(np.shape(X), np.shape(y), np.shape(pred_test))

X_test = np.reshape(X, (batch_size*X.shape[0], X.shape[3], X.shape[4], X.shape[2]))
y_test = np.reshape(y, batch_size*y.shape[0])

print(np.shape(X_test), np.shape(y_test), np.shape(pred_test))

100%|██████████████████████████████████████| 1000/1000 [00:01<00:00, 754.93it/s]


(1000, 10, 3, 32, 32) (1000, 10) (10000,)
(10000, 32, 32, 3) (10000,) (10000,)


In [26]:
#between 0 and 1, percentage of ID data to be preserved
threshold_id = 0.9

In [27]:
#testing SHINE monitors on the same ID trainset (just for test purposes, one can skip this part)
m_true = []
m_pred = []

for x, pred, label in tqdm(zip(X_train, pred_train, y_train)):
    monitor_pred, pdf = monitor_shine.predict(np.array([x]), pred, threshold_id)
    m_pred.append(monitor_pred)
    
    if pred == label: #monitor does not need to activate
        m_true.append(0)
    else: #monitor should activate
        m_true.append(1)

50000it [02:32, 326.87it/s]


In [23]:
#testing SHINE monitors on ID testset
m_true = []
m_pred = []

for x, pred, label in tqdm(zip(X_test, pred_test, y_test)):
    monitor_pred, pdf = monitor_shine.predict(np.array([x]), pred, threshold_id)
    m_pred.append(monitor_pred)
    
    if pred == label: #monitor does not need to activate
        m_true.append(0)
    else: #monitor should activate
        m_true.append(1)

10000it [00:31, 317.55it/s]


In [29]:
# testing SHINE monitors on entire OOD dataset
X_ood, y_ood = [],[]


for data in tqdm(dataset_ood.dataloader):
    img = data[0].numpy()
    X_ood = np.reshape(img, (img.shape[0], img.shape[2], img.shape[3], img.shape[1]))

    for x, pred in zip(X_ood, pred_ood):
        monitor_pred, pdf = monitor_shine.predict(np.array([x]), pred, threshold_id)
        m_pred.append(monitor_pred)
        m_true.append(1) #monitor should always react to novel classes

  0%|                                                  | 0/2604 [00:00<?, ?it/s]


AttributeError: 'numpy.ndarray' object has no attribute 'append'

In [31]:
from sklearn.metrics import matthews_corrcoef as mcc
from sklearn.metrics import balanced_accuracy_score
m_true = np.array(m_true)
m_pred = np.array(m_pred).flatten()
#evaluating SHINE

print(classification_report(m_true, m_pred))
print(mcc(m_true, m_pred))
print(balanced_accuracy_score(m_true, m_pred))

              precision    recall  f1-score   support

           0       1.00      0.90      0.95     49873
           1       0.02      0.91      0.04       127

    accuracy                           0.90     50000
   macro avg       0.51      0.91      0.50     50000
weighted avg       1.00      0.90      0.94     50000

0.1350467623939485
0.9065655899843872


In [None]:
#0.9
precision    recall  f1-score   support

           0       0.12      0.13      0.13      9367
           1       0.69      0.67      0.68     26665

    accuracy                           0.53     36032
   macro avg       0.40      0.40      0.40     36032
weighted avg       0.54      0.53      0.53     36032

-0.19622882233117975

#0.1
precision    recall  f1-score   support

           0       0.05      0.04      0.05      9367
           1       0.69      0.73      0.71     26665

    accuracy                           0.55     36032
   macro avg       0.37      0.39      0.38     36032
weighted avg       0.52      0.55      0.54     36032

-0.24099129213678386

#0