In [1]:
#imports
from dataset import Dataset
from feature_extractor import FeatureExtractor
from monitors_internals import MahalanobisMonitor, GaussianMixtureMonitor, OutsideTheBoxMonitor, MaxSoftmaxProbabilityMonitor,\
                    MaxLogitMonitor, EnergyMonitor, ReActMonitor
from monitors_input import SHINE_monitor, SHINE_monitor2
from evaluation import Evaluator
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score, classification_report

import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
import torch
import utils

import os
import sys
import gzip
from PIL import Image
import scipy.io as spio

2022-08-02 20:37:36.340470: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /home/rsenaferre/anaconda3_new/envs/ANITI_RuntimeMonitoringBenchmark/lib/python3.10/site-packages/cv2/../../lib64:
2022-08-02 20:37:36.340486: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
def _load_gzipped_npy(path):
    """Read one gzip-compressed ``.npy`` file and return its array."""
    # The context manager closes the gzip handle even if np.load raises.
    # np.load on a .npy stream reads the whole array before returning, so the
    # result stays valid after the file is closed.
    with gzip.GzipFile(path, "r") as f:
        return np.load(f)


def load_dataset(threat_type, variation_type, dataset_name, mode, root_path='data'):
    """Load the image/label arrays of one split from gzip-compressed ``.npy`` files.

    The files are read from
    ``root_path/<split_dir>/threat_type[/dataset_name]/variation_type`` where
    ``<split_dir>`` is ``training_set`` for ``mode == 'train'`` and
    ``benchmark_dataset`` for ``mode == 'test'``. The ``dataset_name``
    component is omitted from the path when ``dataset_name`` is ``None``.
    The directory must contain ``<mode>-images-npy.gz`` and
    ``<mode>-labels-npy.gz``.

    Args:
        threat_type: directory name below the split directory.
        variation_type: innermost directory name.
        dataset_name: optional directory name between ``threat_type`` and
            ``variation_type``; ``None`` to skip that level.
        mode: ``'train'`` or ``'test'``.
        root_path: root directory of the data tree.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``. Only the pair selected by
        ``mode`` is populated; the other pair is ``(None, None)``. For any
        other ``mode`` nothing is read and all four values are ``None``.

    Raises:
        OSError: if an expected file is missing or is not valid gzip data
            (propagated from ``gzip`` / ``numpy.load``).
    """
    x_train, y_train, x_test, y_test = None, None, None, None

    if mode == 'train':
        split_dir = 'training_set'
    elif mode == 'test':
        split_dir = 'benchmark_dataset'
    else:
        # Unknown mode: kept as a silent no-op for backward compatibility.
        return (x_train, y_train), (x_test, y_test)

    path_parts = [root_path, split_dir, threat_type]
    if dataset_name is not None:
        path_parts.append(dataset_name)
    path_parts.append(variation_type)
    fixed_path = os.path.join(*path_parts)

    print('loading data from', fixed_path)
    # File names are prefixed with the mode itself: train-*.gz / test-*.gz.
    images = _load_gzipped_npy(os.path.join(fixed_path, mode + '-images-npy.gz'))
    labels = _load_gzipped_npy(os.path.join(fixed_path, mode + '-labels-npy.gz'))

    if mode == 'train':
        x_train, y_train = images, labels
    else:
        x_test, y_test = images, labels

    return (x_train, y_train), (x_test, y_test)

In [10]:
#model params
batch_size = 10
model = "resnet"

#monitor params
layer_relu_ids = [32]
additional_transform = None
adversarial_attack = None

#data location (shared by the ID and OOD loads below)
data_root = '../PRDC_2021_Data_profile_module/data'

#ood dataset params
# These must be defined before the first load_dataset call: both the ID
# training load and the OOD test load take threat_type and compl_path, so
# defining them later makes the cell fail with NameError on a fresh kernel.
threat_type = 'distributional_shift'
variation = 'snow'
severity = 1
compl_path = '{}_severity_{}'.format(variation, severity)

#id dataset params
id_dataset = "cifar10"
num_classes_id = 10
(id_X_train, id_y_train), (_, _) = load_dataset(
    threat_type, compl_path, id_dataset, 'train', root_path=data_root)
id_X_train = id_X_train.astype('float32')
print(np.shape(id_X_train), np.shape(id_y_train))

#ood test data
(_, _), (ood_X_test, ood_y_test) = load_dataset(
    threat_type, compl_path, id_dataset, 'test', root_path=data_root)
#correcting classes (for ood) from threat generator:
#labels >= num_classes_id are shifted down by num_classes_id, others are kept
ood_y_test = [y-num_classes_id if y >= num_classes_id else y for y in ood_y_test ]
ood_X_test = ood_X_test.astype('float32')

print(np.shape(ood_X_test), np.shape(ood_y_test))

loading data from ../PRDC_2021_Data_profile_module/data/training_set/distributional_shift/cifar10/snow_severity_1
(50000, 32, 32, 3) (50000,)
loading data from ../PRDC_2021_Data_profile_module/data/benchmark_dataset/distributional_shift/cifar10/snow_severity_1
(70000, 32, 32, 3) (70000,)


In [4]:
#converting custom numpy data to pytorch dataset (for operations with ML models built with pytorch)
def numpy_to_pytorch_dataset(X, y):
    """Wrap image and label arrays in a ``TensorDataset``.

    ``X`` is reordered with axes ``(0, 3, 1, 2)``, i.e. the last axis of a
    4-D array is moved to position 1. Both ``X`` and ``y`` are converted with
    ``torch.Tensor``, so the labels also end up in torch's default tensor
    type rather than an integer type.

    Returns:
        A ``TensorDataset`` yielding ``(image, label)`` pairs.
    """
    images = torch.Tensor(X.transpose((0, 3, 1, 2)))
    labels = torch.Tensor(y)
    return TensorDataset(images, labels)

In [11]:
# Bundle images and labels as plain (X, y) tuples. The trailing comments show
# the disabled alternative of wrapping them with numpy_to_pytorch_dataset.
# NOTE(review): `traning_data` is a misspelling of "training_data"; the name
# is kept because the Dataset construction cell refers to it.
traning_data = (id_X_train, id_y_train) #numpy_to_pytorch_dataset(id_X_train, id_y_train)
# The ID test split is disabled: id_X_test / id_y_test are not loaded in the visible cells.
#test_data = (id_X_test, id_y_test) #numpy_to_pytorch_dataset(id_X_test, id_y_test)
ood_data = (ood_X_test, ood_y_test) #numpy_to_pytorch_dataset(ood_X_test, ood_y_test)

In [12]:
# Build the project's Dataset objects (class imported from `dataset`) for the
# ID training data and the OOD test data, both with the same model name and
# batch size.
# NOTE(review): the positional arguments look like (dataset name, split,
# model name, (X, y) data) -- confirm against Dataset's signature.
dataset_train = Dataset(id_dataset, "train", model, traning_data, batch_size=batch_size)
# ID test dataset is disabled together with test_data.
#dataset_test = Dataset(id_dataset, "test", model, test_data, batch_size=batch_size)
# The OOD dataset is named after compl_path (variation + severity), not id_dataset.
dataset_ood = Dataset(compl_path, "test", model, ood_data, batch_size=batch_size)

In [13]:
#extracting and storing features from ID and OOD data using an ML model trained on ID data
# The extractor is configured with the model name, the ID dataset name and
# layer_relu_ids (the run output reports the extracted layer as 'layer4.2.relu_1').
feature_extractor = FeatureExtractor(model, id_dataset, layer_relu_ids)

# get_features returns five values, unpacked here as
# (features, logits, softmax, predictions, labels) for each dataset.
# NOTE(review): this is the slow step of the notebook (over a minute per
# dataset in the recorded run).
features_train, logits_train, softmax_train, pred_train, lab_train = feature_extractor.get_features(dataset_train)
#features_test, logits_test, softmax_test, pred_test, lab_test = feature_extractor.get_features(dataset_test)
features_ood, logits_ood, softmax_ood, pred_ood, lab_ood = feature_extractor.get_features(dataset_ood)

Extracting layers: 'layer4.2.relu_1'


100%|███████████████████████████████████████| 5000/5000 [01:11<00:00, 70.37it/s]


Extracting layers: 'layer4.2.relu_1'


100%|███████████████████████████████████████| 7000/7000 [01:45<00:00, 66.13it/s]


In [14]:
#accuracy of the ML model
# id_accuracy compares labels and predictions of the ID training data;
# ood_accuracy does the same for the OOD benchmark data.
id_accuracy = accuracy_score(lab_train, pred_train)
ood_accuracy = accuracy_score(lab_ood, pred_ood)

# NOTE(review): the line labelled "Test" reports the OOD accuracy
# (ood_accuracy), not accuracy on an ID test split.
print("Accuracy")
print("Training:  ", id_accuracy)
print("Test: ", ood_accuracy)

Accuracy
Training:   0.99746
Test:  0.5744857142857143
