## Imports

In [1]:
import os
import pickle
from tqdm import tqdm

import numpy as np

import tensorflow as tf
from tensorflow.keras.layers import Softmax
from tensorflow.keras.models import load_model

from sklearn.ensemble import IsolationForest

import matplotlib.pyplot as plt

gpus = tf.config.list_physical_devices('GPU')
tf.config.set_logical_device_configuration(gpus[0],[tf.config.LogicalDeviceConfiguration(memory_limit=4096)])

2022-08-24 12:51:47.555013: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-08-24 12:51:48.908484: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-08-24 12:51:48.908963: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-08-24 12:51:48.967017: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-24 12:51:48.967269: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5
coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 462.00GiB/s
2022-08-24 12:51:48.967281: I tensorflow/stream_executor/platf

## Dataset and perturbation

Run the *generate_perturbation.py* script

## Training neural networks
To train the CIFAR10 classifier used in this use case, run the *train_classifier.py* script.

To train the CIFAR10 auto-encoder used in this use case, run the *train_auto-encoder.py* script.

If you don't want to re-run trainings, the pretrained weights used in our experiments can be downloaded from https://drive.google.com/file/d/1SzB2uqxehPMnh0ROqYsE2Nd34rdsXIu6/view?usp=sharing.

## Extract features
Once the data and perturbations have been generated and the pretrained models have been downloaded and extracted, run the *extract_features.py* script.

## Reproduce results

#### Load features

In [2]:
X_train_ae = "Data/train_features_ae.p"
X_train_cl = "Data/train_features_classifier.p"
X_test_ae = "Data/test_features_ae.p"
X_test_cl = "Data/test_features_classifier.p"
X_testBright_ae = "Data/test_bright_features_ae.p"
X_testBright_cl = "Data/test_bright_features_classifier.p"

X_train_ae = pickle.load(open(X_train_ae, "rb"))
X_train_cl = pickle.load(open(X_train_cl, "rb"))
X_test_ae = pickle.load(open(X_test_ae, "rb"))
X_test_cl = pickle.load(open(X_test_cl, "rb"))
X_testBright_ae = pickle.load(open(X_testBright_ae, "rb"))
X_testBright_cl = pickle.load(open(X_testBright_cl, "rb"))

X_eval_ae = np.r_[X_test_ae, X_testBright_ae]
X_eval_cl = np.r_[X_test_cl, X_testBright_cl]

#### Load required raw data

In [4]:
trainData_path = "Data/train.p"
testData_path = "Data/test.p"
testBrightnessData_path = "Data/test_bright.p"

(_, y_train) = pickle.load(open(trainData_path, "rb"))
(X_test, y_test) = pickle.load(open(testData_path, "rb"))
(X_test_bright, y_test_bright) = pickle.load(open(testBrightnessData_path, "rb"))

X_test = X_test / 255.0
X_test_bright = X_test_bright / 255.0

#### Load classifier

In [5]:
classifier_path = "Models/classifier"
classifier = load_model(classifier_path)

2022-08-24 12:52:05.372221: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-08-24 12:52:05.373659: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-08-24 12:52:05.374035: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:01:00.0 name: NVIDIA GeForce RTX 2080 SUPER computeCapability: 7.5
coreClock: 1.815GHz coreCount: 48 deviceMemorySize: 7.79GiB deviceMemoryBandwidth: 462.00GiB/s
2022-08-24 12:52:05.374064: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic libra

#### Compute monitoring labels

In [6]:
predictions_test = np.argmax(Softmax()(classifier.predict(X_test)).numpy(), axis=1)
labels_test_oms = (predictions_test==y_test[:,0]).astype(np.int8)

predictions_testBright = np.argmax(Softmax()(classifier.predict(X_test_bright)).numpy(), axis=1)
labels_testBright_oms = (predictions_testBright==y_test_bright[:,0]).astype(np.int8)

labels_test_oms = np.where(labels_test_oms==0, 1, 0)
labels_testBright_oms = np.where(labels_testBright_oms==0, 1, 0)

labels_oms = np.r_[labels_test_oms, labels_testBright_oms]  # Evaluation 1
labels_ood = np.array([0] * X_test.shape[0] + [1] * X_test.shape[0])  # Evaluation 2

2022-08-24 12:52:06.073127: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 122880000 exceeds 10% of free system memory.
2022-08-24 12:52:06.121308: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-08-24 12:52:06.138865: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3699850000 Hz
2022-08-24 12:52:06.215587: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-08-24 12:52:06.327536: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudnn.so.7
2022-08-24 12:52:07.236342: W tensorflow/core/framework/cpu_allocator_impl.cc:80] Allocation of 122880000 exceeds 10% of free system memory.


#### Fit One Clas Classifiers

In [7]:
eval_predictions = np.r_[predictions_test, predictions_testBright]
eval_classes = np.r_[y_test, y_test_bright][:, 0]
train_classes = y_train[:, 0]

occ_ae = []

for i in tqdm(range(10)):
    occ_ae.append(IsolationForest(contamination=0.3, bootstrap=True).fit(X_train_ae[train_classes == i]))

predict_ae = np.copy(eval_predictions)
for i in range(10):
    predict_ae[eval_predictions == i] = occ_ae[i].predict(X_eval_ae[eval_predictions == i])
predict_ae = np.where(predict_ae==-1, 1, 0)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:28<00:00,  2.88s/it]


In [8]:
occ_cl = []

for i in tqdm(range(10)):
    occ_cl.append(IsolationForest(contamination=0.3, bootstrap=True).fit(X_train_cl[train_classes == i]))
    
predict_cl = np.copy(eval_predictions)
for i in range(10):
    predict_cl[eval_predictions == i] = occ_cl[i].predict(X_eval_cl[eval_predictions == i])
predict_cl = np.where(predict_cl==-1, 1, 0)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:27<00:00,  2.77s/it]


#### Compute metrics

In [9]:
print("Evaluation 1: Detect Model Errors")
print("Auto-encoder features\n")

labels = labels_oms
preds = predict_ae

true_monitor_detection = 0
monitor_miss = 0
false_monitor_detection = 0
total = 0
for i, l in enumerate(labels):
    total += 1
    if l == 1:
        if preds[i] == 1:
            true_monitor_detection += 1
        else:
            monitor_miss += 1
    if l == 0 and preds[i] == 1:
        false_monitor_detection += 1

print("SG: ", true_monitor_detection / total)
print("RH: ", monitor_miss / total)
print("AC: ", false_monitor_detection / total)

Evaluation 1: Detect Model Errors
Auto-encoder features

SG:  0.07105
RH:  0.14155
AC:  0.2173


In [10]:
print("Evaluation 2: Detect Runtime Threats")
print("Auto-encoder features\n")

labels = labels_ood
preds = predict_ae

true_monitor_detection = 0
monitor_miss = 0
false_monitor_detection = 0
total = 0
for i, l in enumerate(labels):
    total += 1
    if l == 1:
        if preds[i] == 1:
            true_monitor_detection += 1
        else:
            monitor_miss += 1
    if l == 0 and preds[i] == 1:
        false_monitor_detection += 1

print("SG: ", true_monitor_detection / total)
print("RH: ", monitor_miss / total)
print("AC: ", false_monitor_detection / total)

Evaluation 2: Detect Runtime Threats
Auto-encoder features

SG:  0.14455
RH:  0.35545
AC:  0.1438


In [11]:
print("Evaluation 1: Detect Model Errors")
print("Classifier features\n")

labels = labels_oms
preds = predict_cl

true_monitor_detection = 0
monitor_miss = 0
false_monitor_detection = 0
total = 0
for i, l in enumerate(labels):
    total += 1
    if l == 1:
        if preds[i] == 1:
            true_monitor_detection += 1
        else:
            monitor_miss += 1
    if l == 0 and preds[i] == 1:
        false_monitor_detection += 1

print("SG: ", true_monitor_detection / total)
print("RH: ", monitor_miss / total)
print("AC: ", false_monitor_detection / total)

Evaluation 1: Detect Model Errors
Classifier features

SG:  0.0674
RH:  0.1452
AC:  0.2257


In [12]:
print("Evaluation 2: Detect Runtime Threats")
print("Classifier features\n")

labels = labels_ood
preds = predict_cl

true_monitor_detection = 0
monitor_miss = 0
false_monitor_detection = 0
total = 0
for i, l in enumerate(labels):
    total += 1
    if l == 1:
        if preds[i] == 1:
            true_monitor_detection += 1
        else:
            monitor_miss += 1
    if l == 0 and preds[i] == 1:
        false_monitor_detection += 1

print("SG: ", true_monitor_detection / total)
print("RH: ", monitor_miss / total)
print("AC: ", false_monitor_detection / total)

Evaluation 2: Detect Runtime Threats
Classifier features

SG:  0.1473
RH:  0.3527
AC:  0.1458
