<a href="https://colab.research.google.com/github/mnassar/segfault/blob/main/SegFault_TABLE_II.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Segmentation Fault: A cheap defense against adversarial machine learning

## Authors: Mohamed Nassar, Doha Al Bared

## Off-the-Shelf Classifiers: Detection AUC for the Different IQR Representations
## Table II


In [None]:
# install foolbox for generating adversarial samples
# better to run it first since it requires runtime restart
!pip install foolbox

In [None]:
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import numpy as np
import matplotlib.pyplot as plt
import foolbox as fb

# classifiers 
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
#tf.debugging.set_log_device_placement(True)


# Classifier and dataset exploration

In [None]:
# https://drive.google.com/file/d/1H4KEE0Vp8DFZOe_QfcxqOxEVnpun-uka/view?usp=sharing
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1H4KEE0Vp8DFZOe_QfcxqOxEVnpun-uka' -O CIFAR10model.h5

Load the target CIFAR classifier 

In [None]:
# Load the target CIFAR classifier from the HDF5 file downloaded above.
from tensorflow.keras.models import load_model

pretrained_model = load_model('CIFAR10model.h5')
# The model is only attacked and probed in this notebook, never trained,
# so its weights are frozen before the summary is printed.
pretrained_model.trainable = False
pretrained_model.summary()

Load the CIFAR10 dataset.

In [None]:
#get dataset: cifar10

import tensorflow_datasets as tfds
from keras.datasets import cifar10

# Request both splits as (image, label) pairs together with the dataset metadata.
_tfds_options = dict(
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)
splits, ds_info = tfds.load('cifar10', **_tfds_options)
ds_train, ds_test = splits

print("-------------")
print(ds_info)


Generate IQR values for our dataset for original and adversarial images





In [None]:
# Human-readable CIFAR-10 class labels, listed by class index.
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [None]:
# Image normalisation settings.

# Pixel statistics that were used when the model was trained.
mean = 120.70748
std = 64.150024

# Raw pixel limits 0 and 255 mapped into the standardised input space.
bound_min, bound_max = ((raw_pixel - mean) / std for raw_pixel in (0, 255))

BATCH_SIZE = 128

def normalize_img(image, label):
  """Standardise an image and one-hot encode its label.

  The image is cast to `float32`, then shifted by the module-level `mean`
  and scaled by `std`. The label is expanded to a one-hot vector of depth 10.

  Returns:
    A `(standardised_image, one_hot_label)` pair.
  """
  # (an alternative scaling, dividing by 255., was tried and left disabled)
  pixels = tf.cast(image, tf.float32)
  standardised = (pixels - mean) / std
  one_hot_label = tf.one_hot(label, 10)
  return standardised, one_hot_label


# Training pipeline: normalise -> cache -> shuffle over the whole split -> batch -> prefetch.
ds_train = (
    ds_train
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .cache()
    .shuffle(ds_info.splits['train'].num_examples)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Test pipeline: normalise -> batch -> cache -> prefetch (no shuffling).
ds_test = (
    ds_test
    .map(normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(BATCH_SIZE)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)


In [None]:
# Sanity check: evaluate the pre-trained model on the normalised training batches.
pretrained_model.evaluate(iter(ds_train))

In [None]:

# Sanity check: evaluate the pre-trained model on the normalised test batches.
pretrained_model.evaluate(iter(ds_test))

# Data set preparation



## Choose Number of batches

In [None]:
NB_BATCHES = 30 # number of random clean (normal) batches drawn from the training set
# One adversarial batch is generated from each clean batch, each with an epsilon
# taken from a short list, so the experiment holds 2*NB_BATCHES batches overall.

## Choose attack

In [None]:
# Attack used to craft the adversarial batches. Exactly one assignment should be
# active; the commented lines are alternative foolbox attacks.
# attack = fb.attacks.L2CarliniWagnerAttack()
# attack = fb.attacks.PGD()
attack = fb.attacks.FGSM()
# fb.attacks.LinfDeepFoolAttack()

In [None]:
# Clean batches and the adversarial batches crafted from them, index-aligned:
# ds_experiment_f[b] is the attacked version of ds_experiment[b].
ds_experiment = []
ds_experiment_f = []
epsilons = [0.02, 0.06, 0.1]

# Wrap the Keras model for foolbox; the bounds are the standardised pixel limits.
fmodel = fb.models.TensorFlowModel(model=pretrained_model, bounds=(bound_min, bound_max))

gen = iter(ds_train)

for b in range(NB_BATCHES):
  images, labels = next(gen)
  # Labels are one-hot encoded; the Misclassification criterion takes class indices.
  labels_class = tf.argmax(labels, axis=1)
  ds_experiment.append(images)
  # Cycle through the epsilon list, whatever its length.
  eps = epsilons[b % len(epsilons)]
  # Only the second element of the returned triple is kept. The success flags
  # are not used, so every batch is stored whether or not the attack succeeded.
  _raw, fimages, _is_adv = attack(
      fmodel,
      images,
      criterion=fb.criteria.Misclassification(labels_class),
      epsilons=eps,
  )
  ds_experiment_f.append(fimages)


# for b in range(NB_BATCHES//3):
#   for eps in epsilons:
#     images, labels = gen.next() 
#     labels_class = tf.argmax(labels, axis=1)
#     raw, fimages, is_adv = attack(fmodel, images, criterion=fb.criteria.Misclassification(labels_class),epsilons=eps)
#     ds_experiment.append(fimages)


In [None]:

# Append the adversarial batches after the clean ones. The slice keeps this cell
# idempotent: if it is run more than once, the adversarial batches are not
# appended a second time (an in-place `+=` would grow the list on every run and
# misalign it with the labels built later).
ds_experiment = ds_experiment[:NB_BATCHES] + ds_experiment_f
len(ds_experiment)

# IQR calculations

In [None]:
from keras import backend as K
# pretrained_model.summary()
# Number of trailing layers of the model whose outputs are tapped.
NB_LAYERS=20
# Number of random nodes sampled from each tapped layer except the last one.
NB_NODES_PER_LAYER=200

In [None]:
# Build a backend function that returns the outputs of the last NB_LAYERS layers.
inp = pretrained_model.input
layers_ = pretrained_model.layers[-NB_LAYERS:]
outputs = [lay.output for lay in layers_]
intermediate_model = K.function([inp], outputs)


# Select NB_NODES_PER_LAYER random nodes from each tapped layer except the last.
# Each node is a list with one coordinate per non-batch dimension of the layer output.
# NOTE(review): no random seed is set here, so the selection differs between runs.
node_indices = []
for lay in layers_[:-1]:
  # we omit the first dim (batch dim) of each layer
  node_indices.append([[np.random.randint(0,d) for d in lay.output.shape[1:]] for s in range(NB_NODES_PER_LAYER)])
  # print("%s:" % lay.name)


# Add the last layer: every output node is used, with the count taken from the
# layer itself rather than hard-coded.
last_layer_width = int(layers_[-1].output.shape[-1])
node_indices.append([[x] for x in range(last_layer_width)])
# print("%s:" % layers_[-1].name)

# Report the number of nodes actually selected. The last layer contributes its
# own width instead of NB_NODES_PER_LAYER, so this is not NB_LAYERS*NB_NODES_PER_LAYER.
nb_selected_nodes = sum(len(nodes) for nodes in node_indices)
print ("these nodes will be used to compute the IQR-"+str(nb_selected_nodes))
print(intermediate_model)

In [None]:

%%time 
iqr_all = []
iqr_10_all = [] 
iqr_4k_all =[]
for b in range(2*NB_BATCHES):
  print("batch %d:" % b) 
  images = ds_experiment[b]
  preds = pretrained_model.predict(images)
  preds_value = tf.reduce_max(preds, axis=1)
  preds_idx = tf.argmax(preds, axis=1)
  preds_layers = intermediate_model(images)
  preds_4k = [] 
  for u in range(NB_LAYERS): # loop through the last 10 layers ]
    for v in node_indices[u]: # loop through the 10 random nodes for that layer 
      t = tuple(v)
      # print((0,*t))
      preds_4k.append ( preds_layers[u][(...,*t)] )
  preds_4k = np.array(preds_4k).T
  # print (preds_value)
  # print (preds_idx)
  # print ( preds_value == tf.gather_nd(preds, list(zip(range(BATCH_SIZE), preds_idx))) )
  iqr = [] 
  iqr10 = [] 
  iqr4k = []
  for i in range(32): 
    for j in range(32): 
      mask = np.ones((BATCH_SIZE,32,32,3)) 
      mask[:,i,j,:]=0 
      images_0 = images * mask
      preds_0 = pretrained_model.predict(images_0)
      preds_value_0 = tf.gather_nd(preds_0, list(zip(range(BATCH_SIZE), preds_idx))) 
      # preds_value_0_old = tf.reduce_max(preds_0, axis=1)
      # print (preds_value_0)
      # print (preds_value_0_old == preds_value_0)
      iqr.append(abs(preds_value - preds_value_0))  
      iqr10.append(abs(preds - preds_0))
      preds_layers_0 = intermediate_model(images_0)
      preds_4k_0 = [] 
      for u in range(NB_LAYERS): # loop through the last 10 layers ]
        for v in node_indices[u]: # loop through the 10 random nodes for that layer 
          t = tuple(v) 
          preds_4k_0.append ( preds_layers_0[u][(...,*t)] )
      preds_4k_0 = np.array(preds_4k_0).T
      iqr4k.append(abs(preds_4k - preds_4k_0))
  iqr_vals = np.percentile(iqr, 75, axis=0) - np.percentile(iqr, 25, axis=0)
  iqr_10_vals = np.percentile(iqr10, 75, axis=0) - np.percentile(iqr10, 25, axis=0)
  iqr_4k_vals = np.percentile(iqr4k, 75, axis=0) - np.percentile(iqr4k, 25, axis=0)
  iqr_all.append(iqr_vals)
  iqr_10_all.append(iqr_10_vals)
  iqr_4k_all.append(iqr_4k_vals)

In [None]:
# print (preds_4k.shape)

# IQR-1D

# classification

In [None]:
# Number of batches for which IQR values were collected (one entry is appended per batch).
len(iqr_all)

In [None]:
# IQR-1D features: all per-batch IQR values laid end to end.
X = np.asarray(iqr_all).reshape(-1)

# Labels: 0 for the first half (clean batches), 1 for the second half (adversarial batches).
samples_per_class = NB_BATCHES * BATCH_SIZE
y = np.repeat([0.0, 1.0], samples_per_class)

# 2-fold cross-validation on the single-column feature matrix.
score = cross_val_score(XGBClassifier(), X[:, np.newaxis], y, cv=2)
print(score)

In [None]:


# Hold out 20% of the samples for testing. No random_state is passed, so the
# split (and therefore the scores) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split( X.reshape(-1,1), y, test_size=0.2)
# Fit the three off-the-shelf classifiers with their default settings.
rdm = RandomForestClassifier().fit(X_train,y_train)
# probability=True enables predict_proba, which the AUC computation relies on.
svc = SVC(probability=True).fit(X_train,y_train)
xgb = XGBClassifier().fit(X_train,y_train)


In [None]:
# Held-out score of each classifier, printed in the order
# Random Forest, SVC, XGBoost. (Pass X_train, y_train instead to inspect the training score.)
for fitted in (rdm, svc, xgb):
  print(fitted.score(X_test, y_test))

In [None]:
# AUC
from sklearn.metrics import roc_curve, roc_auc_score

# Probability assigned to class 1 (adversarial) for every held-out sample.
rdm_probs = rdm.predict_proba(X_test)[:,1]
svc_probs = svc.predict_proba(X_test)[:,1]
xgb_probs = xgb.predict_proba(X_test)[:,1]

rdm_auc = roc_auc_score(y_test, rdm_probs)
svc_auc = roc_auc_score(y_test, svc_probs)
xgb_auc = roc_auc_score(y_test, xgb_probs)

print('Random Forest: AUROC = %.3f' %(rdm_auc) )
print('SVC: AUROC = %.3f' %(svc_auc) )
# Label corrected from the copy-paste typo 'RaXGBClassifier'.
print('XGBClassifier: AUROC = %.3f' %(xgb_auc) )

# IQR-10D

In [None]:

# %%time 
# iqr_all = []
# for b in range(2*NB_BATCHES):
#   images = ds_experiment[b]
#   preds = pretrained_model.predict(images)
#   iqr = [] 
#   for i in range(32): 
#     for j in range(32): 
#       mask = np.ones((BATCH_SIZE,32,32,3)) 
#       mask[:,i,j,:]=0 
#       images_0 = images * mask
#       preds_0 = pretrained_model.predict(images_0)
#       iqr.append(abs(preds - preds_0))  
#   iqr_vals = np.percentile(iqr, 75, axis=0)-np.percentile(iqr, 25, axis=0)
#   iqr_all.append(iqr_vals)



In [None]:
# np.array(iqr_10_all).shape
# X = np.array(iqr_all).reshape(-1,10) 
# X.shape

# Classification


In [None]:
# IQR-10D features: one row per image, one column per model output.
X = np.asarray(iqr_10_all).reshape(-1, 10)

# Labels: 0 for the first half (clean batches), 1 for the second half (adversarial batches).
samples_per_class = NB_BATCHES * BATCH_SIZE
y = np.repeat([0.0, 1.0], samples_per_class)

# 2-fold cross-validation with the default XGBoost classifier.
score = cross_val_score(XGBClassifier(), X, y, cv=2)
print(score)

In [None]:
# 80/20 train/test split of the IQR-10D features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Default-configured classifiers, fitted in the order Random Forest, SVC, XGBoost.
rdm = RandomForestClassifier().fit(X_train, y_train)
svc = SVC(probability=True).fit(X_train, y_train)
xgb = XGBClassifier().fit(X_train, y_train)

# Held-out score of each classifier, in the same order.
for fitted in (rdm, svc, xgb):
  print(fitted.score(X_test, y_test))

In [None]:
# AUC
from sklearn.metrics import roc_curve, roc_auc_score

# Probability assigned to class 1 (adversarial) for every held-out sample.
rdm_probs = rdm.predict_proba(X_test)[:,1]
svc_probs = svc.predict_proba(X_test)[:,1]
xgb_probs = xgb.predict_proba(X_test)[:,1]

rdm_auc = roc_auc_score(y_test, rdm_probs)
svc_auc = roc_auc_score(y_test, svc_probs)
xgb_auc = roc_auc_score(y_test, xgb_probs)

print('Random Forest: AUROC = %.3f' %(rdm_auc) )
print('SVC: AUROC = %.3f' %(svc_auc) )
# Label corrected from the copy-paste typo 'RaXGBClassifier'.
print('XGBClassifier: AUROC = %.3f' %(xgb_auc) )

In [None]:
# ROC curve (false/true positive rates) of each classifier on the held-out set.
(rdm_fpr, rdm_tpr, _), (svc_fpr, svc_tpr, _), (xgb_fpr, xgb_tpr, _) = (
    roc_curve(y_test, probs) for probs in (rdm_probs, svc_probs, xgb_probs)
)

# One line per classifier, drawn in the order Random Forest, SVC, XGB.
curves = (
    (rdm_fpr, rdm_tpr, '+', 'Random Forest (AUROC = %0.3f) ' % rdm_auc),
    (svc_fpr, svc_tpr, '.', 'SVC (AUROC = %0.3f) ' % svc_auc),
    (xgb_fpr, xgb_tpr, '*', 'XGB (AUROC = %0.3f) ' % xgb_auc),
)
for fpr, tpr, marker, label in curves:
  plt.plot(fpr, tpr, marker=marker, label=label)

# Title, axis labels, legend, then render.
plt.title('ROC Plot')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()

# IQR-4K-D


# Classification

In [None]:
# We try a very basic classification
# IQR-4K features: one row per image, one column per selected node. The column
# count is read from the data instead of being hard-coded, so the cell stays
# correct when NB_LAYERS or NB_NODES_PER_LAYER are changed.
iqr_4k_array = np.array(iqr_4k_all)
X = iqr_4k_array.reshape(-1, iqr_4k_array.shape[-1])
# Labels: 0 for the first half (clean batches), 1 for the second half (adversarial batches).
y = np.concatenate( ( np.zeros(NB_BATCHES*BATCH_SIZE), np.ones(NB_BATCHES*BATCH_SIZE) ) )

# 2-fold cross-validation with the default XGBoost classifier.
score = cross_val_score(XGBClassifier(), X, y, cv=2)
print (score)

In [None]:
# Shape of the stacked per-batch IQR arrays for the selected nodes, before flattening to rows.
print (np.array(iqr_4k_all).shape)

In [None]:
# 80/20 train/test split of the IQR-4K features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Default-configured classifiers, fitted in the order Random Forest, SVC, XGBoost.
rdm = RandomForestClassifier().fit(X_train, y_train)
svc = SVC(probability=True).fit(X_train, y_train)
xgb = XGBClassifier().fit(X_train, y_train)

# Held-out score of each classifier, in the same order.
for fitted in (rdm, svc, xgb):
  print(fitted.score(X_test, y_test))

In [None]:
# AUC
from sklearn.metrics import roc_curve, roc_auc_score

# Probability assigned to class 1 (adversarial) for every held-out sample.
rdm_probs = rdm.predict_proba(X_test)[:,1]
svc_probs = svc.predict_proba(X_test)[:,1]
xgb_probs = xgb.predict_proba(X_test)[:,1]

rdm_auc = roc_auc_score(y_test, rdm_probs)
svc_auc = roc_auc_score(y_test, svc_probs)
xgb_auc = roc_auc_score(y_test, xgb_probs)

print('Random Forest: AUROC = %.3f' %(rdm_auc) )
print('SVC: AUROC = %.3f' %(svc_auc) )
# Label corrected from the copy-paste typo 'RaXGBClassifier'.
print('XGBClassifier: AUROC = %.3f' %(xgb_auc) )

In [None]:
# ROC curve (false/true positive rates) of each classifier on the held-out set.
(rdm_fpr, rdm_tpr, _), (svc_fpr, svc_tpr, _), (xgb_fpr, xgb_tpr, _) = (
    roc_curve(y_test, probs) for probs in (rdm_probs, svc_probs, xgb_probs)
)

# One line per classifier, drawn in the order Random Forest, SVC, XGB.
curves = (
    (rdm_fpr, rdm_tpr, '+', 'Random Forest (AUROC = %0.3f) ' % rdm_auc),
    (svc_fpr, svc_tpr, '.', 'SVC (AUROC = %0.3f) ' % svc_auc),
    (xgb_fpr, xgb_tpr, '*', 'XGB (AUROC = %0.3f) ' % xgb_auc),
)
for fpr, tpr, marker, label in curves:
  plt.plot(fpr, tpr, marker=marker, label=label)

# Title, axis labels, legend, then render.
plt.title('ROC Plot')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
plt.show()