<a href="https://colab.research.google.com/github/mnassar/segfault/blob/main/SegFault_ML_LOO_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Segmentation Fault: A cheap defense against adversarial machine learning
## ML LOO Implementation

## Authors: Mohamed Nassar, Doha Al Bared
### Department of Computer Science 
### AUB 


In [None]:
# install foolbox for generating adversarial samples
# better to run it first since it requires runtime restart
!pip install foolbox

In [None]:
import tensorflow.compat.v2 as tf
tf.enable_v2_behavior()

import numpy as np
import matplotlib.pyplot as plt
import foolbox as fb

# classifiers 
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
#tf.debugging.set_log_device_placement(True)


# Classifier and dataset exploration

In [None]:
# Download the pretrained CIFAR-10 classifier from Google Drive and save it as CIFAR10model.h5.
# https://drive.google.com/file/d/1H4KEE0Vp8DFZOe_QfcxqOxEVnpun-uka/view?usp=sharing
# NOTE(review): --no-check-certificate turns off TLS certificate verification for this download.
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1H4KEE0Vp8DFZOe_QfcxqOxEVnpun-uka' -O CIFAR10model.h5

Load the target CIFAR classifier 

In [None]:
# load the cifar classifier
from tensorflow.keras.models import load_model

# CIFAR10model.h5 is the file written by the wget cell.
pretrained_model = load_model('CIFAR10model.h5')
# Freeze the weights; in the visible cells the model is only evaluated, queried and attacked.
pretrained_model.trainable = False
pretrained_model.summary()



In [None]:
# Number of layers in the loaded model (the ML-LOO section later slices the last NB_LAYERS of them).
print (len ( pretrained_model.layers)) 

Load the CIFAR10 dataset.

In [None]:
#get dataset: cifar10

import tensorflow_datasets as tfds
# NOTE(review): `cifar10` from keras.datasets is not referenced in the visible cells.
from keras.datasets import cifar10
# Load the train and test splits as (image, label) pairs, plus the dataset metadata (ds_info).
(ds_train, ds_test), ds_info = tfds.load(
    'cifar10',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True,
)

print (ds_info)


Generate IQR values for our dataset for original and adversarial images





In [None]:
# Human-readable class names, indexed by integer class id when labelling plots and predictions.
# NOTE(review): order is presumably the tfds CIFAR-10 label order — confirm against ds_info.
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

In [None]:
# Display the training dataset object as loaded (before normalization and batching).
ds_train

In [None]:
# normalize images 

# these are the numbers used during training the model 
mean = 120.70748
std = 64.150024
# Normalized values of the raw pixel extremes 0 and 255; passed as `bounds` to the foolbox model.
bound_min = (0-mean)/std
bound_max = (255-mean)/std
# Batch size for both pipelines; the leave-one-out masks below are built with this size too.
BATCH_SIZE=128

def normalize_img(image, label):
  """Standardize an image and one-hot encode its label.

  The image is cast to float32, shifted by the module-level `mean` and scaled
  by the module-level `std`; the label is expanded to a 10-way one-hot vector.
  Returns the pair (standardized_image, one_hot_label).
  """
  standardized = (tf.cast(image, tf.float32) - mean) / std
  one_hot_label = tf.one_hot(label, 10)
  return standardized, one_hot_label


# Training pipeline: normalize -> cache -> shuffle over the full split -> batch -> prefetch.
# NOTE(review): ds_train / ds_test are reassigned from themselves, so re-running this cell
# applies normalize_img again to already-processed data; run it once per kernel.
ds_train = ds_train.map(
    normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)
# No drop_remainder is given, so the last batch may hold fewer than BATCH_SIZE images.
ds_train = ds_train.batch(BATCH_SIZE)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)

# Test pipeline: normalize -> batch -> cache -> prefetch (no shuffling).
ds_test = ds_test.map(
    normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
ds_test = ds_test.batch(BATCH_SIZE)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)


In [None]:
# training accuracy 
# Evaluates the frozen model over one pass of the normalized training batches.
pretrained_model.evaluate(iter(ds_train))

In [None]:
# testing accuracy 
# Evaluates the frozen model over one pass of the normalized test batches.
pretrained_model.evaluate(iter(ds_test))

In [None]:
# Plot a few examples from the original (clean) training data, each followed by
# its true class name and the class predicted by the model.
# The preview is capped at N_PREVIEW images rather than waiting on input() after
# every image, so the notebook can run non-interactively (Restart & Run All, nbconvert).
N_PREVIEW = 5

images, labels = next(iter(ds_train))
preds_imgs = pretrained_model.predict(images)

for img, label, pred in zip(images[:N_PREVIEW], labels[:N_PREVIEW], preds_imgs[:N_PREVIEW]):
  img = img.numpy()
  label = label.numpy()
  plt.figure(figsize = (1,1))
  plt.axis('off')
  # undo the normalization to recover displayable uint8 pixel values
  plt.imshow((img * std + mean).astype(np.uint8))
  plt.show()
  # true class first, predicted class second
  print (class_names[np.argmax(label)], class_names[np.argmax(pred)])

# LOO: Leave One Out - IQR Calculations for original 


In [None]:

# Draw one training batch and query the model on it.
images, labels = next(iter(ds_train))
preds = pretrained_model.predict(images)

# Reduce the one-hot labels and the model outputs to class indices.
labels = tf.argmax(labels, axis=1)
preds_class = tf.argmax(preds, axis=1)

# Sanity check: classification accuracy on this single batch, in percent.
hits = tf.cast(tf.equal(labels, preds_class), tf.float32)
batch_accuracy = tf.reduce_mean(hits).numpy().item() * 100
print ("accuracy", batch_accuracy, "%")

# Largest output value per image; the leave-one-out differences are measured against it.
preds_value = tf.reduce_max(preds, axis=1)
print(preds_value.shape)



In [None]:
%%time
iqr = [] 

for i in range(32): 
  for j in range(32): 
    mask = np.ones((BATCH_SIZE,32,32,3)) 
    mask[:,i,j,:]=0 
    images_0 = images * mask
    preds_0 = pretrained_model.predict(images_0)
    preds_value_0 = tf.reduce_max(preds_0, axis=1)
    iqr.append(abs(preds_value - preds_value_0))

iqr = np.array(iqr)
print(iqr.shape)  
  # print(iqr.device)



In [None]:

# Interquartile range (75th minus 25th percentile) taken along axis 0 of `iqr`,
# i.e. across the masked pixel positions, leaving one value per image.
iqr_vals = np.percentile(iqr, 75, axis=0)-np.percentile(iqr, 25, axis=0)
print (iqr_vals.shape)

In [None]:
# Scatter the per-image IQR of the clean batch (x = position of the image in the batch).
plt.scatter(range(BATCH_SIZE), iqr_vals)
plt.title("we notice that the IQRs for these images are small!")
plt.show()


# Adversarial image generation 


In [None]:
# Shape of `labels` as left by the previously executed cell.
labels.shape

In [None]:
# Standard deviation used for normalization; it also scales the pixel-value bounds.
print (std)

In [None]:



# Craft FGSM adversarial examples for one training batch, compare the model's
# accuracy before/after the attack, and show a few original/adversarial pairs.
images, labels = next(iter(ds_train))
labels_class = tf.argmax(labels, axis=1)

attack = fb.attacks.FGSM()
# bounds = normalized values of raw pixel intensities 0 and 255
fmodel = fb.models.TensorFlowModel(model=pretrained_model, bounds=(bound_min, bound_max))

fimages = attack.run(fmodel, images, criterion=fb.criteria.Misclassification(labels_class), epsilon=0.1)
fpreds = pretrained_model.predict(fimages)

fpreds_class = tf.argmax(fpreds, axis=1)

# accuracy before attack
pretrained_model.evaluate(images,labels)
# accuracy after attack
pretrained_model.evaluate(fimages,labels)

# Number of adversarial images still classified correctly, counted over the
# whole batch (not only over the few pairs displayed below).
correct = int(tf.reduce_sum(tf.cast(tf.equal(labels_class, fpreds_class), tf.int32)))

# Number of original/adversarial pairs to display.
nb_samples = 3
for img, fimg, label, fpred in zip(images[:nb_samples], fimages[:nb_samples],
                                   labels_class[:nb_samples], fpreds_class[:nb_samples]):
  # Create the figure through subplots() only: a separate plt.figure() call
  # before it would leave an extra, empty figure behind on every iteration.
  fig, ax = plt.subplots(1,2)
  # left: clean image titled with its true class
  ax[0].imshow((img.numpy() * std + mean).astype(np.uint8))
  ax[0].set_title(class_names[int(label)])
  ax[0].axis('off')
  # right: adversarial image titled with the class predicted for it
  ax[1].imshow((fimg.numpy() * std + mean).astype(np.uint8))
  ax[1].set_title(class_names[int(fpred)])
  ax[1].axis('off')
  plt.show()
print(correct)
  

In [None]:
# Draw a batch from a fresh iterator over ds_train and attack it with FGSM.
# NOTE(review): ds_train is shuffled, so this is presumably not the batch used for the
# original-image IQR values — confirm that comparing different images is intended.
images, labels = next(iter(ds_train))
labels_class = tf.argmax(labels, axis=1)

attack = fb.attacks.FGSM()
# bounds = normalized values of raw pixel intensities 0 and 255
fmodel = fb.models.TensorFlowModel(model=pretrained_model, bounds=(bound_min, bound_max))

# `fimages` is consumed by the adversarial IQR cell that follows.
fimages = attack.run(fmodel, images, criterion=fb.criteria.Misclassification(labels_class), epsilon=0.1)

# accuracy before attack
pretrained_model.evaluate(images,labels)
# accuracy after attack
pretrained_model.evaluate(fimages,labels)


# IQR calculations for adversarial

In [None]:
%%time 
# calculate the IQR for the adversarial images 
# compare with the IQR of original 
fiqr = [] 

fpreds = pretrained_model.predict(fimages)
fpreds_value = tf.reduce_max(fpreds, axis=1)

for i in range(32): 
  for j in range(32): 
    mask = np.ones((BATCH_SIZE,32,32,3)) 
    mask[:,i,j,:]=0 
    fimages_0 = fimages * mask
    fpreds_0 = pretrained_model.predict(fimages_0)
    fpreds_value_0 = tf.reduce_max(fpreds_0, axis=1)
    fiqr.append(abs(fpreds_value - fpreds_value_0))

fiqr = np.array(fiqr)
print(fiqr.shape) 
fiqr_vals = np.percentile(fiqr, 75, axis=0)-np.percentile(fiqr, 25, axis=0)

print (fiqr_vals.shape) 

In [None]:
# Per-image IQR of the clean batch vs. the FGSM adversarial batch.
plt.scatter(range(BATCH_SIZE), iqr_vals, label='orig')
plt.scatter(range(BATCH_SIZE), fiqr_vals, label='adv', marker='s')

plt.legend()
# Raw string: "\e" is not a valid Python escape sequence, and the backslash
# must reach matplotlib's mathtext unchanged.
plt.title(r"IQR-1D: Orig. vs. FGSM ($\epsilon=0.1$) Adv.")
plt.xlabel("images")
plt.ylabel("IQR")
plt.show()
# It looks like in general orig has lower IQR than ADV 

# Classification Adv. vs. Orig


In [None]:
from sklearn.utils import shuffle
# We try a very basic classification:
# one feature per sample (the IQR value), label 0 = original, 1 = adversarial.
X = np.concatenate((iqr_vals, fiqr_vals), axis=0)
y = np.concatenate( ( np.zeros(iqr_vals.shape), np.ones(fiqr_vals.shape) ) )

# Fixed seed so the cross-validation folds (and therefore the reported scores)
# are the same on every run; 42 matches the seed used for train_test_split below.
X,y = shuffle(X,y, random_state=42)

In [None]:
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score



# 2-fold cross-validation on the single IQR feature (reshaped to a one-column matrix).
score = cross_val_score(XGBClassifier(), X.reshape(-1,1), y, cv=2)
print (score)

score = cross_val_score(SVC(), X.reshape(-1,1), y, cv=2)
print (score)

# we have a little better than random accuracy 

# IQR-10

In [None]:
# now what if we enhance our classification accuracy by adding more IQR values 
# the IQR values correspond to random nodes in the network

# let's start with the 10 output nodes. We call it iqr-10

%%time 


NB_BATCHES=1
nb=1
for images, labels in ds_train: 
  preds = pretrained_model.predict(images)
  iqr=[]
  for i in range(32): 
    for j in range(32): 
      mask = np.ones((BATCH_SIZE,32,32,3)) 
      mask[:,i,j,:]=0 
      images_0 = images * mask
      preds_0 = pretrained_model.predict(images_0)
      iqr.append(abs(preds - preds_0))
  
  if nb==1: 
    iqr_all = np.array(iqr) 
    # print(iqr_all.shape)
  else: 
    iqr = np.array(iqr) 
    # print(iqr.shape)
    # print(iqr_all.shape)
    iqr_all = np.concatenate((iqr_all, iqr), axis=1)      
    # print(iqr_all.shape)
    
  print(nb) 
  nb+=1
  if nb > NB_BATCHES:
    break  
 


print(iqr_all.shape) 


In [None]:
# IQR (75th minus 25th percentile) along axis 0 of `iqr_all`, i.e. across the masked pixel positions.
iqr_vals = np.percentile(iqr_all, 75, axis=0)-np.percentile(iqr_all, 25, axis=0)
print (iqr_vals.shape) 

In [None]:
# IQR 10 for adversarial 
%%time

nb=1

attack = fb.attacks.FGSM()
fmodel = fb.models.TensorFlowModel(model=pretrained_model, bounds=(bound_min, bound_max))


for images, labels in ds_train:
  
  labels_class = tf.argmax(labels, axis=1)
  fimages = attack.run(fmodel, images, criterion=fb.criteria.Misclassification(labels_class), epsilon=0.1)
  fpreds = pretrained_model.predict(fimages)
  pretrained_model.evaluate(fimages,labels)
  fiqr = [] 
  for i in range(32): 
    for j in range(32): 
      mask = np.ones((BATCH_SIZE,32,32,3)) 
      mask[:,i,j,:]=0 
      fimages_0 = fimages * mask
      fpreds_0 = pretrained_model.predict(fimages_0)
      fiqr.append(abs(fpreds - fpreds_0))
  

  
  if nb==1: 
    fiqr_all = np.array(fiqr)
  else: 
    fiqr = np.array(fiqr)
    fiqr_all = np.concatenate((fiqr_all, fiqr), axis=1)

  print(nb) 
  nb+=1
  if nb > NB_BATCHES:
    break 

print (fiqr_all.shape)



In [None]:
# IQR (75th minus 25th percentile) along axis 0 of `fiqr_all`, i.e. across the masked pixel positions.
fiqr_vals = np.percentile(fiqr_all, 75, axis=0)-np.percentile(fiqr_all, 25, axis=0)
print (fiqr_vals.shape) 

In [None]:


# One panel per output node (2 rows x 5 columns): IQR of clean vs. adversarial images.
fig, axs = plt.subplots(2, 5, figsize=(20, 6), sharex=True, sharey=True)

for i in range(10):
  ax = axs[i // 5, i % 5]  # nodes 0-4 on the first row, 5-9 on the second
  # x positions come from the arrays themselves, so the plot cannot go out of
  # sync with NB_BATCHES if that global is changed by another cell
  ax.scatter(range(iqr_vals.shape[0]), iqr_vals[:,i], label='orig')
  ax.scatter(range(fiqr_vals.shape[0]), fiqr_vals[:,i], label='adv', marker='s')
  ax.legend(loc=2)
# Raw string: "\e" is not a valid Python escape sequence, and the backslash
# must reach matplotlib's mathtext unchanged.
plt.suptitle(r"IQR-10D: Orig. vs. FGSM ($\epsilon=0.1$) Adv.", fontsize=16)

# shared axis labels for the whole grid
fig.text(0.5, 0.04, 'Images', ha='center', fontweight='bold')
fig.text(0.09, 0.5, 'IQR', va='center', rotation='vertical', fontweight='bold')
plt.show()


In [None]:

# Stack the clean and adversarial IQR feature rows; label 0 = original, 1 = adversarial.
X = np.concatenate((iqr_vals, fiqr_vals), axis=0)
y = np.concatenate( ( np.zeros(iqr_vals.shape[0]), np.ones(fiqr_vals.shape[0]) ) )
print(X.shape)
print (y.shape)
# Mean accuracy over 3 cross-validation folds.
score = cross_val_score(XGBClassifier(), X, y, cv=3)
print (sum(score)/3)

score = cross_val_score(SVC(C=10**5, gamma=10), X, y, cv=3)
print (sum(score)/3)

In [None]:
from sklearn.model_selection import GridSearchCV

# Grid search over the SVC hyper-parameters C and gamma.
parameters = {'C':[0.1, 1, 10, 100, 1000, 10000, 10**5], 'gamma':[0.0001, 0.001, 0.01, 0.1, 1, 10]}
svc = SVC()
clf = GridSearchCV(svc, parameters)
clf.fit(X,y)
print(clf.best_params_)
print(clf.best_score_)
# print (clf.best_estimator_, clf.best_index_)
# NOTE(review): the best estimator is re-scored on the same X, y that were used to
# select its hyper-parameters, so this figure is likely optimistic.
score = cross_val_score(clf.best_estimator_, X, y, cv=3)
print (sum(score)/3)

# ML-LOO: Multi Layer approach

In [None]:
# pretrained_model.summary()
# Number of trailing model layers to tap for ML-LOO features.
NB_LAYERS=20
# Number of randomly chosen nodes per tapped layer (the final layer uses its 10 outputs instead).
NB_NODES_PER_LAYER=200

Example for an intermediate model in Keras

Source: https://stackoverflow.com/questions/41711190/keras-how-to-get-the-output-of-each-layer


In [None]:
# code to print any intermediate node from any intermediate layer 
# Keras backend; K.function is used below to read the outputs of intermediate layers.
from keras import backend as K

# Reference example (kept commented out): expose the last two layers' outputs
# and run them on a single random 32x32x3 input.
# inp = pretrained_model.input  
# # last two layers 
# outputs = [layer.output for layer in pretrained_model.layers[-2:]]          
# functor = K.function([inp], outputs) 
# Testing
# test = np.random.random((32,32,3))[np.newaxis,...]
# layer_outs = functor([test])
# print(layer_outs)

In [None]:


# Callable that maps an input batch to the outputs of the last NB_LAYERS layers.
inp = pretrained_model.input  
layers_= pretrained_model.layers[-NB_LAYERS:]
outputs = [lay.output for lay in layers_]     
intermediate_model = K.function([inp], outputs) 


# select NB_NODES_PER_LAYER random nodes from each selected layer
# (the final layer contributes its 10 outputs instead of a random sample, so
# the feature count is NOT NB_LAYERS*NB_NODES_PER_LAYER)
nb_selected_nodes = (len(layers_) - 1) * NB_NODES_PER_LAYER + 10
print ("these nodes will be used to compute the IQR-"+str(nb_selected_nodes))

# NOTE(review): no random seed is set in the visible cells, so the selected
# nodes (and every downstream score) change from run to run.
node_indices=[]
for lay in layers_[:-1]:
  # we omit the first dim (batch dim) of each layer;
  # each node is a full index into the remaining dims, drawn uniformly per dim
  node_indices.append([[np.random.randint(0,d) for d in lay.output.shape[1:]] for s in range(NB_NODES_PER_LAYER)])
  print("%s:" % lay.name)


# add the last layer: all 10 output nodes
node_indices.append([[x] for x in range(10)])
print(len(node_indices)) 

In [None]:
# Example on a single batch: read the value of every selected node.
images, labels = next(iter(ds_train))

preds_layers = intermediate_model(images)

preds = []
for layer_pos in range(NB_LAYERS):  # walk the tapped layers in order
  layer_out = preds_layers[layer_pos]
  print (layer_out.shape)
  # keep the leading (batch) axis, pick one node along all remaining axes
  preds.extend(layer_out[(..., *tuple(idx))] for idx in node_indices[layer_pos])

# transpose the stacked node values so that images come first
preds = np.array(preds).T
print (preds.shape)


In [None]:
# now what if we enhance our classification accuracy by adding more IQR values 
# the IQR values correspond to random nodes in the network

# let' select 100 output nodes from each layer. We call it iqr-1000 (it is actually iqr-910 since last layer is only 10)

%%time 


NB_BATCHES=2
nb=1

for images, labels in ds_train: 
  # compute the IQR original for the batch
  preds_layers = intermediate_model(images)
  preds = [] 
  for i in range(NB_LAYERS): # loop through the last NB_LAYERS
    for j in node_indices[i]: # loop through the NB_NODES_PER_LAYER
      t = tuple(j)
      # print((0,*t))
      # preds.append(preds_layers[i])
      preds.append ( preds_layers[i][(...,*t)] )
  preds = np.array(preds).T
  
  iqr=[]
  for i in range(32): 
    for j in range(32): 
      mask = np.ones((BATCH_SIZE,32,32,3)) 
      mask[:,i,j,:]=0 
      images_0 = images * mask
      preds_layers_0 = intermediate_model(images_0)
      preds_0 = [] 
      for u in range(NB_LAYERS): # loop through the last 10 layers 
        for v in node_indices[u]: # loop through the 10 random nodes for that layer 
          t = tuple(v)
          # print((0,*t))
          # preds.append(preds_layers[i])
          preds_0.append ( preds_layers_0[u][(...,*t)] )
      preds_0 = np.array(preds_0).T
      iqr.append(abs(preds - preds_0))
  
  iqr = np.array(iqr)
  # print(iqr.shape)
  iqr_vals_batch = np.percentile(iqr, 75, axis=0)-np.percentile(iqr, 25, axis=0)
  # print(iqr_vals_batch.shape)
  if nb==1: 
    iqr_vals = iqr_vals_batch
    # print(iqr_all.shape)
  else: 
    iqr_vals = np.concatenate((iqr_vals, iqr_vals_batch), axis=0)      

    
  print(nb) 
  nb+=1
  if nb > NB_BATCHES:
    break  
 


print(iqr_vals.shape) 


In [None]:
from sys import getsizeof
# `iqr` is the leave-one-out array left over from the last batch of the previous cell.
print (iqr.shape)
# Size reported by sys.getsizeof, in units of 10**9 bytes.
getsizeof(iqr)/10**9

In [None]:
# iqr_vals = np.percentile(iqr_all, 75, axis=0)-np.percentile(iqr_all, 25, axis=0)
# iqr_vals is now produced by the ML-LOO loop itself; only its shape is checked here.
print (iqr_vals.shape) 

In [None]:
# IQR 3810 for adversarial 
%%time

nb=1

attack = fb.attacks.FGSM()
fmodel = fb.models.TensorFlowModel(model=pretrained_model, bounds=(bound_min, bound_max))


for images, labels in ds_train:
  
  labels_class = tf.argmax(labels, axis=1)
  fimages = attack.run(fmodel, images, criterion=fb.criteria.Misclassification(labels_class), epsilon=0.1)
  fpreds_layers = intermediate_model(fimages)
  fpreds = [] 
  for i in range(NB_LAYERS): # loop through the last 10 layers ]
    for j in node_indices[i]: # loop through the 10 random nodes for that layer 
      t = tuple(j)
      fpreds.append ( fpreds_layers[i][(...,*t)] )
  fpreds = np.array(fpreds).T
  pretrained_model.evaluate(fimages,labels)
  
  fiqr = [] 
  for i in range(32): 
    for j in range(32): 
      mask = np.ones((BATCH_SIZE,32,32,3)) 
      mask[:,i,j,:]=0 
      fimages_0 = fimages * mask
      fpreds_layers_0 = intermediate_model(fimages_0)
      fpreds_0 = [] 
      for u in range(NB_LAYERS): # loop through the last 10 layers ]
        for v in node_indices[u]: # loop through the 10 random nodes for that layer 
          t = tuple(v)
          fpreds_0.append ( fpreds_layers_0[u][(...,*t)] )
      fpreds_0 = np.array(fpreds_0).T
      fiqr.append(abs(fpreds - fpreds_0))
  fiqr = np.array(fiqr)
  fiqr_vals_batch = np.percentile(fiqr, 75, axis=0)-np.percentile(fiqr, 25, axis=0)
  # batch management
  if nb==1: 
    fiqr_vals = fiqr_vals_batch
  else: 
    fiqr_vals = np.concatenate((fiqr_vals, fiqr_vals_batch), axis=0)

  print(nb) 
  nb+=1
  if nb > NB_BATCHES:
    break 

print (fiqr_vals.shape)


In [None]:
# fiqr_vals = np.percentile(fiqr_all, 75, axis=0)-np.percentile(fiqr_all, 25, axis=0)
# print (fiqr_vals.shape) 

In [None]:
# free ram memory
# del(fiqr_all)

In [None]:
# Assemble the ML-LOO dataset: clean feature rows first (label 0), adversarial rows after (label 1).
X = np.concatenate((iqr_vals, fiqr_vals), axis=0)
n_orig, n_adv = iqr_vals.shape[0], fiqr_vals.shape[0]
y = np.concatenate((np.zeros(n_orig), np.ones(n_adv)))
print(X.shape)
print (y.shape)

# Mean 3-fold cross-validation accuracy: XGBoost first, then an SVC with default settings.
for estimator in (XGBClassifier(), SVC()):
  score = np.average(cross_val_score(estimator, X, y, cv=3))
  print (score)

In [None]:
from sklearn.model_selection import train_test_split
# Same SVC grid as in the IQR-10 section; GridSearchCV is imported in that earlier cell.
parameters = {'C':[0.1, 1, 10, 100, 1000, 10000, 10**5], 'gamma':[0.0001, 0.001, 0.01, 0.1, 1, 10]}
svc = SVC()
clf = GridSearchCV(svc, parameters)
# Hold out 33% of the samples; hyper-parameters are selected on the training part only.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf.fit(X_train,y_train)
print(clf.best_params_)
print(clf.best_score_)
# print (clf.best_estimator_, clf.best_index_)
# Accuracy of the selected model on the held-out part.
clf.best_estimator_.score(X_test, y_test)
# score = cross_val_score(clf.best_estimator_, X, y, cv=3)
# print (sum(score)/3)

In [None]:
# Accuracy of a default SVC scored on the same data it was fitted on (training accuracy, not a held-out estimate).
SVC().fit(X,y).score(X,y)

# Segmentation Approach