# Blackbox testing of robustness against DeepFool

### Importing required packages

In [85]:
from __future__ import absolute_import, division, print_function, unicode_literals

from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation, Dropout
import numpy as np
import tensorflow as tf


from art.attacks import DeepFool
from art.classifiers import KerasClassifier, BlackBoxClassifier
from art.utils import load_mnist

# Run TensorFlow in graph (non-eager) mode.
# NOTE(review): presumably required by the ART Keras wrapper used below when
# running on TensorFlow 2 -- confirm against the installed ART version.
tf.compat.v1.disable_eager_execution()

# Fix the NumPy and TensorFlow seeds so that weight initialisation and data
# shuffling are repeatable across "Restart & Run All". Some backend ops
# (e.g. on GPU) may still be non-deterministic, so treat reported accuracies
# as approximately reproducible rather than bit-exact.
SEED = 0
np.random.seed(SEED)
tf.compat.v1.set_random_seed(SEED)

### Load Dataset

In [78]:
# Step 1: Load the MNIST dataset
#
# load_mnist() yields four items: the training pair, the test pair, and the
# lower/upper pixel bounds that are later passed to ART as clip_values.
mnist_bundle = load_mnist()
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = mnist_bundle

### Creating a sample model
A sample model is used to demonstrate the use of the ART library.
Replace it here with the model that is to be tested.

In [79]:
# Step 2: Create the model
#
# Target network: two conv/ReLU/max-pool stages, a 100-unit dense layer, and
# a 10-way softmax output. Layers are listed in the order they are stacked.
model = Sequential([
    # Stage 1: 4 filters, 5x5 kernel, on 28x28x1 inputs
    Conv2D(4, 5, padding='same', input_shape=[28, 28, 1]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Stage 2: 10 filters, 5x5 kernel
    Conv2D(10, 5, padding='same'),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head
    Flatten(),
    Dense(100),
    Activation('relu'),
    Dense(10),
    Activation('softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Wrapping of ART classifier
Wrapping is necessary to use ART functions

In [80]:
# Step 3: Wrap the Keras model in ART's KerasClassifier and train it
#
# The pixel bounds returned with the dataset are handed to ART as clip_values.
pixel_range = (min_pixel_value, max_pixel_value)
classifier = KerasClassifier(model=model, clip_values=pixel_range)
classifier.fit(x_train, y_train, batch_size=128, nb_epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Black box environment
To evaluate the model in a black-box setting, we wrap the model under evaluation in a black-box wrapper, so that it is accessed only through its `predict` function.

In [81]:
# Step 4: Expose the trained classifier only through its predict function
blackbox_classifier = BlackBoxClassifier(
    predict=classifier.predict,
    input_shape=[28, 28, 1],
    nb_classes=10,
    clip_values=(min_pixel_value, max_pixel_value),
)

### Evaluating model based on original Dataset

In [82]:
# Step 5: Score the black-box classifier on the benign (unmodified) test set

predictions = blackbox_classifier.predict(x_test)

# Compare class indices: argmax along axis 1 of the predictions and of y_test.
predicted_classes = np.argmax(predictions, axis=1)
true_classes = np.argmax(y_test, axis=1)
num_correct = np.sum(predicted_classes == true_classes)

accuracy = num_correct / len(y_test)
print('Accuracy on benign test examples: {}%'.format(accuracy * 100))

Accuracy on benign test examples: 98.91%


### Creating a surrogate model to create adversarial samples

In [83]:
# Step 6: create surrogate model
#
# A smaller network than the target: a single conv/ReLU/max-pool stage
# feeding directly into a 10-way softmax layer.
surr_model = Sequential([
    Conv2D(4, 5, padding='same', input_shape=[28, 28, 1]),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(10),
    Activation('softmax'),
])

surr_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [84]:
# Step 6a: wrap the surrogate in ART's KerasClassifier and fit it
#
# NOTE(review): the surrogate is trained on the ground-truth labels y_train,
# not on labels obtained by querying blackbox_classifier -- confirm this
# matches the intended threat model.
surr_classifier = KerasClassifier(
    model=surr_model,
    clip_values=(min_pixel_value, max_pixel_value),
)
surr_classifier.fit(x_train, y_train, batch_size=128, nb_epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Creating Adversarial samples
Using DeepFool

In [86]:
# Step 6b: Generate adversarial test examples from the surrogate
#
# DeepFool is given the surrogate classifier only; the black-box target is
# never passed to the attack.
attack = DeepFool(surr_classifier)
x_test_adv = attack.generate(x=x_test)

### Evaluating model on adversarial samples

In [87]:
# Step 7: Score the black-box classifier on the adversarial test examples

predictions = blackbox_classifier.predict(x_test_adv)

# The adversarial inputs are scored against the original labels in y_test.
adv_predicted_classes = np.argmax(predictions, axis=1)
reference_classes = np.argmax(y_test, axis=1)
adv_num_correct = np.sum(adv_predicted_classes == reference_classes)

accuracy = adv_num_correct / len(y_test)
print('Accuracy on adversarial test examples: {}%'.format(accuracy * 100))

Accuracy on adversarial test examples: 28.1%


### Evaluation results
- Using accuracy as the evaluation metric, the sample model's performance dropped sharply when evaluated on adversarial samples (in this run, from 98.91% on benign test examples to 28.1% on adversarial ones).
- This result is expected, as the sample model has no defence against adversarial attacks.
- Note that the adversarial samples are constructed using the surrogate model; the actual model under test is not used to create them.
