In [1]:
# This link might be useful, unclear yet: https://medium.com/randomai/ensemble-and-store-models-in-keras-2-x-b881a6d7693f
import numpy as np
import keras
from keras import backend
from keras.models import load_model
import tensorflow as tf
from utils import *

from cleverhans.attacks import FastGradientMethod
from cleverhans.attacks import BasicIterativeMethod
from cleverhans.utils_keras import KerasModelWrapper

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
dataset = 'mnist'
num_classes = 10
(x_train, y_train, x_test, y_test) = get_dataset(dataset)

x_train shape (60000, 28, 28, 1)
y_train shape (60000,)
x_test shape (10000, 28, 28, 1)
y_test shape (10000,)


In [3]:
backend.set_learning_phase(False)
sess =  backend.get_session()

# Define input TF placeholder
x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
y = tf.placeholder(tf.float32, shape=(None, 10))

In [4]:
# this is the secret seed 87 one
# we're going to give this trained model to the adversary
KNOWN_SEED = 87
keras_model = load_model('models/'+dataset+'_trained_keras_model'+'.hdf5', custom_objects={'tf':tf}) 
x_shuffle = permute_pixels(x_test, KNOWN_SEED)
pred = np.argmax(keras_model.predict(x_shuffle), axis = 1)
acc =  np.mean(np.equal(pred, y_test))
print("The normal test accuracy is: {}".format(acc))

The normal test accuracy is: 0.957


In [6]:
# generate adversariale examples (x_adv) using the 87 keras model
# http://everettsprojects.com/2018/01/30/mnist-adversarial-examples.html
# https://cleverhans.readthedocs.io/en/latest/source/attacks.html#generate_np
wrap = KerasModelWrapper(keras_model)
fgsm = FastGradientMethod(wrap, sess=sess)
fgsm_params = {'eps': 0.3,
               'clip_min': 0.,
               'clip_max': 1.}
x_adv = fgsm.generate_np(x_test, **fgsm_params)

# test x_adv against the single model
x_shuffle = permute_pixels(x_test, KNOWN_SEED)
pred = np.argmax(keras_model.predict(x_shuffle), axis = 1)  # predicted class labels
acc =  np.mean(np.equal(pred, y_test))
print("The adversarial validation accuracy is: {}".format(acc))

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

The adversarial validation accuracy is: 0.0736


In [7]:
# test x_adv against the ensemble model

num_models = 1  # debugging with smaller number. change this to 50 later.
num_samples = x_adv.shape[0]
acc_per_model = []
# We're assuming majority voting?
pred_all_model = np.zeros((num_samples, num_classes)) # we're going to store votes from each model here

for SECRET_SEED in range(num_models):
    keras_model = load_model('models/'+dataset+'_trained_keras_model_'+str(SECRET_SEED)+'.hdf5', custom_objects={'tf':tf})
    x_adv_shuffle = permute_pixels(x_adv, SECRET_SEED)
    pred = np.argmax(keras_model.predict(x_adv_shuffle), axis = 1)  # predicted class labels
    pred_all_model[:, pred] += 1  # +1 vote 
    acc =  np.mean(np.equal(pred, y_test))
    print ('Individual model accuracy:', acc, 'SECRET_SEED:', SECRET_SEED)
    acc_per_model.append(acc)  # accuracy per model, not reported in paper

# for each adversarial sample, find out the class with most votes
ensemble_pred = np.argmax(pred_all_model, axis = 1)
acc =  np.mean(np.equal(ensemble_pred, y_test))
print ('Ensemble accuracy:', acc)

Individual model accuracy: 0.0933 SECRET_SEED: 0
Ensemble accuracy: 0.098
