In [None]:
# This link might be useful, unclear yet: https://medium.com/randomai/ensemble-and-store-models-in-keras-2-x-b881a6d7693f
import numpy as np
import keras
from keras import backend
from keras.models import load_model
import tensorflow as tf
from utils import *

import cleverhans.attacks as Attacks
from cleverhans.attacks import FastGradientMethod
from cleverhans.attacks import BasicIterativeMethod
from cleverhans.utils_keras import KerasModelWrapper

## Set up

In [None]:
# Notebook-wide configuration (the original author earmarked these for
# sys.argv[1] and sys.argv[2] respectively).
#   ATTACK_NAME  -- key used by the attack-selection cell below
#   DATASET_NAME -- key passed to get_dataset() and embedded in model file names
ATTACK_NAME, DATASET_NAME = 'fgsm', 'mnist'

In [None]:
# Choose the cleverhans attack class (AttackModel) and the keyword arguments
# (attack_params) that are later forwarded to its generate_np() call.
# Every branch clips the adversarial images to the [0.0, 1.0] pixel range.

# --- attacks configured with a perturbation budget `eps` ---
if ATTACK_NAME == 'fgsm':
    AttackModel = Attacks.FastGradientMethod
    attack_params = {
        'eps': 0.5,
        'clip_min': 0.0,
        'clip_max': 1.0,
    }
elif ATTACK_NAME == 'pgd':
    AttackModel = Attacks.ProjectedGradientDescent
    attack_params = {
        'eps': 0.5,
        'clip_min': 0.0,
        'clip_max': 1.0,
    }
elif ATTACK_NAME == 'bim':
    # Original author's note: subclass of pgd (rand_init == 0).
    AttackModel = Attacks.BasicIterativeMethod
    attack_params = {
        'eps': 0.5,
        'clip_min': 0.0,
        'clip_max': 1.0,
    }
elif ATTACK_NAME == 'inf-pgd':
    # PGD with the norm order spelled out explicitly as L-infinity.
    # NOTE(review): this branch used to sit under an "l-2" heading even though
    # it passes ord=np.inf -- confirm whether an L2 variant (ord=2) was meant.
    AttackModel = Attacks.ProjectedGradientDescent
    attack_params = {
        'eps': 0.5,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'ord': np.inf,
    }
# --- L2 attack ---
elif ATTACK_NAME == 'cw':
    # Carlini-Wagner L2 has no `eps` argument in cleverhans (see the
    # CarliniWagnerL2 parameter list); passing one is rejected or ignored
    # depending on the cleverhans version, so only the clipping range is set.
    AttackModel = Attacks.CarliniWagnerL2
    attack_params = {
        'clip_min': 0.0,
        'clip_max': 1.0,
    }
else:
    # Fail here, with a clear message, instead of with a NameError on
    # AttackModel several cells further down.
    raise ValueError(
        "Unknown ATTACK_NAME {!r}; expected one of "
        "'fgsm', 'pgd', 'bim', 'inf-pgd', 'cw'".format(ATTACK_NAME))




In [None]:
# Number of label classes; it sizes the per-class vote tables in the ensemble cell.
num_classes = 10

# get_dataset comes from utils; it hands back the four splits in this order.
x_train, y_train, x_test, y_test = get_dataset(DATASET_NAME)

In [None]:
# Fix the Keras learning phase to "test" and grab the TensorFlow session that
# Keras is using, so the attack can share it.
backend.set_learning_phase(False)
sess = backend.get_session()

# Define input TF placeholders: a batch of images (x) and a batch of
# 10-column label rows (y).  Only the datasets listed here get placeholders;
# for any other DATASET_NAME neither x nor y is created.
_image_shapes = {
    'mnist': (None, 28, 28, 1),
    'cifar10': (None, 32, 32, 3),
}
if DATASET_NAME in _image_shapes:
    x = tf.placeholder(tf.float32, shape=_image_shapes[DATASET_NAME])
    y = tf.placeholder(tf.float32, shape=(None, 10))

In [None]:
def test_keras_model(model, SEED, images, labels):
    """Score ``model`` on ``images`` after permuting their pixels with ``SEED``.

    Parameters
    ----------
    model : object exposing ``predict``
        ``predict`` must return one row of per-class scores per image.
    SEED
        Forwarded to ``permute_pixels`` together with ``images``.
    images
        Batch of inputs; it is permuted before being given to the model.
    labels : array-like
        Ground truth for ``images``.  Accepted layouts are class indices of
        shape ``(N,)`` or ``(N, 1)``, or one-hot rows of shape ``(N, C)``.

    Returns
    -------
    tuple
        ``(pred, acc)``: the predicted class index per image and the fraction
        of images whose prediction equals its label.
    """
    x_shuffle = permute_pixels(images, SEED)
    pred = np.argmax(model.predict(x_shuffle), axis=1)

    # Bring the labels to a flat vector of class indices.  Without this an
    # (N, 1) label column broadcasts against the (N,) predictions into an
    # (N, N) comparison and the "accuracy" becomes meaningless.
    labels = np.asarray(labels)
    if labels.ndim == 2 and labels.shape[0] == pred.shape[0] and labels.shape[1] > 1:
        # One row per sample with several columns: treat as one-hot.
        labels = np.argmax(labels, axis=1)
    elif labels.ndim > 1 and labels.size == pred.size:
        # e.g. an (N, 1) column of class indices.
        labels = labels.reshape(-1)

    acc = np.mean(np.equal(pred, labels))
    return (pred, acc)

## Generate Adversarial Examples

In [None]:
# Pixel-permutation seed that goes with the model we hand to the adversary.
KNOWN_SEED = 87

# Load that model from disk and record its accuracy on the clean test set.
known_model_path = 'models/{}_trained_keras_model.hdf5'.format(DATASET_NAME)
known_model = load_model(known_model_path, custom_objects={'tf': tf})
pred, acc = test_keras_model(
    model=known_model, SEED=KNOWN_SEED, images=x_test, labels=y_test)
print("The normal test accuracy is: {}".format(acc))

In [None]:
# Generate adversarial examples (x_adv) from the test images, using only the
# known model.  References:
#   http://everettsprojects.com/2018/01/30/mnist-adversarial-examples.html
#   https://cleverhans.readthedocs.io/en/latest/source/attacks.html#generate_np
#
# NOTE(review): the attack is run on x_test as-is, whereas test_keras_model
# applies permute_pixels(..., KNOWN_SEED) before calling the model -- confirm
# that attacking the unpermuted input is the intended setup.

wrap = KerasModelWrapper(known_model)
attack_model = AttackModel(wrap, sess=sess)
x_adv = attack_model.generate_np(x_test, **attack_params)

# Accuracy of the known (single) model on the adversarial images.
pred, acc = test_keras_model(
    model=known_model, SEED=KNOWN_SEED, images=x_adv, labels=y_test)
print("The adversarial test accuracy is: {}".format(acc))

## Attack Ensemble PPD models

In [None]:
# Evaluate the clean test set and x_adv against the ensemble of models.

# Ensemble size; models for seeds 0 .. num_models-1 are loaded from disk.
# (Original note: debugging with a smaller number, change to 10/50 later.)
num_models = 10
num_samples = x_adv.shape[0]

# Per-model accuracies (the adversarial ones are not reported in the paper).
normal_acc, adv_acc = [], []

# Vote tables: one row per sample, one column per class.  Each model adds one
# vote per sample to the column of the class it predicts.
normal_pred = np.zeros((num_samples, num_classes))
adv_pred = np.zeros((num_samples, num_classes))
sample_rows = np.arange(num_samples)

for SECRET_SEED in range(num_models):
    model_path = 'models/{}_trained_keras_model_{}.hdf5'.format(DATASET_NAME, SECRET_SEED)
    keras_model = load_model(model_path, custom_objects={'tf': tf})

    # Clean test images.
    pred, acc = test_keras_model(keras_model, SECRET_SEED, x_test, y_test)
    normal_pred[sample_rows, pred] += 1
    print('SECRET_SEED:', SECRET_SEED, 'Individual model normal accuracy:', acc)
    normal_acc.append(acc)

    # Adversarial images crafted against the known model.
    pred, acc = test_keras_model(keras_model, SECRET_SEED, x_adv, y_test)
    adv_pred[sample_rows, pred] += 1
    print('SECRET_SEED:', SECRET_SEED, 'Individual model adversarial accuracy:', acc)
    adv_acc.append(acc)


# The ensemble answer for a sample is the class with the most votes; argmax
# resolves ties in favour of the lowest class index.
ensemble_pred = normal_pred.argmax(axis=1)
acc = np.equal(ensemble_pred, y_test).mean()
print('Ensemble normal accuracy:', acc)

ensemble_pred = adv_pred.argmax(axis=1)
acc = np.equal(ensemble_pred, y_test).mean()
print('Ensemble adversarial accuracy:', acc)