In [1]:
"""
classifier and attacker
dataset: mnist
""" 
# ---------------------
# import required packages
# ---------------------
from __future__ import division, absolute_import, print_function

import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K
import time

from cleverhans.evaluation import batch_eval
from keras.utils import np_utils
from keras.datasets import mnist
#from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models
from tensorflow.keras.models import load_model
from tqdm import tqdm

tf.enable_eager_execution()
tf.set_random_seed(1000)

%matplotlib inline
%load_ext autotime

W0714 14:12:25.082078 140587591059200 deprecation_wrapper.py:119] From /home/rabina7/anaconda3/envs/opencv/lib/python2.7/site-packages/cleverhans/utils_tf.py:341: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.

Using TensorFlow backend.


In [2]:
# ------------------------------------
# Configuration for the experiment
# ------------------------------------
# parameters (model)
LR = 0.001
BATCH_SIZE = 128

# parameters (dataset)
IMG_ROW = 28
IMG_COL = 28
VAL_RATE = 0.2

# parameters (fgsm)
eps = 0.25

"""
Datasets used to train the models,
Each item stands for one type of transformation being applied on the dataset.
Specifically, 'clean' means the original dataset, no transformation being applied,
and each the rest stands for the transformation being applied on the clean dataset.
e.g., 'rotate90' means that each image was rotated 90 deg.
"""
# -----------------------------------
# Your task:
# Please complete this array, adding the transformations of interest.
# Please note that the transformations being added here
# should be the same you added in the transform function.
# -----------------------------------
M = ['clean', 'rotate90', 'rotate180', 'rotate270', 'erosion', 'dilation', 'gradient']
# a walkaround for error of not able to save trained model:
# training models one by one
#M = ['rotate90'] 

# -----------------------------------
# Your task ends
# -----------------------------------

"""
Adversarial examples generation algorithms.
"""
attacks = ['fgsm'] #, 'jsma']

time: 9.34 ms


In [3]:
"""
Load data
"""
def load_mnist():
    """
    Load and process training set and test set
    """
    (X_train, Y_train), (X_test, Y_test) = mnist.load_data()
    X_train = X_train.reshape(-1, IMG_ROW, IMG_COL, 1)
    X_test = X_test.reshape(-1, IMG_ROW, IMG_COL, 1)
    # cast pixels to floats, normalize to [0, 1] range
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255

    # one-hot-encode the labels
    Y_train = np_utils.to_categorical(Y_train, 10)
    Y_test = np_utils.to_categorical(Y_test, 10)
    
    print("Dataset summary:")
    print("Train set: {}, {}".format(X_train.shape, Y_train.shape))
    print("Test set: {}, {}".format(X_test.shape, Y_test.shape))
    
    return (X_train, Y_train), (X_test, Y_test)

time: 9.99 ms


In [4]:
"""
Define model structures for some datasets.
"""
def cnn():
    """
    Returns the appropriate Keras model.
    :return: The model; a Keras 'Sequential' instance.
    """
    # MNIST model
    struct = [
        layers.Conv2D(32, (3, 3), input_shape = (IMG_ROW, IMG_COL, 1)),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size = (2, 2)),

        layers.Conv2D(64, (3, 3)),
        layers.Activation('relu'),
        layers.MaxPooling2D(pool_size = (2, 2)),

        layers.Flatten(),
        layers.Dense(64 * 64),
        layers.Dropout(rate = 0.4),
        layers.Dense(10),
        layers.Activation('softmax')
    ]
    
    model = models.Sequential()
    for layer in struct:
        model.add(layer)

    return model

time: 9.53 ms


In [5]:
"""
Training a model
"""
def train(X, Y, model_name='mnist_cnn_clean.model'):
    """
    Train a model over given training set,
    then save the trained model.
    :param: model - the keras model to train
    :param: train_set - Tuple of the training set, includes training samples and corresponding desired labels.
    :param: val_set - Tuple of the validation set, includes samples and desired labels.
    :param: model_name - the name used to save the trained model.
    :return: na
    """
    nb_train = int(len(X) * VAL_RATE)
    train_samples = X[:-nb_train]
    train_classes = Y[:-nb_train]
    val_samples = X[-nb_train:]
    val_classes = Y[-nb_train:]
    
    model = cnn()
    
    model.compile(loss = 'categorical_crossentropy', 
                  optimizer = 'adam', metrics = ['accuracy'])
    
    
    # Train the model
    print("Training {}...".format(model_name))
    model.fit(train_samples, train_classes, epochs = 1,
              batch_size = BATCH_SIZE, shuffle = True,
              verbose = 1, validation_data = (val_samples, val_classes))
     
    # Save the model
    #model.save("data/{}".format(model_name))
    models.save_model(model, "data/{}".format(model_name))
    del model
    
    print("Trained model has been saved to data/{}".format(model_name))

time: 11.7 ms


In [6]:
"""
Implement algorithms generating adversarial examples
"""
def fgsm(x, prediction, eps, y = None):
    '''
    Define the symbolic FGSM fitting tf framework
    '''
    if y is None:
        y = tf.cast(tf.equal(prediction, 
                                tf.reduce_max(prediction, 1, keepdims = True)), tf.float32)
        
    y /= tf.reduce_sum(y, 1, keepdims = True)
    
    logits, = prediction.op.inputs
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = y))

    grad, = tf.gradients(loss, x)
    perturbation = eps * tf.sign(grad)
    adv_sample = tf.stop_gradient(x + perturbation)
    
    return adv_sample

time: 6.92 ms


In [7]:
"""
Crafting adversarial examples.
@author: Ying Meng (y.meng201011(at)gmail(dot)com)
"""
def generate_adversarial(model_name, X, Y, attack_approach):
    """
    Craft and save adversarial examples.
    :param: model_name - the name of the target model
    :param: X
    :param: Y
    :param: attack_approach - adversarial example generation algorithm to use
    :param: adv_file_name - the file name used to save the generated adversarial exmaples
    """
    
    with tf.Session() as sess:
        K.set_session(sess)
        K.set_learning_phase(0)
        
        # define tf placeholders and operations
        x = tf.placeholder(tf.float32, shape = (None,) + X.shape[1:])
        y = tf.placeholder(tf.float32, shape = (None,) + Y.shape[1:])

        # load model
        model = load_model("data/{}".format(model_name))
        adv_file_name = model_name.split('.')[0]

        # model accuracy
        _, acc_original = model.evaluate(X, Y, batch_size = BATCH_SIZE, verbose = 0)
        print('test acc (on original): {}'.format(acc_original))

        if attack_approach == 'fgsm':
            adv_file_name = '{}_{}_eps{}.npy'.format(adv_file_name, attack_approach, int(eps * 100))

            # symbolic fgsm
            x_adv = fgsm(x, model(x), eps = eps, y = y)

            # craft adversarial examples
            X_adv, = batch_eval(sess, [x, y], [x_adv], [X, Y], batch_size = BATCH_SIZE)
        elif attack_approach == 'jsma':
            raise NotImplementedError('Not ready yet.')
        else:
            raise ValueError('{} is not supported.'.format(attack_approach))

        # test accuracy on adversarial examples
        _, acc_adv = model.evaluate(X_adv, Y, batch_size = BATCH_SIZE, verbose = 0)
        print ('test acc (on adversarial): {} - (epsilon: {})'.format(acc_adv, eps))
        
        print('adv:', X.shape, X_adv.shape, Y.shape)
                
        # save the generated adversarial examples
        np.save("data/orig_{}".format(adv_file_name), X)
        np.save("data/adv_{}".format(adv_file_name), X_adv)
        np.save("data/label_{}".format(adv_file_name), Y)
        print("adversarial examples were generated and saved to data/{}".format(adv_file_name))


time: 25.3 ms


In [8]:
"""
Apply standard transformation on given dataset.
"""
def transform(original_images, IMG_ROW, IMG_COL, transformation):
    """
    Apply standard transformation on given dataset.
    :param: original_images - the images to applied transformations on.
    :param: transformation - the standard transformation to apply.
    :return: the transformed dataset.
    """
    trans_matrix = None
    
    transformed_images = []
    
    if transformation == 'rotate90':
        angle90 = 90
        scale = 1.0
        center = (IMG_ROW / 2, IMG_COL / 2)
        
        trans_matrix = cv2.getRotationMatrix2D(center, angle90, scale)
    elif transformation == 'rotate180':
        angle180 = 180
        scale = 1.0
        center = (IMG_ROW / 2, IMG_COL / 2)
        
        trans_matrix = cv2.getRotationMatrix2D(center, angle180, scale)
    elif transformation == 'rotate270':
        angle270 = 270
        scale = 1.0
        center = (IMG_ROW / 2, IMG_COL / 2)
        
        trans_matrix = cv2.getRotationMatrix2D(center, angle270, scale)
        
    elif transformation == 'erosion':
        kernel = np.ones((5, 5), np.uint8)
        center = (IMG_ROW/2 , IMG_COL/2)
        
        trans_matrix = cv2.erode(kernel, center, iterations = 1)
    elif transformation == 'dilation':
        center = (IMG_ROW/2, IMG_COL/2)
        kernel = np.ones((5, 5), np.uint8);
        
        trans_matrix = cv2.dilate(kernel,center,iterations = 1)
        
    elif transformation == 'gradient':
        kernel = np.ones((5, 5), np.uint8)
        center = (IMG_ROW/2, IMG_COL/2)
        trans_matrix = cv2.morphologyEx(cv2.MORPH_GRADIENT, center, kernel)
    '''elif transformation == 'Affine:
        center = (IMG_ROW / 2, IMG_COL / 2)
        pts = ([[1,0,100],[0,1,50]])
        #dst = cv2.getAffineTransform(pts)
        trans_matrix = cv2.warpAffine(pts, center)'''
   
        
    # ----------------------------
    # Your codes start here
    # ----------------------------
    """
    Please take care the rest transformation types here:
    1. Each type per elif statement,
    for example, elif transformation == 'rotate180':
    2. Define transformation matrix (please assign to variable 'trans_matrix')
    Please take care of the corresponding parameters for each transformation type, respectively.
    """
    
        
    # ----------------------------
    # Your codes end
    # ----------------------------
#     else:
#         raise ValueError('{} is not supported.'.format(transformation))
    
    # applying an affine transformation over the dataset
    transformed_images = np.zeros_like(original_images)
    for i in range(original_images.shape[0]):
        transformed_images[i] = np.expand_dims(cv2.warpAffine(original_images[i], trans_matrix, 
                                                              (IMG_COL, IMG_ROW)), axis = 2)

        # for debugging
        '''for i in range(5):
            cv2.imshow('clean image', X_train[i].reshape(IMG_ROW, IMG_COL))
            cv2.waitKey(0)
            cv2.destroyAllWindows()

            cv2.imshow(transformation, transformed_images[i])
            cv2.waitKey(0)
            cv2.destroyAllWindows()'''
    print('Applied transformation {}.'.format(transformation))
    return transformed_images

"""
Evaluate a model
"""
def evaluate(model_name, X, Y):
    """
    Evaluate given model.
    :param: model - the model to evaluate.
    :param: X - the test set.
    :param: Y - the desired labels associated with test examples.
    :return: test accuracy, 
            average confidence for correctly classified examples, 
            average confidence for misclassified examples.
    """
    correct_cnt = 0
    nb_examples = 0
    
    test_acc = 0.
    conf = 0.
    conf_misclassified = 0.
    
    model = load_model('data/{}'.format(model_name))
    
    pred_probs = model.predict(X, batch_size=BATCH_SIZE)
    
    # iterate over test set
    for pred_prob, true_prob in zip(pred_probs, Y):
        nb_examples += 1
        
        pred_label = np.argmax(pred_prob)
        true_label = np.argmax(true_prob)
        
        if (pred_label == true_label):
            correct_cnt += 1
            conf += np.max(pred_prob)
        else:
            conf_misclassified += np.max(pred_prob)
    
    # test accuracy
    test_acc = (1.0 * correct_cnt) / nb_examples
    
    # average confidece for correctly classified examples
    avg_conf = conf / correct_cnt
    
    # average confidence for misclassified examples
    avg_conf_misclassified = conf_misclassified / (nb_examples - correct_cnt)
    
    return test_acc, avg_conf, avg_conf_misclassified    

time: 80.3 ms


In [9]:
"""
Evaluate an attack

"""
def evaluate_attack(model_name, adv_file_name, transformation):
    """
    Evaluate attack approach
    :param: model_name - name of the target model.
    :param: X - the original test set.
    :param: Y - the desired labels associated with test examples.
    :param: adv_file_name - name of the adversarial example file.
    :return: test accuracy,
            average confidence,
            error rate
    """
    # load model
    model = load_model('data/{}'.format(model_name))
    
    cnt_correct = 0
    nb_legitimates = 0
    cnt_miss = 0
    
    test_acc = 0.
    conf = 0.
    err_rate = 0.
    
    X = np.load('data/orig_{}'.format(adv_file_name))
    X_adv = np.load('data/adv_{}'.format(adv_file_name))
    Y = np.load('data/label_{}'.format(adv_file_name))
    
    print(X.shape, X_adv.shape, Y.shape)
    
    if transformation != 'clean':
        X = transform(X, IMG_ROW, IMG_COL, transformation)
        X_adv = transform(X_adv, IMG_ROW, IMG_COL, transformation)
    
    pred_probs = model.predict(X, batch_size=BATCH_SIZE)
    pred_probs_adv = model.predict(X_adv, batch_size=BATCH_SIZE)
    
    _, acc_original = model.evaluate(X, Y, batch_size = BATCH_SIZE, verbose = 0)
    _, acc_adv = model.evaluate(X_adv, Y, batch_size = BATCH_SIZE, verbose = 0)
    
    print(acc_original, acc_adv)
    
    for pred_prob, pred_prob_adv, true_prob in zip(pred_probs, pred_probs_adv, Y):
        pred_label = np.argmax(pred_prob)
        pred_label_adv = np.argmax(pred_prob_adv)
        true_label = np.argmax(true_prob)
        
        if (pred_label == true_label):
            nb_legitimates += 1
            if (pred_label_adv != pred_label):
                conf += np.max(pred_prob_adv)
                cnt_miss += 1
            else:
                cnt_correct += 1
              
    # error rate
    miss_original = 1 - acc_original
    miss_adv = 1 - acc_adv
    err_rate = miss_adv - miss_original
    
    # average confidece for examples successfully attacked
    avg_conf = conf / cnt_miss
        
    return acc_original, acc_adv, err_rate, avg_conf

time: 27.2 ms


In [10]:
"""
Training models: One over each transformed dataset.
Then evaluating each trained models.
"""
def training():
    # training models
    for trans in tqdm(M):
        model_name = 'mnist_cnn_{}.h5'.format(trans)
        print("Training model {}...".format(model_name))
        
        (X_train, Y_train), _ = load_mnist()

        if trans != 'clean':
            # transform data
            X_train = transform(X_train, IMG_ROW, IMG_COL, trans)
        
        # train the model
        train(X_train, Y_train, model_name=model_name)
        
        print("---------------------------")
        print()

time: 4.99 ms


In [11]:
"""
Crafting adversarial examples.
"""
def crafting_adversarial_examples():
    (X_train, Y_train), (X_test, Y_test) = load_mnist()

    # generate_adversarial(model_name, X, Y, attack_approach):

    for attack in tqdm(attacks):
        #generate_adversarial(model_name = 'mnist_cnn_clean.h5', X_train, Y_train, attack)
        generate_adversarial('mnist_cnn_clean.h5', X_test, Y_test, attack)

time: 2.39 ms


In [12]:
"""
Evaluating adversarial attacks.
@author: Ying Meng (y.meng201011(at)gmail(dot)com)
"""
def evaluating_attacks():
    acc_models = []
    acc_advs = []
    ave_confs = []
    error_rates = []
    
    # def evaluate_attack(model_name, X, Y, adv_file_name):
    for attack in tqdm(attacks):
        adv_file_name = 'mnist_cnn_clean_{}_eps{}.npy'.format(attack, int(eps * 100))

        for trans in tqdm(M):
            model_name = 'mnist_cnn_{}.h5'.format(trans)
            acc, acc_adv, err_rate, conf = evaluate_attack(model_name, adv_file_name, trans)

            acc_models.append(acc)
            acc_advs.append(acc_adv)
            ave_confs.append(conf)
            error_rates.append(err_rate)

    return acc_models, acc_advs, error_rates, ave_confs

time: 6.68 ms


In [13]:
"""
Evaluate models
@author: Ying Meng (y.meng201011(at)gmail(dot)com)
"""
def evaluating():
    test_accuracies = []
    ave_confs = []
    ave_confs_misclassified = []
    
    # training models
    for trans in tqdm(M):
        model_name = 'mnist_cnn_{}.h5'.format(trans)
        print("Evaluating model {}...".format(model_name))
        
        _, (X_test, Y_test) = load_mnist()

        if trans != 'clean':
            # transform data
            X_test = transform(X_test, IMG_ROW, IMG_COL, trans)
         
        # evaluate trained model
        acc, conf, conf_misclassified = evaluate(model_name, X_test, Y_test)

        test_accuracies.append(acc)
        ave_confs.append(conf)
        ave_confs_misclassified.append(conf_misclassified)

    return test_accuracies, ave_confs, ave_confs_misclassified

time: 7.76 ms


In [14]:
# ---------------------------
# Automating experiments.
# 1. train and save models
# 
# You only need to run this once per model structure, dataset.
# ---------------------------
#training()

time: 1.29 ms


In [15]:
# ---------------------------
# Automating experiments.
# 2. craft and save adversarial examples
#
# You only need to run this once per targeted model, attack approach.
# ---------------------------
crafting_adversarial_examples()

  0%|          | 0/1 [00:00<?, ?it/s]W0714 14:12:25.825325 140587591059200 deprecation.py:506] From /home/rabina7/anaconda3/envs/opencv/lib/python2.7/site-packages/tensorflow/python/ops/init_ops.py:97: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0714 14:12:25.827725 140587591059200 deprecation.py:506] From /home/rabina7/anaconda3/envs/opencv/lib/python2.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling __init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0714 14:12:25.830027 140587591059200 deprecation.py:506] From /home/rabina7/anaconda3/envs/opencv/lib/python2.7/site-packages/tensorflow/python/ops/init

Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)
test acc (on original): 0.982500016689


100%|██████████| 1/1 [00:07<00:00,  7.25s/it]

test acc (on adversarial): 0.0518000014126 - (epsilon: 0.25)
adv: (10000, 28, 28, 1) (10000, 28, 28, 1) (10000, 10)
adversarial examples were generated and saved to data/mnist_cnn_clean_fgsm_eps25.npy
time: 7.68 s





In [16]:
# ---------------------------
# Automating experiments.
# 3. evaluate trained models
#
# ---------------------------
acc, confs, confs_misclassified = evaluating()

# print reports
pd.options.display.float_format = '{:.4f}'.format
pd.DataFrame(data = {
    'Model': M,
    'Test Acc': acc,
    'Average Confidence': confs,
    'Average Confidence (misclassified)': confs_misclassified
})

  0%|          | 0/7 [00:00<?, ?it/s]

Evaluating model mnist_cnn_clean.h5...
Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)


Exception tensorflow.python.framework.errors_impl.CancelledError: (None, None, 'Session has been closed.') in <bound method _Callable.__del__ of <tensorflow.python.client.session._Callable object at 0x7fdcbfcf6510>> ignored
W0714 14:12:33.796168 140587591059200 deprecation.py:323] From /home/rabina7/anaconda3/envs/opencv/lib/python2.7/site-packages/tensorflow/python/ops/math_grad.py:1250: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
 14%|█▍        | 1/7 [00:02<00:14,  2.49s/it]

Evaluating model mnist_cnn_rotate90.h5...
Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)
Applied transformation rotate90.


 29%|██▊       | 2/7 [00:05<00:12,  2.53s/it]

Evaluating model mnist_cnn_rotate180.h5...
Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)
Applied transformation rotate180.


 43%|████▎     | 3/7 [00:07<00:10,  2.62s/it]

Evaluating model mnist_cnn_rotate270.h5...
Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)
Applied transformation rotate270.


 57%|█████▋    | 4/7 [00:11<00:08,  2.79s/it]

Evaluating model mnist_cnn_erosion.h5...
Dataset summary:
Train set: (60000, 28, 28, 1), (60000, 10)
Test set: (10000, 28, 28, 1), (10000, 10)


error: OpenCV(3.4.2) /tmp/build/80754af9/opencv-suite_1535558553474/work/modules/imgproc/src/imgwarp.cpp:2619: error: (-215:Assertion failed) (M0.type() == 5 || M0.type() == 6) && M0.rows == 2 && M0.cols == 3 in function 'warpAffine'


time: 11.9 s


In [None]:
# ---------------------------
# Automating experiments.
# 4. craft and save adversarial examples
#
# ---------------------------
acc, acc_adv, error_rates, confs = evaluating_attacks()

pd.options.display.float_format = '{:.4f}'.format
pd.DataFrame(data = {
    'Model': M,
    'Acc (model)': acc,
    'Acc (adversarial)': acc_adv,
    'Average Confidence': confs,
    'Error Rate': error_rates
})