## Result Reproduction

This notebook contains the code for reproducing the results.

In this notebook you can reproduce the results for the FGSM untargeted and targeted attacks.

You can:

* Input Epsilon
* Check Accuracy and Attack Success Rate
* Save the perturbed images to file

### Please Run Each Cell one after another.
### Don't interrupt the kernel. If you have to interrupt it, restart the kernel and clear the output of both notebooks.

### First, initialize the model.

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

# Import MNIST data (labels are loaded one-hot encoded, see `one_hot=True`)
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

# Parameters
# Only `learning_rate` is used in the cells visible in this notebook;
# NOTE(review): the other three are presumably leftovers from the training
# notebook -- confirm before removing.
learning_rate = 0.001
training_epochs = 20
batch_size = 100
display_step = 1

# tf Graph Input
x = tf.placeholder(tf.float32, [None, 784]) # mnist data image of shape 28*28=784
y = tf.placeholder(tf.float32, [None, 10]) # 0-9 digits recognition => 10 classes

# Set model weights
# NOTE(review): the variables are created without explicit names, so the
# checkpoint restored by later cells is presumably matched through
# TensorFlow's auto-generated names -- keep the creation order unchanged
# unless that is confirmed not to matter.
# Hidden layer: 784 inputs -> 300 units, initialised from a unit normal.
W1 = tf.Variable(tf.random_normal([784, 300], mean=0, stddev=1))
b1 = tf.Variable(tf.random_normal([300], mean=0, stddev = 1))

# Output layer: 300 units -> 10 classes, initialised to zero.
W3 = tf.Variable(tf.zeros([300, 10]))
b3 = tf.Variable(tf.zeros([10]))

#y_pred = tf.Variable(np.arange(3000), dtype=tf.float32, name="prediction")

# Construct model

hidden1 = tf.nn.relu(tf.matmul(x, W1) + b1); #first hidden layer

#hidden2 = tf.nn.relu(tf.matmul(hidden1, W2) + b2); #second hidden layer

pred = tf.nn.softmax(tf.matmul(hidden1, W3) + b3) # Softmax layer outputs prediction probabilities

# Cross-entropy loss: per-example -sum(y * log(pred)), averaged over the batch.
# The attack cells differentiate this `cost` with respect to `x`.
# NOTE(review): log(pred) is not clipped, so a probability of exactly 0 would
# presumably yield inf/NaN here -- confirm whether that occurs in practice.
cost = tf.reduce_mean(-tf.reduce_sum(y*tf.log(pred), reduction_indices=1))

# Training op; it is built here but not run in any cell visible in this notebook.
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost)

# Used by the attack cells to restore 'temp/trained_model.ckpt'.
saver = tf.train.Saver()  

# Test images and their one-hot labels, used as the clean inputs for both attacks.
xts = mnist.test.images
yts = mnist.test.labels

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


### Two functions are defined: one calculates the attack success rate, the other saves the images

In [2]:
def attack_success_rate(xts, xts_new, yts, target):
    """Return the fraction of originally-correct test images that the attack fools.

    Only images the model classifies correctly *before* the perturbation are
    counted, so the rate measures what the attack changed rather than mistakes
    the model already made.

    The function reads the notebook-level graph nodes ``pred`` and ``x`` and
    uses ``Tensor.eval``, so it must be called while a default TensorFlow
    session is active (e.g. inside a ``with tf.Session()`` block).

    Args:
        xts: Clean test images, one flattened image per row.
        xts_new: Perturbed images, row-aligned with ``xts``.
        yts: One-hot labels, row-aligned with ``xts``.
        target: ``None`` (or any falsy value) scores an untargeted attack:
            success means the prediction changed. A truthy value scores the
            targeted attack: success means the prediction became
            ``(true_label + 1)`` modulo the number of classes.

    Returns:
        float: successful attacks divided by the number of correctly
        classified clean images; ``0.0`` when no clean image was classified
        correctly (nothing to attack).
    """
    xts_new = np.asarray(xts_new)
    yts = np.asarray(yts)
    true_labels = np.argmax(yts, axis=1)

    # Predictions on the clean images. Evaluating `pred` and taking the argmax
    # in NumPy avoids adding new ops to the TF graph on every call.
    prediction_old = np.argmax(pred.eval({x: xts}), axis=1)

    # Because we are only looking for the success rate on images that were
    # classified correctly in the first place.
    correct_mask = prediction_old == true_labels
    correct_prediction_no = int(np.count_nonzero(correct_mask))
    if correct_prediction_no == 0:
        # Avoid a ZeroDivisionError when the model gets nothing right.
        return 0.0

    prediction_old = prediction_old[correct_mask]

    # Predictions on the perturbed versions of those same images.
    prediction_new = np.argmax(pred.eval({x: xts_new[correct_mask]}), axis=1)

    if target:
        # Targeted: the goal class is the true class shifted by one
        # (equivalent to rolling the one-hot labels by one position).
        num_classes = yts.shape[1]
        prediction_target = (true_labels[correct_mask] + 1) % num_classes
        attack_success_index = np.equal(prediction_target, prediction_new)
    else:
        # Untargeted: any change of the predicted class counts as a success.
        attack_success_index = np.not_equal(prediction_old, prediction_new)

    # Calculate attack ratio
    attack_success_no = int(np.count_nonzero(attack_success_index))
    return attack_success_no / correct_prediction_no

In [3]:
def image_save(path, xts_adv):
    """Save every row of ``xts_adv`` as a 28x28 PNG under ``./<path>/``.

    Files are named ``adv_<row index>.png``. The output directory is created
    first if it does not exist, so the function also works on a fresh checkout.

    Args:
        path: Output directory, relative to the current working directory.
        xts_adv: Array with one flattened 28*28 image per row.
    """
    nrow = 28
    ncol = 28
    out_dir = str('./') + str(path)

    # plt.imsave writes straight to the given file name and fails if the
    # parent directory is missing, so make sure it exists up front.
    os.makedirs(out_dir, exist_ok=True)

    for i in range(xts_adv.shape[0]):
        image_name = out_dir + '/adv_' + str(i) + '.png'
        xsq = xts_adv[i].reshape((nrow, ncol))

        plt.imsave(image_name, xsq)

    print('Images Saved in', out_dir)

## FGSM Based Untargeted Attack

### Please Run Each Cell one after another.

In [4]:
# Type the FGSM perturbation size at the prompt, then press Enter.
# The value is given on a 0-256 pixel-intensity scale: the attack cell below
# divides it by 256 before perturbing the images.
# float() raises ValueError if the input is not a number.
# Run the next cell only after the prompt has been answered.

epsilon = float(input('Input Epsilon = '))

Input Epsilon = 25.1


In [5]:
with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    # Load the trained weights saved by the training notebook. Without them the
    # numbers below describe a freshly initialised model, so say so explicitly
    # instead of continuing silently.
    if os.path.exists('temp/checkpoint'):
        saver.restore(sess, 'temp/trained_model.ckpt')
    else:
        print("\nWARNING: 'temp/checkpoint' not found; results below come from "
              "untrained (freshly initialised) weights.")

    # FGSM Attack
    print("\nFGSM Untargeted Attack!")

    print ("\nEpsilon:", epsilon)
    # Rescale into a separate name so the user-entered `epsilon` is left
    # untouched and re-running this cell gives the same result.
    eps = float(epsilon / 256)

    # Accuracy of the model on whatever is fed through `x` / `y`.
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # Generate new test dataset.
    # tf.gradients returns a list with one entry per tensor in `xs`; take the
    # single gradient tensor so the result keeps the same shape as `x`.
    grad = tf.gradients(xs = x,
                        ys = cost)[0]

    # Untargeted FGSM: step *up* the loss of the true labels, then clip back
    # into the valid [0, 1] pixel range.
    adv_op = tf.clip_by_value(x + eps*tf.sign(grad), 0, 1)
    xts_new = sess.run(adv_op, feed_dict={x: xts, y: yts})

    # Accuracy on the perturbed test set
    print ("\nAccuracy:",
            accuracy.eval({x: xts_new,
                        y: yts},
                        session=sess))

    rate = attack_success_rate(xts, xts_new, yts, None)
    print ("\nAttack success rate",
            rate)

    image_save('ADV_image_untarget', xts_new)

INFO:tensorflow:Restoring parameters from temp/trained_model.ckpt

FGSM Untargeted Attack!

Epsilon: 25.1

Accuracy: 0.0009

Attack success rate 0.9990335051546392
Images Saved in ./ADV_image_untarget


## FGSM Based Targeted Attack

### Please Run Each Cell one after another.

In [8]:
# Type the FGSM perturbation size for the targeted attack, then press Enter.
# The value is given on a 0-256 pixel-intensity scale: the attack cell below
# divides it by 256 before perturbing the images.
# float() raises ValueError if the input is not a number.
# Run the next cell only after the prompt has been answered.

epsilon = float(input('Input Epsilon = '))

Input Epsilon = 12.88


In [9]:
# Target labels: rolling the one-hot vectors by one position turns class i
# into class (i + 1) % 10.
yts_shift = np.roll(yts, 1, axis = 1)

with tf.Session() as sess:

    sess.run(tf.global_variables_initializer())

    # Load the trained weights saved by the training notebook. Without them the
    # numbers below describe a freshly initialised model, so say so explicitly
    # instead of continuing silently.
    if os.path.exists('temp/checkpoint'):
        saver.restore(sess, 'temp/trained_model.ckpt')
    else:
        print("\nWARNING: 'temp/checkpoint' not found; results below come from "
              "untrained (freshly initialised) weights.")

    # FGSM Attack
    print("\nFGSM Targeted Attack!")

    print ("\nEpsilon:", epsilon)
    # Rescaled copy; the user-entered `epsilon` itself is left untouched.
    eps = float((epsilon)/256)

    # Generate new test dataset.
    # tf.gradients returns a list with one entry per tensor in `xs`; take the
    # single gradient tensor so the result keeps the same shape as `x`.
    grad = tf.gradients(xs = x,
                        ys = cost)[0]

    # Targeted Attack: step *down* the loss computed against the target labels,
    # then clip back into the valid [0, 1] pixel range.
    adv_op = tf.clip_by_value(x - eps*tf.sign(grad), 0, 1)
    xts_new = sess.run(adv_op, feed_dict={x: xts, y: yts_shift})

    # Predicted classes for the perturbed images
    prediction = sess.run(tf.argmax(pred, 1), feed_dict={x: xts_new})

    # First row: true labels; second row: predictions after the attack.
    print('\nResult for first-10 images:')
    print(np.argmax(yts[:10],1))
    print(prediction[:10])

    rate = attack_success_rate(xts, xts_new, yts, True)
    print("\nAttack success rate",rate)

    image_save('ADV_image_target', xts_new)

INFO:tensorflow:Restoring parameters from temp/trained_model.ckpt

FGSM Targeted Attack!

Epsilon: 12.88

Result for first-10 images:
[7 2 1 0 4 1 4 9 5 9]
[7 3 2 0 5 2 5 5 6 7]

Attack success rate 0.42794243986254293
Images Saved in ./ADV_image_target
