In [None]:
__author__ = 'Fan Fan, Kwonjoon Lee and Weijian Xu'

# Python libraries.
import os
import tensorflow as tf
import numpy as np
import scipy.misc
from datetime import datetime
import copy
import time
import matplotlib.pyplot as plt

# Custom libraries.
from Utils import *

In [None]:
# Index of the GPU exposed to TensorFlow through CUDA_VISIBLE_DEVICES.
# Default value is 0.
gpu = 2

# Perturbation magnitude used by FGSM. Default value is 0.25. Pixels are kept
# in the range [-1, 1] here, whereas the FGSM paper uses [0, 1], so an epsilon
# of 0.25 in this notebook corresponds to an epsilon of 0.125 in the paper.
epsilon = 0.25

# Shape of one input image as [height, width, channels].
# For MNIST, it is [28, 28, 1].
image_shape = [28, 28, 1]

# Number of residual units per stage of the ResNet. 5 gives ResNet-32.
units = 5

# Number of images processed per step. Should be 1.
batch_size = 1

# Checkpoint of the baseline model.
# Default value is ./baseline/model/epoch_?_model.ckpt.
baseline_model = './baseline/model/epoch_129_model.ckpt'

# Checkpoint of the WINN model.
# Default value is ./winn/model/epoch_?_model.ckpt.
winn_model = './winn/model/epoch_33_model.ckpt'

# Root directory for adversarial-example outputs; the log file is written
# here and the directory is created if it does not exist.
root_dir = './adv'

# Individual image dimensions derived from image_shape.
height, width, channels = image_shape

In [None]:
def layer_norm(scope, input_layer, is_training, reuse):
    """Apply `tf.contrib.layers.layer_norm` with a scale term enabled.

    Args:
        scope: Variable scope name handed to the TF layer.
        input_layer: Tensor to normalize.
        is_training: Not used by this function.
        reuse: Forwarded as the `reuse` argument of the TF layer.

    Returns:
        The tensor returned by `tf.contrib.layers.layer_norm`.
    """
    return tf.contrib.layers.layer_norm(
        input_layer, scale=True, reuse=reuse, scope=scope)

def conv2d_res(scope, input_layer, output_dim, use_bias=False,
               filter_size=3, strides=[1, 1, 1, 1]):
    """Create a 2-D convolution (optionally with bias) inside `scope`.

    Args:
        scope: Variable scope under which 'conv_weight' (and 'conv_bias')
            are created.
        input_layer: Input tensor; its last static dimension is taken as the
            number of input channels.
        output_dim: Number of output channels.
        use_bias: When true, a zero-initialized bias is added to the result.
        filter_size: Side length of the square kernel.
        strides: Strides passed to `tf.nn.conv2d`. The default list is never
            modified here.

    Returns:
        The convolution output, with bias applied when `use_bias` is set.
    """
    in_channels = input_layer.get_shape().as_list()[-1]
    kernel_shape = [filter_size, filter_size, in_channels, output_dim]

    with tf.variable_scope(scope):
        # Variance-scaling initialization with an L2 penalty on the kernel.
        kernel = tf.get_variable(
            'conv_weight',
            shape=kernel_shape,
            dtype=tf.float32,
            initializer=tf.contrib.layers.variance_scaling_initializer(),
            regularizer=tf.contrib.layers.l2_regularizer(scale=0.0002),
        )
        conv = tf.nn.conv2d(input_layer, kernel, strides, 'SAME')

        if not use_bias:
            return conv

        bias = tf.get_variable(
            'conv_bias',
            shape=[output_dim],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0),
        )
        biased = tf.nn.bias_add(conv, bias)
        # Reshape back to the static shape of the un-biased convolution.
        return tf.reshape(biased, conv.get_shape())

def residual(scope, input_layer, is_training, reuse,
             increase_dim=False, first=False):
    """Build one two-convolution residual block inside `scope`.

    Args:
        scope: Variable scope name for the block.
        input_layer: Input tensor; its last static dimension is the channel
            count.
        is_training: Forwarded to `layer_norm`.
        reuse: Forwarded to `layer_norm`.
        increase_dim: When true, the channel count is doubled and the first
            convolution uses strides [1, 2, 2, 1].
        first: When true, the layer norm + swish in front of the first
            convolution is skipped.

    Returns:
        The sum of the second convolution's output and the shortcut branch.
    """
    in_channels = input_layer.get_shape().as_list()[-1]
    out_channels = in_channels * 2 if increase_dim else in_channels
    first_strides = [1, 2, 2, 1] if increase_dim else [1, 1, 1, 1]

    with tf.variable_scope(scope):
        pre_act = input_layer
        if not first:
            pre_act = swish(
                layer_norm('h0_ln', input_layer, is_training, reuse))

        hidden = conv2d_res('h1_conv', pre_act, out_channels,
                            strides=first_strides)
        hidden = swish(layer_norm('h1_ln', hidden, is_training, reuse))
        branch = conv2d_res('h2_conv', hidden, out_channels)

        shortcut = input_layer
        if increase_dim:
            # Pool the input (assumed to match the strided branch spatially;
            # avg_pool comes from Utils -- TODO confirm) and zero-pad the
            # channel axis so the shortcut has out_channels channels.
            channel_pad = in_channels // 2
            shortcut = tf.pad(
                avg_pool('l_pool', input_layer),
                [[0, 0], [0, 0], [0, 0], [channel_pad, channel_pad]])

        return tf.add(branch, shortcut)
    
def network(images, is_training, reuse):
    """Build the ResNet classifier that main() loads both checkpoints into.

    The graph is a stem convolution, three stages of `units` residual blocks
    (the second and third stages start with an `increase_dim` block), a final
    layer norm + swish, a mean over axes 1 and 2, and two heads.

    Args:
        images: Input tensor; its static first dimension is used as the batch
            size for the reshapes in front of the heads.
        is_training: Forwarded unchanged to `layer_norm` and `residual`.
        reuse: Reuse flag for the 'layers' variable scope; also forwarded to
            `layer_norm` and `residual`.

    Returns:
        A tuple `(probs, logits, wass)`: the softmax of the 10-way `fc`
        output, the `fc` output itself, and the result of
        `linear(..., 1, 'wass')` applied to it.
    """
    with tf.variable_scope('layers', reuse=reuse):
        init_dim   = 16
        batch_size = images.get_shape().as_list()[0]

        # NOTE: the scope names below are irregular ('r0_bn' for a layer norm,
        # 'r1.0' next to 'res1.k', ...), but they determine the variable names
        # that main() restores from checkpoints through tf.train.Saver, so
        # they must stay exactly as they are.
        r0_conv = conv2d_res('r0_conv', images, init_dim)
        r0_ln   = layer_norm('r0_bn', r0_conv, is_training, reuse)
        r0      = swish(r0_ln)

        # `range` instead of `xrange` so the cell runs on Python 2 and 3.
        r1_res = residual('r1.0', r0, is_training, reuse, first=True)
        for k in range(1, units):
            r1_res = residual('res1.{}'.format(k), r1_res, is_training, reuse)

        r2_res = residual('r2.0', r1_res, is_training, reuse,
                          increase_dim=True)
        for k in range(1, units):
            r2_res = residual('res2.{}'.format(k), r2_res, is_training, reuse)

        r3_res = residual('r3.0', r2_res, is_training, reuse,
                          increase_dim=True)
        for k in range(1, units):
            r3_res = residual('r3.{}'.format(k), r3_res, is_training, reuse)

        r4_ln = layer_norm('r4_ln', r3_res, is_training, reuse)
        r4    = swish(r4_ln)

        # Average over axes 1 and 2 before the heads.
        r5 = tf.reduce_mean(r4, axis = [1, 2])

        fc = fully_connected('fc', tf.reshape(r5, [batch_size, -1]), 10)
        wass = linear(tf.reshape(fc, [batch_size, -1]), 1, 'wass')
        return tf.nn.softmax(fc), fc, wass

In [None]:
def build_eval_op():
    """Build the graph pieces used to attack and evaluate a single image.

    The input image lives in a variable ('e_images') so that the gradient of
    the loss with respect to the image can be requested; an assign op copies
    a fed image into that variable.

    Returns:
        A tuple `(e_preds, e_vars_grad, e_images_op, e_images_place,
        e_labels_place)`:
        the argmax prediction, the `(gradient, variable)` pairs returned by
        `compute_gradients` for the image variable, the assign op, and the
        image and label placeholders.
    """
    batch_size = 1
    input_shape = [batch_size, height, width, channels]

    # Image variable (randomly initialized) and the op that overwrites it.
    initial_images = np.random.uniform(
        low=-1.0, high=1.0, size=input_shape).astype('float32')
    e_images = tf.Variable(initial_images, name='e_images')
    e_images_place = tf.placeholder(
        tf.float32, shape=input_shape, name='e_images_place')
    e_images_op = e_images.assign(e_images_place)
    e_labels_place = tf.placeholder(
        tf.int32, shape=[batch_size], name='e_labels_place')

    # Network built with is_training=False and reuse=False.
    e_probs, e_logits, e_wass = network(e_images, False, False)

    # Prediction and mean cross-entropy loss.
    e_preds = tf.argmax(e_probs, axis=1)
    e_loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=e_labels_place, logits=e_logits))

    # Only the image variable is differentiated; the optimizer is used for
    # compute_gradients() alone.
    e_vars = []
    for candidate in tf.trainable_variables():
        if 'e_images' in candidate.name:
            e_vars.append(candidate)
    e_vars_grad = tf.train.GradientDescentOptimizer(0.1).compute_gradients(
        e_loss, e_vars)

    return e_preds, e_vars_grad, e_images_op, e_images_place, e_labels_place

In [None]:
def _predict_all(sess, saver, model_path, images, e_images_op, e_images_place,
                 e_preds, log_file_path, progress_message):
    """Restore `model_path` and return its prediction for every image.

    `progress_message` is a format string with one `{}` slot that receives
    the number of images processed so far; it is logged every 500 images.
    """
    saver.restore(sess, model_path)
    predictions = []
    for idx, image in enumerate(images):
        # Copy the image into the input variable, then read the prediction.
        sess.run(e_images_op, feed_dict = {e_images_place: image})
        predictions.append(sess.run(e_preds)[0])

        if idx % 500 == 499:
            log(log_file_path, progress_message.format(idx + 1))
    return predictions


def _correction_rate(corrected, failures):
    """Return `corrected / failures`, or NaN when `failures` is zero."""
    if failures == 0:
        # No failures to correct: the rate is undefined, not an error.
        return float('nan')
    return 1.0 * corrected / failures


def main(sess):
    """Run the FGSM transfer experiment between the baseline and WINN models.

    For each referee model (baseline, then WINN), adversarial examples are
    generated from the referee's gradients, both models are evaluated on
    them, and the failure counts plus a correction rate are logged to
    `<root_dir>/log.txt`.

    Args:
        sess: TensorFlow session in which the graph is built and run.
    """
    # Load data.
    test_images, test_labels = load_test_data()
    test_images = normalize(test_images)
    test_images_count = test_images.shape[0]

    # Create directories.
    if not os.path.exists(root_dir):
        os.mkdir(root_dir)

    # Set logging.
    tf.logging.set_verbosity(tf.logging.ERROR)
    log_file_path = os.path.join(root_dir, 'log.txt')

    # Build evaluation model.
    e_preds, e_vars_grad, e_images_op, e_images_place, e_labels_place = \
        build_eval_op()

    # Initialize all global variables.
    sess.run(tf.global_variables_initializer())

    # Create saver for the network variables only; the image variable is not
    # part of any checkpoint.
    network_vars = [var for var in tf.trainable_variables()
                        if 'layers' in var.name]
    saver = tf.train.Saver(network_vars)

    # Log all global variables.
    global_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope = '')
    log(log_file_path, 'Global variables:')
    for i, var in enumerate(global_vars):
        log(log_file_path, '{}, {}, {}.'.format(i, var.name, var.get_shape()))

    # Consider two referee models: Baseline and WINN. Results are kept in a
    # list so they are logged in this order on every Python version.
    referees = [('baseline', baseline_model), ('winn', winn_model)]
    final_results = []
    for referee, referee_model in referees:

        # Step 1. Generate adversarial examples from referee model.
        saver.restore(sess, referee_model)
        referee_adv_images = []
        for idx in range(test_images_count):
            e_batch_images = test_images[idx].reshape(
                (batch_size, height, width, channels))
            e_batch_labels = test_labels[idx].reshape(
                (batch_size,))

            # Fetch gradient of the loss with respect to the image.
            sess.run(e_images_op, feed_dict = {e_images_place: e_batch_images})
            # e_vars_grad[0][0] extracts the gradient from [(grad, vars), ...]
            # which is returned by compute_gradients().
            e_batch_grad = sess.run(e_vars_grad[0][0],
                feed_dict = {e_labels_place: e_batch_labels})

            # FGSM step: move every pixel by epsilon along the gradient sign
            # and clip back into the [-1, 1] pixel range.
            e_adv_batch_images = np.clip(
                e_batch_images + epsilon * np.sign(e_batch_grad), -1, 1)
            # Due to batch size eq. to 1, we can directly append the single
            # adversarial image to the referee adversarial images list.
            referee_adv_images.append(e_adv_batch_images)

            if idx % 500 == 499:
                log(log_file_path,
                    'Generating {} adv. examples...'.format(idx + 1))

        # Step 2. Test the adversarial examples on WINN.
        winn_pred_list = _predict_all(
            sess, saver, winn_model, referee_adv_images,
            e_images_op, e_images_place, e_preds, log_file_path,
            'Testing WINN with {} adv. examples...')

        # Step 3. Test the adversarial examples on baseline.
        baseline_pred_list = _predict_all(
            sess, saver, baseline_model, referee_adv_images,
            e_images_op, e_images_place, e_preds, log_file_path,
            'Testing baseline with {} adv. examples...')

        # Step 4. Compare the WINN and baseline.
        baseline_fail_count, winn_fail_count = 0, 0
        better, worse = 0, 0

        for idx in range(test_images_count):
            current_label = test_labels[idx]
            winn_fail     = winn_pred_list[idx] != current_label
            baseline_fail = baseline_pred_list[idx] != current_label

            if baseline_fail:
                baseline_fail_count += 1
            if winn_fail:
                winn_fail_count += 1

            # "worse": only WINN is fooled; "better": only baseline is fooled.
            if winn_fail and not baseline_fail:
                worse  += 1
            if baseline_fail and not winn_fail:
                better += 1

            if idx % 500 == 499:
                log(log_file_path,
                    ('Test images: {}, referee: {}, baseline fails {}, ' +
                     'WINN fails {}, better {}, worse {}.').format(
                     idx + 1, referee, baseline_fail_count,
                     winn_fail_count, better, worse))

        final_result = ('(Final result) test images: {}, referee: {}, ' +
                        'baseline fails {}, WINN fails {}, ' +
                        'better {}, worse {}, ').format(
                        test_images_count, referee,
                        baseline_fail_count, winn_fail_count,
                        better, worse)
        # The correction rate is the share of the referee's own failures that
        # the other model classifies correctly.
        if referee == 'winn':
            final_result += 'correction rate by baseline: {}.'.format(
                            _correction_rate(worse, winn_fail_count))
        elif referee == 'baseline':
            final_result += 'correction rate by winn: {}.'.format(
                            _correction_rate(better, baseline_fail_count))
        final_results.append(final_result)

    for final_result in final_results:
        log(log_file_path, final_result)

In [None]:
if __name__ == '__main__':
    # Select the GPU through CUDA_VISIBLE_DEVICES before the session is
    # created.
    os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu)

    # Session configuration: soft placement on, GPU memory grown on demand.
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    # Build and run everything in a fresh graph, pinned to '/gpu:0', with all
    # variables created under the 'WINN' scope.
    graph = tf.Graph()
    with graph.as_default(), tf.device('/gpu:0'):
        with tf.Session(config=config) as sess, \
                tf.variable_scope('WINN', reuse=None):
            main(sess)