In [1]:
%matplotlib inline

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import functools
import os
import sys
import time

import tensorflow as tf

import tensorflow.contrib.eager as tfe
from tensorflow.examples.tutorials.mnist import input_data

import matplotlib.pyplot as plt

In [2]:
class MNISTModel(tfe.Network):
  """MNIST Network.
  Network structure is equivalent to:
  https://github.com/tensorflow/tensorflow/blob/r1.4/tensorflow/examples/tutorials/mnist/mnist_deep.py
  and
  https://github.com/tensorflow/models/blob/master/tutorials/image/mnist/convolutional.py
  But written using the tf.layers API.

  The forward pass is: reshape -> conv1 -> max-pool -> conv2 -> max-pool ->
  flatten -> fc1 -> (dropout when training) -> fc2.
  """

  def __init__(self, data_format):
    """Creates a model for classifying a hand-written digit.
    Args:
      data_format: Either 'channels_first' or 'channels_last'.
        'channels_first' is typically faster on GPUs while 'channels_last' is
        typically faster on CPUs. See
        https://www.tensorflow.org/performance/performance_guide#data_formats
    Raises:
      AssertionError: If `data_format` is neither of the two supported values.
    """
    super(MNISTModel, self).__init__(name='')
    # The leading -1 lets tf.reshape infer the batch dimension.
    if data_format == 'channels_first':
      self._input_shape = [-1, 1, 28, 28]
    else:
      assert data_format == 'channels_last', (
          "data_format must be 'channels_first' or 'channels_last', got %r" %
          (data_format,))
      self._input_shape = [-1, 28, 28, 1]
    self.conv1 = self.track_layer(
        tf.layers.Conv2D(32, 5, data_format=data_format, activation=tf.nn.relu))
    self.conv2 = self.track_layer(
        tf.layers.Conv2D(64, 5, data_format=data_format, activation=tf.nn.relu))
    self.fc1 = self.track_layer(tf.layers.Dense(1024, activation=tf.nn.relu))
    self.fc2 = self.track_layer(tf.layers.Dense(10))
    self.dropout = self.track_layer(tf.layers.Dropout(0.5))
    # A single pooling layer instance is applied after each convolution.
    self.max_pool2d = self.track_layer(
        tf.layers.MaxPooling2D(
            (2, 2), (2, 2), padding='SAME', data_format=data_format))

  def call(self, inputs, training):
    """Computes labels from inputs.
    Users should invoke __call__ to run the network, which delegates to this
    method (and not call this method directly).
    Args:
      inputs: A batch of images as a Tensor with shape [batch_size, 784].
      training: True if invoked in the context of training (causing dropout to
        be applied).  False otherwise.
    Returns:
      A Tensor with shape [batch_size, 10] containing the predicted logits
      for each image in the batch, for each of the 10 classes.
    """

    x = tf.reshape(inputs, self._input_shape)
    x = self.conv1(x)
    x = self.max_pool2d(x)
    x = self.conv2(x)
    x = self.max_pool2d(x)
    x = tf.layers.flatten(x)
    x = self.fc1(x)
    if training:
      # tf.layers.Dropout defaults to training=False when called, in which case
      # it returns its input unchanged. The flag must be passed explicitly for
      # any units to actually be dropped.
      x = self.dropout(x, training=True)
    x = self.fc2(x)
    return x

In [3]:
def load_data(data_dir):
  """Builds the MNIST training and test datasets.

  Args:
    data_dir: Directory passed to `input_data.read_data_sets`.

  Returns:
    A `(train_ds, test_ds)` tuple of `tf.data.Dataset` objects. Each dataset
    is sliced from an (images, labels) pair; labels are read with
    `one_hot=True`.
  """
  mnist = input_data.read_data_sets(data_dir, one_hot=True)
  splits = (mnist.train, mnist.test)
  return tuple(
      tf.data.Dataset.from_tensor_slices((split.images, split.labels))
      for split in splits)

In [4]:
def loss(predictions, labels):
  """Returns the mean softmax cross-entropy over a batch.

  Args:
    predictions: Logits, passed as `logits=` to
      `tf.nn.softmax_cross_entropy_with_logits`.
    labels: Target distribution, passed as `labels=` to the same op.

  Returns:
    The cross-entropy values reduced with `tf.reduce_mean`.
  """
  per_example = tf.nn.softmax_cross_entropy_with_logits(
      logits=predictions, labels=labels)
  return tf.reduce_mean(per_example)


def compute_accuracy(predictions, labels):
  """Returns the fraction of rows whose argmax matches between the inputs.

  Args:
    predictions: Tensor whose argmax along axis 1 is the predicted class. Its
      static first dimension is used as the denominator.
    labels: Tensor whose argmax along axis 1 is the expected class.

  Returns:
    The number of matching rows divided by `predictions.shape[0]`.
  """
  predicted_class = tf.argmax(predictions, axis=1, output_type=tf.int64)
  expected_class = tf.argmax(labels, axis=1, output_type=tf.int64)
  hits = tf.cast(tf.equal(predicted_class, expected_class), dtype=tf.float32)
  batch_size = float(predictions.shape[0].value)
  return tf.reduce_sum(hits) / batch_size


def train_one_epoch(model, optimizer, dataset, log_interval=None):
  """Runs one optimisation pass over every batch of `dataset`.

  Args:
    model: Callable invoked as `model(images, training=True)`; its output is
      fed to `loss` and `compute_accuracy`.
    optimizer: Object whose `minimize(loss_fn, global_step=...)` is called
      once per batch.
    dataset: Dataset iterated with `tfe.Iterator`, yielding
      `(images, labels)` pairs.
    log_interval: Accepted for interface compatibility; currently unused.
  """
  # Make sure a global step exists before the optimizer and the summary
  # context below look it up.
  tf.train.get_or_create_global_step()

  def batch_loss(images, labels):
    """Forward pass for one batch; also emits loss/accuracy summaries."""
    logits = model(images, training=True)
    value = loss(logits, labels)
    tf.contrib.summary.scalar('loss', value)
    tf.contrib.summary.scalar('accuracy', compute_accuracy(logits, labels))
    return value

  for images, labels in tfe.Iterator(dataset):
    # The recording context is (re-)entered for every batch on purpose, so it
    # is evaluated against the current global step each time.
    with tf.contrib.summary.record_summaries_every_n_global_steps(10):
      optimizer.minimize(
          lambda: batch_loss(images, labels),
          global_step=tf.train.get_global_step())


def test(model, dataset):
  """Evaluates `model` on `dataset` and prints average loss and accuracy.

  Args:
    model: Callable invoked as `model(images, training=False)`.
    dataset: Dataset iterated with `tfe.Iterator`, yielding
      `(images, labels)` pairs.
  """
  avg_loss = tfe.metrics.Mean('loss')
  accuracy = tfe.metrics.Accuracy('accuracy')

  for batch_images, batch_labels in tfe.Iterator(dataset):
    logits = model(batch_images, training=False)
    avg_loss(loss(logits, batch_labels))
    # Both sides are reduced to class indices before being compared.
    from_logits = tf.argmax(logits, axis=1, output_type=tf.int64)
    from_labels = tf.argmax(batch_labels, axis=1, output_type=tf.int64)
    accuracy(from_logits, from_labels)

  report = 'Test set: Average loss: %.4f, Accuracy: %4f%%' % (
      avg_loss.result(), 100 * accuracy.result())
  print(report)

In [5]:
def craft_adv(model, dataset, epsilon=0.3):
  """Evaluates `model` on adversarial examples crafted from `dataset`.

  Each batch is perturbed with a single fast-gradient-sign step,

      x_adv = clip(x + epsilon * sign(d loss / d x), 0, 1)

  and the model's average loss and accuracy on the perturbed batches are
  printed.

  Args:
    model: Callable invoked as `model(images, training=False)` returning
      logits.
    dataset: Dataset iterated with `tfe.Iterator`, yielding
      `(images, labels)` pairs. Batches may be of any size.
    epsilon: Step size that scales the sign of the input gradient. Defaults
      to 0.3.
  """
  avg_loss = tfe.metrics.Mean('loss_adv')
  accuracy = tfe.metrics.Accuracy('accuracy_adv')

  def loss_wrapper(inputs, labels):
    preds = model(inputs, training=False)
    return loss(preds, labels)

  # Differentiate with respect to the first positional argument (the images)
  # only. This avoids staging the batch in a fixed-shape variable and then
  # searching the implicit gradients for it by name.
  input_gradient_fn = tfe.gradients_function(loss_wrapper, params=[0])

  for (images, labels) in tfe.Iterator(dataset):
    (input_grad,) = input_gradient_fn(images, labels)
    scaled_grad = epsilon * tf.sign(input_grad)

    # Clip to [0, 1] (presumably the valid pixel range of the inputs).
    perturbed_images = tf.clip_by_value(images + scaled_grad, 0, 1)

    predictions = model(perturbed_images, training=False)
    avg_loss(loss(predictions, labels))
    accuracy(tf.argmax(predictions, axis=1, output_type=tf.int64),
             tf.argmax(labels, axis=1, output_type=tf.int64))
  print('\nAdversarial test set: Average loss: %.4f, Accuracy: %4f%%\n' %
        (avg_loss.result(), 100 * accuracy.result()))

In [6]:
if __name__ == '__main__':
  # Everything below (tfe.Iterator, `.numpy()` on the global step, ...) relies
  # on eager execution being enabled.
  tfe.enable_eager_execution()

  # Command-line options. `parse_known_args` is used so that unrecognised
  # arguments end up in `unparsed` instead of aborting the run.
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--data-dir',
      type=str,
      default='/tmp/tensorflow/mnist/input_data',
      help='Directory for storing input data')
  parser.add_argument(
      '--batch-size',
      type=int,
      default=64,
      metavar='N',
      help='input batch size for training (default: 64)')
  parser.add_argument(
      '--log-interval',
      type=int,
      default=10,
      metavar='N',
      help='how many batches to wait before logging training status')
  parser.add_argument(
      '--output_dir',
      type=str,
      default=None,
      metavar='N',
      help='Directory to write TensorBoard summaries')
  parser.add_argument(
      '--checkpoint_dir',
      type=str,
      default='/tmp/tensorflow/mnist/checkpoints/',
      metavar='N',
      help='Directory to save checkpoints in (once per epoch)')
  parser.add_argument(
      '--lr',
      type=float,
      default=0.01,
      metavar='LR',
      help='learning rate (default: 0.01)')
  parser.add_argument(
      '--momentum',
      type=float,
      default=0.5,
      metavar='M',
      help='SGD momentum (default: 0.5)')
  parser.add_argument(
      '--no-gpu',
      action='store_true',
      default=False,
      help='disables GPU usage even if a GPU is available')
  args, unparsed = parser.parse_known_args()

  # Use the GPU with channels-first data unless it is disabled or absent.
  # The GPU count is only queried when --no-gpu was not given.
  use_gpu = not args.no_gpu and tfe.num_gpus() > 0
  if use_gpu:
    device, data_format = '/gpu:0', 'channels_first'
  else:
    device, data_format = '/cpu:0', 'channels_last'
  print('Using device %s, and data format %s.' % (device, data_format))

  # Load the datasets
  train_ds, test_ds = load_data(args.data_dir)
  train_ds = train_ds.shuffle(60000).batch(args.batch_size)
  test_ds = test_ds.batch(100)

  # Create the model and optimizer
  model = MNISTModel(data_format)
  optimizer = tf.train.MomentumOptimizer(args.lr, args.momentum)

  # Summary directories are only derived (and the parent created) when an
  # output directory was requested.
  train_dir = test_dir = None
  if args.output_dir:
    tf.gfile.MakeDirs(args.output_dir)
    train_dir = os.path.join(args.output_dir, 'train')
    test_dir = os.path.join(args.output_dir, 'eval')

  with tf.device(device):
    # Four training epochs, each followed by an evaluation on the test set.
    for epoch in range(1, 5):
      global_step = tf.train.get_or_create_global_step()
      start = time.time()
      train_one_epoch(model, optimizer, train_ds, args.log_interval)
      end = time.time()
      print('\nTrain time for epoch #%d (global step %d): %f' % (
          epoch, global_step.numpy(), end - start))
      test(model, test_ds)
    # Once training is done, measure the model on adversarial inputs.
    craft_adv(model, test_ds)

Using device /gpu:0, and data format channels_first.
Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz

Train time for epoch #1 (global step 860): 8.537302
Test set: Average loss: 0.1628, Accuracy: 95.260000%

Train time for epoch #2 (global step 1720): 8.659573
Test set: Average loss: 0.0937, Accuracy: 97.110000%

Train time for epoch #3 (global step 2580): 7.923543
Test set: Average loss: 0.0729, Accuracy: 97.800000%

Train time for epoch #4 (global step 3440): 8.285579
Test set: Average loss: 0.0582, Accuracy: 98.210000%

Adversarial test set: Average loss: 6.1681, Accuracy: 2.670000%

