[View in Colaboratory](https://colab.research.google.com/github/ZacCranko/robustlearningexperiments/blob/master/robuster_mnist_cleverhans.ipynb)

In [0]:
# Copyright 2018 Google LLC and Zac Cranko

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     https://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys, os
!pip install -qq -e git+http://github.com/tensorflow/cleverhans.git#egg=cleverhans
sys.path.append('/content/src/cleverhans')


"""
This tutorial shows how to generate adversarial examples using FGSM
and train a model using adversarial training with TensorFlow.
It is very similar to mnist_tutorial_keras_tf.py, which does the same
thing but with a dependence on keras.
The original paper can be found at:
https://arxiv.org/abs/1412.6572
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import tensorflow as tf
from tensorflow.python.platform import flags
import logging

from cleverhans.loss import LossCrossEntropy
from cleverhans.utils_mnist import data_mnist
from cleverhans.utils_tf import train, model_eval
from cleverhans.attacks import FastGradientMethod, MomentumIterativeMethod, CarliniWagnerL2
from cleverhans.utils import AccuracyReport, set_log_level
from cleverhans_tutorials.tutorial_models import ModelBasicCNN

# Handle to TensorFlow's global flag registry; it is not referenced by any of
# the cells shown in this notebook.
FLAGS = flags.FLAGS

In [0]:
from cleverhans.model   import Model
from cleverhans.loss    import Loss
from cleverhans.compat  import softmax_cross_entropy_with_logits

# Copyright 2018 Google LLC.
# SPDX-License-Identifier: Apache-2.0
def power_iterate_conv(layer, num_iter):
  """Estimate the spectral norm of a convolutional layer by power iteration.

  The iteration alternates between the transposed convolution and the
  convolution itself, normalising after each step. The running estimate `u`
  is stored in a non-trainable variable, and the op that writes it back is
  added to `tf.GraphKeys.UPDATE_OPS` so it only runs when those ops are run.

  Args:
    layer: a `tf.keras.layers.Conv2D` whose kernel and static input/output
      shapes are available (i.e. the layer has already been built).
    num_iter: number of power-iteration steps unrolled into the graph; must
      be at least 1.

  Returns:
    A scalar tensor `sum(u * conv(v))`. `u` and `v` are wrapped in
    `tf.stop_gradient`, so gradients reach only the kernel.

  Raises:
    ValueError: if `num_iter` is smaller than 1.
  """
  assert isinstance(layer, tf.keras.layers.Conv2D)
  if num_iter < 1:
    # Without at least one step `v` would never be defined.
    raise ValueError('num_iter must be >= 1, got {}'.format(num_iter))

  kernel = layer.kernel
  strides = (1,) + tuple(layer.strides) + (1,)
  padding = layer.padding.upper()

  # Explicit lists: on Python 3 `map` returns an iterator, which cannot be
  # concatenated to a list.
  u_shape = [1] + [int(dim) for dim in layer.output_shape[1:]]
  v_shape = [1] + [int(dim) for dim in layer.input_shape[1:]]

  with tf.variable_scope(None, default_name='power_iteration'):
    u_var = tf.get_variable(
        'u_conv', u_shape,
        initializer=tf.random_normal_initializer(),
        trainable=False)
    u = u_var

    for _ in range(num_iter):
      v = tf.nn.conv2d_transpose(u, kernel, v_shape, strides, padding)
      # 2 * l2_loss(t) == sum(t ** 2); the floor avoids dividing by zero.
      v /= tf.sqrt(tf.maximum(2 * tf.nn.l2_loss(v), 1e-12))
      u = tf.nn.conv2d(v, kernel, strides, padding)
      u /= tf.sqrt(tf.maximum(2 * tf.nn.l2_loss(u), 1e-12))

    # Persist the refined estimate so later runs start from it.
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, tf.assign(u_var, u))

    u = tf.stop_gradient(u)
    v = tf.stop_gradient(v)
    return tf.reduce_sum(u * tf.nn.conv2d(v, kernel, strides, padding))
  
def power_iterate(layer, num_iter):
  """Estimate the spectral norm of a fully connected layer by power iteration.

  The running estimate `u` has one entry per row of the layer's kernel and is
  stored in a non-trainable variable; the op that writes it back is added to
  `tf.GraphKeys.UPDATE_OPS` so it only runs when those ops are run.

  Args:
    layer: a `tf.keras.layers.Dense` whose kernel already exists (i.e. the
      layer has been built).
    num_iter: number of power-iteration steps unrolled into the graph; must
      be at least 1.

  Returns:
    A scalar tensor `sum(u * (kernel @ v))`. `u` and `v` are wrapped in
    `tf.stop_gradient`, so gradients reach only the kernel.

  Raises:
    ValueError: if `num_iter` is smaller than 1.
  """
  assert isinstance(layer, tf.keras.layers.Dense)
  if num_iter < 1:
    # Without at least one step `v` would never be defined.
    raise ValueError('num_iter must be >= 1, got {}'.format(num_iter))

  kernel = layer.kernel
  # NOTE(review): Keras documents the Dense kernel as (input_dim, units), so
  # `u` presumably lives in the layer's input space. The largest singular
  # value is the same for a matrix and its transpose, so the estimate is
  # unaffected either way.
  num_rows, _ = kernel.get_shape().as_list()

  with tf.variable_scope(None, default_name='power_iteration'):
    u_var = tf.get_variable(
        'u', [int(num_rows), 1],
        initializer=tf.random_normal_initializer(),
        trainable=False)
    u = u_var

    for _ in range(num_iter):
      v = tf.matmul(kernel, u, transpose_a=True)
      # 2 * l2_loss(t) == sum(t ** 2); the floor avoids dividing by zero.
      v /= tf.sqrt(tf.maximum(2 * tf.nn.l2_loss(v), 1e-12))
      u = tf.matmul(kernel, v)
      u /= tf.sqrt(tf.maximum(2 * tf.nn.l2_loss(u), 1e-12))

    # Persist the refined estimate so later runs start from it.
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, tf.assign(u_var, u))

    u = tf.stop_gradient(u)
    v = tf.stop_gradient(v)
    return tf.reduce_sum(u * tf.matmul(kernel, v))

class ModelRobustCNN(Model):
    """CNN for 28x28x1 inputs that records a spectral-norm estimate per layer.

    Architecture: conv(32, 5x5) -> relu -> maxpool -> conv(64, 5x5) -> relu
    -> maxpool -> flatten -> dense(1024) -> relu -> dense(num_classes).

    Every convolutional and dense layer contributes one power-iteration
    estimate to `self.lip`; `lipschitz` and `young_surrogate` combine them.
    """

    def __init__(self, scope, num_classes, **kwargs):
        """Build the Keras model and the per-layer norm estimates.

        Args:
          scope: variable scope name under which the layers are created.
          num_classes: number of output logits.
          **kwargs: accepted and ignored.
        """
        del kwargs
        Model.__init__(self, scope, num_classes, locals())
        self.num_pow = 5       # not read anywhere in this class; kept as-is
        self.num_pow_iter = 1  # power-iteration steps unrolled per layer
        self.lip = []          # one spectral-norm estimate per linear layer

        with tf.variable_scope(scope):
            self.model = tf.keras.Sequential()

            self._add_with_lipschitz(
                tf.keras.layers.Conv2D(32, 5, 1, padding='SAME',
                                       input_shape=(28, 28, 1)),
                power_iterate_conv)
            self.model.add(tf.keras.layers.Activation('relu'))
            self.model.add(tf.keras.layers.MaxPooling2D(2, 2, padding='SAME'))

            self._add_with_lipschitz(
                tf.keras.layers.Conv2D(64, 5, 1, padding='SAME'),
                power_iterate_conv)
            self.model.add(tf.keras.layers.Activation('relu'))
            self.model.add(tf.keras.layers.MaxPooling2D(2, 2, padding='SAME'))
            self.model.add(tf.keras.layers.Flatten())

            self._add_with_lipschitz(tf.keras.layers.Dense(1024),
                                     power_iterate)
            self.model.add(tf.keras.layers.Activation('relu'))

            # Size the output layer from num_classes instead of a literal 10
            # so the logits always match the label width.
            self._add_with_lipschitz(tf.keras.layers.Dense(num_classes),
                                     power_iterate)

    def _add_with_lipschitz(self, layer, estimator):
        """Append `layer` to the model, then record its norm estimate."""
        # The layer must be added first: the estimators read its kernel and
        # static shapes, which exist only once the layer is in the model.
        self.model.add(layer)
        self.lip.append(estimator(layer, self.num_pow_iter))

    def lipschitz(self):
        """Return the product of the per-layer spectral-norm estimates."""
        return tf.reduce_prod(self.lip)

    def young_surrogate(self, weights=None):
        """Build the Young convex surrogate for the Lipschitz product.

        Computes `sum_i w_i * l_i ** (1 / w_i)` over the per-layer estimates
        `l_i`. By the weighted Young inequality this upper-bounds the product
        of the `l_i` when the `w_i` are positive and sum to one.

        Args:
          weights: optional sequence of positive numbers, one per entry of
            `self.lip`. They are rescaled to sum to one. Defaults to uniform
            weights.

        Returns:
          A scalar tensor holding the surrogate.

        Raises:
          ValueError: if `weights` has the wrong length or a non-positive
            entry.
        """
        num_layers = len(self.lip)
        if weights is None:
            # 1.0 keeps this a float division even without
            # `from __future__ import division` on Python 2.
            weights = np.full(num_layers, 1.0 / num_layers)
        else:
            if len(weights) != num_layers:
                raise ValueError('expected {} weights, got {}'.format(
                    num_layers, len(weights)))
            if not all(w > 0 for w in weights):
                raise ValueError('all weights must be strictly positive')
            weights = np.asarray(weights, dtype=float)
            # Dividing by the sum is a no-op when the weights already sum
            # to one, so no fragile float equality test is needed.
            weights = weights / weights.sum()

        return tf.reduce_sum(
            [w * l ** (1.0 / w) for l, w in zip(self.lip, weights)])

    def fprop(self, x, **kwargs):
        """Return the logits and softmax probabilities for input `x`.

        `x` is reshaped to [-1, 28, 28, 1] before being fed to the model.
        """
        del kwargs
        logits = self.model(tf.reshape(x, [-1, 28, 28, 1]))

        return {self.O_LOGITS: logits,
                self.O_PROBS:  tf.nn.softmax(logits=logits)}
        

In [0]:
tf.reset_default_graph()

# Some of these variables are used by the CleverHans code this notebook was
# assembled from, so they remain here.
train_start = 0
train_end = 60000
test_start = 0
test_end = 10000
num_epochs = 50
batch_size = 128
# NOTE(review): the training cell builds its optimiser with a hard-coded
# 1e-4 and does not read this value.
learning_rate = 0.001

# Load the MNIST training and test splits
x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                              train_end=train_end,
                                              test_start=test_start,
                                              test_end=test_end)

# Image dimensions and class count, read from the loaded arrays
img_rows, img_cols, nchannels = x_train.shape[1:4]
num_classes = y_train.shape[1]

# Define input TF placeholders
x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                      nchannels))
y = tf.placeholder(tf.float32, shape=(None, num_classes))

model = ModelRobustCNN('robust_model', num_classes)

In [0]:
import math
import time
from cleverhans.utils import batch_indices

eps = 0.3

# Build the clean-input logits once and share them between the loss and the
# accuracy, rather than adding a second copy of the forward pass to the graph.
clean_logits = model.get_logits(x)

cross_ent = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=clean_logits))
accuracy   = tf.reduce_mean(tf.cast(
     tf.equal(tf.argmax(clean_logits, 1), tf.argmax(y, 1)), tf.float32))

lip_cst = model.lipschitz()
lip_reg = model.young_surrogate()
rho     = tf.placeholder(tf.float32)

# objective function to minimise
obj     = cross_ent + rho * lip_reg

# bound on the adversarial entropy
cross_ent_bound = cross_ent + rho * lip_cst

# optimisation: run the collected UPDATE_OPS (which include the
# power-iteration state updates) before every optimiser step
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
  train_step = tf.train.AdamOptimizer(1e-4).minimize(obj)

fgsm_params = {
    'eps': eps,
    'clip_min': 0.,
    'clip_max': 1.,
    'batch_size': 100,
    'ord': 2 # spectral regularisation is only certified against l2 adversary
}
rng = np.random.RandomState([2017, 8, 30])

sess =  tf.Session()

# NOTE: despite the `fgsm` name this is the momentum iterative attack.
fgsm  = MomentumIterativeMethod(model, sess=sess)

adv_x = fgsm.generate(x, y=y, **fgsm_params)
adv_logits = model.get_logits(adv_x)
adv_cross_ent = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=adv_logits))
adv_accuracy = tf.reduce_mean(tf.cast(
     tf.equal(tf.argmax(adv_logits, 1), tf.argmax(y, 1)), tf.float32))

# (label, tensor) pairs, listed in the order they should be reported
metrics = (
    ('objective', obj),
    ('adv cross entropy', adv_cross_ent),
    ('cross entropy bound', cross_ent_bound),
    ('accuracy', accuracy),
    ('adv accuracy', adv_accuracy)
)

def evaluate(feed_dict):
  """Run every tensor in `metrics` and print one formatted line per metric.

  Args:
    feed_dict: placeholder-to-value mapping forwarded to `sess.run`.
  """
  computed_metrics = sess.run(dict(metrics), feed_dict=feed_dict)
  # Walk `metrics` directly to keep its declared order. The previous
  # `zip(*metrics)[0]` only works on Python 2, where zip returns a list.
  for name, _ in metrics:
    print('{:>21} {:.04f}'.format(name + ':', computed_metrics[name]))
  print('')
  
# The batch count does not change between epochs, so compute it once.
num_batches = int(math.ceil(float(len(x_train)) / batch_size))
assert num_batches * batch_size >= len(x_train)

# `x` and `y` are filled in per batch; `rho` is fixed to `eps` throughout.
feed_dict      = {rho : eps, x : None,   y : None}
test_feed_dict = {rho : eps, x : x_test, y : y_test}

# try/finally releases the session even if a step raises part-way through.
try:
  sess.run(tf.global_variables_initializer())

  with sess.as_default():
    for epoch in range(1, num_epochs + 1):
        # Indices to shuffle training set
        index_shuf = list(range(len(x_train)))
        rng.shuffle(index_shuf)

        prev = time.time()
        for batch in range(num_batches):
            # Compute batch start and end indices
            start, end = batch_indices(batch, len(x_train), batch_size)

            # Perform one training step
            feed_dict[x] = x_train[index_shuf[start:end]]
            feed_dict[y] = y_train[index_shuf[start:end]]

            sess.run(train_step, feed_dict=feed_dict)

        assert end >= len(x_train)  # Check that all examples were used
        if epoch % 5 == 0:
          evaluate(test_feed_dict)
finally:
  sess.close()

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

           objective: 1.7099
   adv cross entropy: 0.8043
 cross entropy bound: 1.6465
            accuracy: 0.9012
        adv accuracy: 0.8646

