In [None]:
"""
Use CW method to craft adversarial on CIFAR10.

Note that instead of find the optimized image for each image, we do a batched
attack without binary search for the best possible solution.  Thus, the result
is worse than reported in the original paper.  To achieve the best result
requires more computation, as demonstrated in another example.
"""
import os
from timeit import default_timer

import numpy as np

import matplotlib
matplotlib.use('Agg')           # noqa: E402
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import tensorflow.compat.v1 as tf

from attacks import cw

from PIL import Image
import time

In [None]:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


img_size = 32
img_chan = 3
n_classes = 10
batch_size = 32

In [None]:
class Timer(object):
    def __init__(self, msg='Starting.....', timer=default_timer, factor=1,
                 fmt="------- elapsed {:.4f}s --------"):
        self.timer = timer
        self.factor = factor
        self.fmt = fmt
        self.end = None
        self.msg = msg

    def __call__(self):
        """
        Return the current time
        """
        return self.timer()

    def __enter__(self):
        """
        Set the start time
        """
        print(self.msg)
        self.start = self()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        """
        Set the end time
        """
        self.end = self()
        print(str(self))

    def __repr__(self):
        return self.fmt.format(self.elapsed)

    @property
    def elapsed(self):
        if self.end is None:
            # if elapsed is called in the context manager scope
            return (self() - self.start) * self.factor
        else:
            # if elapsed is called out of the context manager scope
            return (self.end - self.start) * self.factor

In [None]:
print('\nLoading CIFAR10')

cifar = tf.keras.datasets.cifar10
(X_train, y_train), (X_test, y_test) = cifar.load_data()
print(np.shape(X_train))
X_train = np.reshape(X_train, [-1, img_size, img_size, img_chan])
X_train = X_train.astype(np.float32) / 255
X_test = np.reshape(X_test, [-1, img_size, img_size, img_chan])
X_test = X_test.astype(np.float32) / 255

to_categorical = tf.keras.utils.to_categorical
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

print(X_test.shape)
print(y_test.shape)

In [5]:
print('\nSpliting data')

ind = np.random.permutation(X_train.shape[0])
X_train, y_train = X_train[ind], y_train[ind]

VALIDATION_SPLIT = 0.1
n = int(X_train.shape[0] * (1-VALIDATION_SPLIT))
X_valid = X_train[n:]
X_train = X_train[:n]
y_valid = y_train[n:]
y_train = y_train[:n]


Spliting data


In [6]:
print('\nConstruction graph')


def model(x, logits=False, training=False):
    
    with tf.variable_scope('conv0'):
        z = tf.layers.conv2d(x, filters=32, kernel_size=[3, 3],
                             padding='same', activation=tf.nn.relu)
        z = tf.layers.conv2d(z, filters=64, kernel_size=[3, 3],
                             padding='same', activation=tf.nn.relu)
        z = tf.layers.max_pooling2d(z, pool_size=[2, 2], strides=2)
    
    for i in range(10):
        with tf.variable_scope('conv1'):
            z = tf.layers.conv2d(x, filters=64, kernel_size=[3, 3],
                             padding='same', activation=tf.nn.relu)
            z = tf.layers.batch_normalization(z, training=training)
            z = tf.layers.conv2d(z, filters=64, kernel_size=[3, 3],
                             padding='same', activation=None)
            z = tf.layers.batch_normalization(z, training=training)
            z = tf.layers.max_pooling2d(z, pool_size=[2, 2], strides=2)
    
    with tf.variable_scope('conv2'):
        z = tf.layers.conv2d(z, filters=64, kernel_size=[3, 3], padding='same', activation=tf.nn.relu)
        z = tf.layers.average_pooling2d(z, pool_size=(2, 2), strides=2)
        z = tf.layers.dense(z, units=128, activation=tf.nn.relu)
    
    with tf.variable_scope('flatten'):
        shape = z.get_shape().as_list()
        z = tf.reshape(z, [-1, np.prod(shape[1:])])
    
    with tf.variable_scope('mlp'):
        z = tf.layers.dense(z, units=128, activation=tf.nn.relu)
        z = tf.layers.dropout(z, rate=0.25, training=training)
    
    logits_ = tf.layers.dense(z, units=10, name='logits')
    y = tf.nn.softmax(logits_, name='ybar')

    if logits:
        return y, logits_
    return y

class Dummy:
    pass


env = Dummy()

tf.disable_eager_execution()
with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
    env.x = tf.placeholder(tf.float32, (None, img_size, img_size, img_chan),
                           name='x')
    env.y = tf.placeholder(tf.float32, (None, n_classes), name='y')
    env.training = tf.placeholder_with_default(False, (), name='mode')

    env.ybar, logits = model(env.x, logits=True, training=env.training)

    with tf.variable_scope('acc'):
        count = tf.equal(tf.argmax(env.y, axis=1), tf.argmax(env.ybar, axis=1))
        env.acc = tf.reduce_mean(tf.cast(count, tf.float32), name='acc')

    with tf.variable_scope('loss'):
        xent = tf.nn.softmax_cross_entropy_with_logits(labels=env.y,
                                                       logits=logits)
        env.loss = tf.reduce_mean(xent, name='loss')

    with tf.variable_scope('train_op'):
        optimizer = tf.train.AdamOptimizer()
        vs = tf.global_variables()
        env.train_op = optimizer.minimize(env.loss, var_list=vs)

    env.saver = tf.train.Saver()

    # Note here that the shape has to be fixed during the graph construction
    # since the internal variable depends upon the shape.
    env.x_fixed = tf.placeholder(
        tf.float32, (batch_size, img_size, img_size, img_chan),
        name='x_fixed')
    env.adv_eps = tf.placeholder(tf.float32, (), name='adv_eps')
    env.adv_y = tf.placeholder(tf.int32, (), name='adv_y')

    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    env.adv_train_op, env.xadv, env.noise = cw(model, env.x_fixed,
                                               y=env.adv_y, eps=env.adv_eps,
                                               optimizer=optimizer)


Construction graph
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
Instructions for updating:
Please use `layer.__call__` method instead.
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
Instructions for updating:
Use keras.layers.BatchNormalization instead.  In particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not be used (consult the `tf.keras.layers.BatchNormalization` documentation).
Instructions for updating:
Use keras.layers.AveragePooling2D instead.
Instructions for updating:
Use keras.layers.Dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [7]:
print('\nInitializing graph')

env.sess = tf.InteractiveSession()
env.sess.run(tf.global_variables_initializer())
env.sess.run(tf.local_variables_initializer())


Initializing graph


In [8]:
def evaluate(env, X_data, y_data, batch_size=128):
    """
    Evaluate TF model by running env.loss and env.acc.
    """
    print('\nEvaluating')

    n_sample = X_data.shape[0]
    n_batch = int((n_sample+batch_size-1) / batch_size)
    loss, acc = 0, 0

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        cnt = end - start
        batch_loss, batch_acc = env.sess.run(
            [env.loss, env.acc],
            feed_dict={env.x: X_data[start:end],
                       env.y: y_data[start:end]})
        loss += batch_loss * cnt
        acc += batch_acc * cnt
    loss /= n_sample
    acc /= n_sample

    print(' loss: {0:.4f} acc: {1:.4f}'.format(loss, acc))
    return loss, acc


def train(env, X_data, y_data, X_valid=None, y_valid=None, epochs=1,
          load=False, shuffle=True, batch_size=128, name='model'):
    """
    Train a TF model by running env.train_op.
    """
    if load:
        if not hasattr(env, 'saver'):
            return print('\nError: cannot find saver op')
        print('\nLoading saved model')
        return env.saver.restore(env.sess, 'model/{}'.format(name))

    print('\nTrain model')
    n_sample = X_data.shape[0]
    n_batch = int((n_sample+batch_size-1) / batch_size)
    for epoch in range(epochs):
        print('\nEpoch {0}/{1}'.format(epoch + 1, epochs))

        if shuffle:
            print('\nShuffling data')
            ind = np.arange(n_sample)
            np.random.shuffle(ind)
            X_data = X_data[ind]
            y_data = y_data[ind]

        for batch in range(n_batch):
            print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
            start = batch * batch_size
            end = min(n_sample, start + batch_size)
            env.sess.run(env.train_op, feed_dict={env.x: X_data[start:end],
                                                  env.y: y_data[start:end],
                                                  env.training: True})
        if X_valid is not None:
            evaluate(env, X_valid, y_valid)

    if hasattr(env, 'saver'):
        print('\n Saving model')
        os.makedirs('model', exist_ok=True)
        env.saver.save(env.sess, 'model/{}'.format(name))


def predict(env, X_data, batch_size=128):
    """
    Do inference by running env.ybar.
    """
    print('\nPredicting')
    n_classes = env.ybar.get_shape().as_list()[1]

    n_sample = X_data.shape[0]
    n_batch = int((n_sample+batch_size-1) / batch_size)
    yval = np.empty((n_sample, n_classes))

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        y_batch = env.sess.run(env.ybar, feed_dict={env.x: X_data[start:end]})
        yval[start:end] = y_batch
    print()
    return yval


# To do the boundary analysis for adversarial samples
def boundary_analysis(sess, env, X_data, y_data, batch_size=128):
    """
    Evaluate TF model by running env.loss and env.acc.
    """
    print('\nPredicting')
    n_classes = env.ybar.get_shape().as_list()[1]

    n_sample = X_data.shape[0]
    n_batch = int((n_sample+batch_size-1) / batch_size)
    yval = np.empty((n_sample, n_classes))

    img_count = 0
    diff_confidence_score_total = 0.0
    for batch in range(n_batch):
        #print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        y_batch = env.sess.run(env.ybar, feed_dict={env.x: X_data[start:end]})
        for yy in y_batch:
            yy2 = yy.tolist()
            yy2.sort(reverse=True)
            first_confidence_score = yy2[0]
            second_confidence_score = yy2[1]
            diff_confidence_score = first_confidence_score - second_confidence_score
            diff_confidence_score_total += diff_confidence_score
            img_count += 1
        yval[start:end] = y_batch
    print(diff_confidence_score_total / img_count)
    print()
    return yval


def make_cw(env, X_data, epochs=1, eps=0.1, batch_size=batch_size):
    """
    Generate adversarial via CW optimization.
    """
    print('\nMaking adversarials via CW')

    startTime = time.time()
    n_sample = X_data.shape[0]
    print(X_data.shape)
    n_batch = int((n_sample + batch_size - 1) / batch_size)
    X_adv = np.empty_like(X_data)

    for batch in range(n_batch):
        with Timer('Batch {0}/{1}   '.format(batch + 1, n_batch)):
            end = min(n_sample, (batch+1) * batch_size)
            start = end - batch_size
            feed_dict = {
                env.x_fixed: X_data[start:end],
                env.adv_eps: eps,
                env.adv_y: np.random.choice(n_classes)}

            # reset the noise before every iteration
            env.sess.run(env.noise.initializer)
            for epoch in range(epochs):
                env.sess.run(env.adv_train_op, feed_dict=feed_dict)

            xadv = env.sess.run(env.xadv, feed_dict=feed_dict)
            X_adv[start:end] = xadv
    endTime = time.time()  
    print(endTime - startTime)

    return X_adv

In [9]:
print('\nTraining')

train(env, X_train, y_train, X_valid, y_valid, load=False, epochs=5,
      name='cifar10')


Training

Train model

Epoch 1/5

Shuffling data
 batch 352/352
Evaluating
 loss: 2.2222 acc: 0.1552

Epoch 2/5

Shuffling data
 batch 352/352
Evaluating
 loss: 2.2293 acc: 0.1418

Epoch 3/5

Shuffling data
 batch 352/352
Evaluating
 loss: 2.2914 acc: 0.1476

Epoch 4/5

Shuffling data
 batch 352/352
Evaluating
 loss: 2.4102 acc: 0.1050

Epoch 5/5

Shuffling data
 batch 352/352
Evaluating
 loss: 2.3668 acc: 0.1244

 Saving model


In [13]:
print('\nEvaluating on clean data')

evaluate(env, X_test, y_test)


Evaluating on clean data

Evaluating
 loss: 1.0430 acc: 0.6406


(1.0430326461791992, 0.640625)

In [14]:
print('\nGenerating adversarial data')

# It takes a while to run through the full dataset, thus, we demo the result
# through a smaller dataset.  We could actually find the best parameter
# configuration on a smaller dataset, and then apply to the full dataset.
n_sample = 128
ind = np.random.choice(X_test.shape[0], size=n_sample, replace=False)
#X_test = X_test[ind]
#y_test = y_test[ind]

X_adv = make_cw(env, X_test, eps=0.0, epochs=100)
X_adv1 = make_cw(env, X_test, eps=0.1, epochs=100)
X_adv2 = make_cw(env, X_test, eps=0.2, epochs=100)
X_adv3 = make_cw(env, X_test, eps=0.3, epochs=100)
X_adv4 = make_cw(env, X_test, eps=0.4, epochs=100)
X_adv5 = make_cw(env, X_test, eps=0.5, epochs=100)


Generating adversarial data

Making adversarials via CW
(10000, 32, 32, 3)
Batch 1/313   
------- elapsed 4.1178s --------
Batch 2/313   
------- elapsed 4.2174s --------
Batch 3/313   
------- elapsed 4.1861s --------
Batch 4/313   
------- elapsed 4.3733s --------
Batch 5/313   
------- elapsed 4.3489s --------
Batch 6/313   
------- elapsed 4.2703s --------
Batch 7/313   
------- elapsed 4.3534s --------
Batch 8/313   
------- elapsed 4.2206s --------
Batch 9/313   
------- elapsed 4.2268s --------
Batch 10/313   
------- elapsed 4.2552s --------
Batch 11/313   
------- elapsed 4.2798s --------
Batch 12/313   
------- elapsed 4.2134s --------
Batch 13/313   
------- elapsed 4.2279s --------
Batch 14/313   
------- elapsed 3.9974s --------
Batch 15/313   
------- elapsed 4.2689s --------
Batch 16/313   
------- elapsed 4.3053s --------
Batch 17/313   
------- elapsed 4.1223s --------
Batch 18/313   
------- elapsed 4.5410s --------
Batch 19/313   
------- elapsed 4.0898s --------
Ba

In [14]:
i = 0
for imageArr in X_adv5:
    im = Image.fromarray((imageArr * 255).astype(np.uint8))
    im.save("cw_perturbed_images/0.5/" + str(i) + ".png")
    i += 1

In [21]:
print('\nEvaluating on adversarial data')

evaluate(env, X_adv, y_test)
evaluate(env, X_adv1, y_test)
evaluate(env, X_adv2, y_test)
evaluate(env, X_adv3, y_test)
evaluate(env, X_adv4, y_test)
evaluate(env, X_adv5, y_test)
evaluate(env, X_adv6, y_test)


Evaluating on adversarial data

Evaluating
 loss: 0.9570 acc: 0.6172

Evaluating
 loss: 9.6645 acc: 0.0469

Evaluating
 loss: 13.2707 acc: 0.1484

Evaluating
 loss: 17.0657 acc: 0.1172

Evaluating
 loss: 18.2636 acc: 0.0547

Evaluating
 loss: 12.6715 acc: 0.0781

Evaluating
 loss: 14.8348 acc: 0.0703


(14.834754943847656, 0.0703125)

In [14]:
print('\nDoing boundary analysis on adversarial data')

print('\nDifference in confidence score when eps = 0.0')
boundary_analysis(env.sess, env, X_adv, y_test)
print('\nDifference in confidence score when eps = 0.1')
boundary_analysis(env.sess, env, X_adv1, y_test)
print('\nDifference in confidence score when eps = 0.2')
boundary_analysis(env.sess, env, X_adv2, y_test)
print('\nDifference in confidence score when eps = 0.3')
boundary_analysis(env.sess, env, X_adv3, y_test)
print('\nDifference in confidence score when eps = 0.4')
boundary_analysis(env.sess, env, X_adv4, y_test)
print('\nDifference in confidence score when eps = 0.5')
boundary_analysis(env.sess, env, X_adv5, y_test)
print('\nDifference in confidence score when eps = 0.6')
boundary_analysis(env.sess, env, X_adv6, y_test)


Doing boundary analysis on adversarial data

Difference in confidence score when eps = 0.0

Predicting
0.44111807009539916


Difference in confidence score when eps = 0.1

Predicting
0.7535063742808048


Difference in confidence score when eps = 0.2

Predicting
0.8598167589778352


Difference in confidence score when eps = 0.3

Predicting
0.8420317467980699


Difference in confidence score when eps = 0.4

Predicting
0.82558247359188


Difference in confidence score when eps = 0.5

Predicting
0.8202673532281026


Difference in confidence score when eps = 0.6

Predicting
0.7662050826318847



array([[1.94493310e-10, 9.99976158e-01, 1.53936475e-13, ...,
        5.37800574e-17, 4.79626006e-10, 2.38259490e-05],
       [1.05688500e-03, 9.34082389e-01, 2.05151809e-06, ...,
        3.60122798e-09, 2.32806802e-03, 6.25299513e-02],
       [9.06631747e-09, 9.99622345e-01, 1.88453289e-12, ...,
        1.34455777e-17, 1.20905383e-06, 3.76423326e-04],
       ...,
       [5.21661190e-04, 1.20093336e-03, 7.29028287e-08, ...,
        3.36524477e-06, 1.98453311e-02, 2.19814043e-04],
       [4.03192502e-09, 2.30352092e-03, 3.56550913e-07, ...,
        3.46162476e-09, 2.58027230e-05, 6.73055425e-08],
       [6.35939523e-06, 3.56989426e-06, 6.95611504e-07, ...,
        1.26274813e-06, 8.67588187e-06, 3.27998286e-08]])