In [None]:
import os
import numpy as np
import matplotlib

import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

from functools import partial
from attacks import fgm, jsma, deepfool, cw

# Pick the non-interactive Agg backend, then enable inline figures for the notebook.
# NOTE(review): pyplot was already imported above and the %matplotlib magic below
# selects a backend too — confirm the matplotlib.use() call is still needed.
matplotlib.use('Agg')
%matplotlib inline

# Session options reused by every tf.InteractiveSession(config=config) in this notebook.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.allow_soft_placement = True
# NOTE(review): presumably silences TensorFlow's C++ logging — confirm the level.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# Machine-specific: exposes only the device with index 6 to CUDA; adjust per host.
os.environ["CUDA_VISIBLE_DEVICES"] = '6'

In [None]:
def model(x, sess, logits=False, training=False):
    """
    Build the classifier ops on `sess.graph` and return its softmax output.

    Layers, in creation order: 3x3 conv (32 filters, ReLU) -> 2x2 max-pool
    (stride 2) -> flatten -> dense(256, ReLU) -> dense(10) named 'logits'
    -> softmax named 'ybar'.

    :param x: input tensor passed to the first conv layer
        (assumes a 4-D image batch — TODO confirm against callers).
    :param sess: session whose graph receives the new ops.
    :param logits: when true, also return the pre-softmax tensor.
    :param training: accepted for interface compatibility; it is not used
        anywhere in this body.
    :returns: `y`, or the pair `(y, logits_)` when `logits` is true.
    """
    with sess.graph.as_default() :
        with tf.variable_scope('conv0'):
            z = tf.layers.conv2d(x, filters=32, kernel_size=[3, 3],
                                 padding='same', activation=tf.nn.relu)
            z = tf.layers.max_pooling2d(z, pool_size=[2, 2], strides=2)

        with tf.variable_scope('flatten'):
            # Collapse every non-batch dimension into one feature axis.
            shape = z.get_shape().as_list()
            z = tf.reshape(z, [-1, np.prod(shape[1:])])

        with tf.variable_scope('mlp0'):
            z = tf.layers.dense(z, units=256, activation=tf.nn.relu)

        # Ten output units; the softmax over them is the returned prediction.
        logits_ = tf.layers.dense(z, units=10, name='logits')
        y = tf.nn.softmax(logits_, name='ybar')

        if logits:
            return y, logits_
        return y
def evaluate(sess, env, X_data, y_data, batch_size=None):
    """
    Evaluate TF model by running env.loss and env.acc.

    The per-batch loss and accuracy are weighted by the batch length, so a
    shorter final batch does not skew the averages.

    :param sess: session used to run the ops.
    :param env: object exposing the `x`, `y`, `loss` and `acc` graph handles.
    :param X_data: inputs; `X_data.shape[0]` is the number of samples.
    :param y_data: labels aligned row-by-row with `X_data`.
    :param batch_size: samples per `sess.run` call. When None, the
        notebook-level `batch_size` is looked up at call time (128 if it is
        not defined), so this function can be defined before the
        hyper-parameter cell has run.
    :returns: tuple `(loss, acc)` averaged over all samples.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    print('\nEvaluating')

    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    loss, acc = 0, 0

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        cnt = end - start
        batch_loss, batch_acc = sess.run(
            [env.loss, env.acc],
            feed_dict={env.x: X_data[start:end],
                       env.y: y_data[start:end]})
        loss += batch_loss * cnt
        acc += batch_acc * cnt
    loss /= n_sample
    acc /= n_sample

    print(' loss: {0:.4f} acc: {1:.4f}'.format(loss, acc))
    return loss, acc


def train(sess, env, X_data, y_data, X_valid=None, y_valid=None, epochs=1,
          load=False, shuffle=True, batch_size=None, name='model',
          train_method='normal', debug=False):
    """
    Train a TF model by running env.train_op.

    With `load=True` nothing is trained: the checkpoint
    'model_pilot/<name>' is restored through `env.saver` and the result of
    `restore` is returned (None is returned if `env` has no saver).

    With `train_method != 'normal'` the labels of every batch are replaced,
    after each optimisation step, by `update_y(prediction_before_step,
    prediction_after_step, current_labels)`.

    :param sess: session used to run the ops.
    :param env: object exposing `x`, `y`, `training`, `train_op`, `ybar`
        and optionally `saver`.
    :param X_data: training inputs; `X_data.shape[0]` is the sample count.
    :param y_data: training labels aligned with `X_data`. The caller's array
        is never modified; updated labels are only visible in the return
        value.
    :param X_valid: optional validation inputs evaluated after each epoch.
    :param y_valid: validation labels, used only when `X_valid` is given.
    :param epochs: number of passes over the data.
    :param load: restore a saved model instead of training.
    :param shuffle: reshuffle the samples at the start of every epoch.
    :param batch_size: samples per step. When None, the notebook-level
        `batch_size` is looked up at call time (128 if it is not defined).
    :param name: checkpoint name inside the 'model_pilot' directory.
    :param train_method: 'normal' for plain training, anything else enables
        the label update described above.
    :param debug: print predictions and labels for every batch.
    :returns: the labels as they stand after training (in shuffled order
        when `shuffle` is true), or the `restore` result when `load` is true.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    if load:
        if not hasattr(env, 'saver'):
            print('\nError: cannot find saver op')
            return None
        print('\nLoading saved model')
        return env.saver.restore(sess, 'model_pilot/{}'.format(name))

    update_labels = train_method != 'normal'
    if update_labels:
        # Labels are rewritten batch by batch below; work on a private copy
        # so the caller's array is left untouched even when shuffle=False.
        y_data = np.array(y_data)

    print('\nTrain model')
    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    for epoch in range(epochs):
        print('\nEpoch {0}/{1}'.format(epoch + 1, epochs))

        if shuffle:
            print('\nShuffling data')
            ind = np.arange(n_sample)
            np.random.shuffle(ind)
            X_data = X_data[ind]
            y_data = y_data[ind]

        for batch in range(n_batch):
            print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
            start = batch * batch_size
            end = min(n_sample, start + batch_size)

            if update_labels:
                # Prediction before the optimisation step.
                ybar_t_1 = sess.run(env.ybar,
                                    feed_dict={env.x: X_data[start:end],
                                               env.y: y_data[start:end],
                                               env.training: False})

            if debug:
                # ybar_t_1 only exists on the label-update path; printing it
                # unconditionally raised NameError for train_method='normal'.
                if update_labels:
                    print('=============ybar_t_1===================')
                    print(ybar_t_1)
                print('************y_data_t_1*************')
                print(y_data[start:end])

            [_] = sess.run([env.train_op],
                           feed_dict={env.x: X_data[start:end],
                                      env.y: y_data[start:end],
                                      env.training: True})

            if update_labels:
                # Prediction after the step, then blend into new labels.
                ybar_t = sess.run(env.ybar,
                                  feed_dict={env.x: X_data[start:end],
                                             env.y: y_data[start:end],
                                             env.training: False})
                y_next_t = update_y(ybar_t_1, ybar_t, y_data[start:end])

                if debug:
                    print('------------------ybar_t----------------------------')
                    print(ybar_t)
                    print('------------------ynext_t----------------------------')
                    print(y_next_t)
                    print('++++++++++++++y_data_t++++++++++++')

                y_data[start:end] = y_next_t

        if X_valid is not None:
            evaluate(sess, env, X_valid, y_valid)

    if hasattr(env, 'saver'):
        print('\n Saving model')
        os.makedirs('model_pilot', exist_ok=True)
        env.saver.save(sess, 'model_pilot/{}'.format(name))

    return y_data

def predict(sess, env, X_data, batch_size=None, need_logit=False):
    """
    Do inference by running env.ybar.

    :param sess: session used to run the ops.
    :param env: object exposing `x`, `ybar` and, when `need_logit` is true,
        `logits`.
    :param X_data: inputs; `X_data.shape[0]` is the number of samples.
    :param batch_size: samples per `sess.run` call. When None, the
        notebook-level `batch_size` is looked up at call time (128 if it is
        not defined).
    :param need_logit: also fetch `env.logits` for every batch.
    :returns: array of `env.ybar` values with one row per sample; when
        `need_logit` is true, the pair `(ybar_values, logit_values)`.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    print('\nPredicting')
    n_classes = env.ybar.get_shape().as_list()[1]

    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    yval = np.empty((n_sample, n_classes))
    logits = np.empty((n_sample, n_classes)) if need_logit else None

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        feed_dict = {env.x: X_data[start:end]}
        if need_logit:
            # Both tensors must be fetched; fetching env.ybar alone and
            # unpacking it into two names split the batch rows instead.
            y_batch, logit_batch = sess.run([env.ybar, env.logits],
                                            feed_dict=feed_dict)
            logits[start:end] = logit_batch
        else:
            y_batch = sess.run(env.ybar, feed_dict=feed_dict)
        yval[start:end] = y_batch

    if need_logit:
        return yval, logits
    return yval

def update_y(ybar_old, ybar_current, y_target, alpha=None, w_target=None):
    """
    Blend an extrapolated prediction with the current target labels.

    The prediction is moved from `ybar_old` towards `ybar_current` by a
    factor `alpha`, clamped to [0, 1], renormalised so each row sums to one,
    and mixed with `y_target` using weight `w_target` for the target.

    :param ybar_old: predictions before the training step (rows = samples).
    :param ybar_current: predictions after the training step.
    :param y_target: labels currently used for these samples.
    :param alpha: extrapolation factor. When None, the notebook-level
        `alpha` is looked up at call time (10 if it is not defined).
    :param w_target: weight given to `y_target`. When None, the
        notebook-level `w_target` is looked up at call time (0.9 if it is
        not defined).
    :returns: the new label array, same shape as `y_target`.
    """
    # Def-time defaults (`alpha=alpha`) raise NameError on a fresh kernel
    # because both globals are assigned in a later cell.
    if alpha is None:
        alpha = globals().get('alpha', 10)
    if w_target is None:
        w_target = globals().get('w_target', 0.9)

    prev_bar = ybar_old + alpha * (ybar_current - ybar_old)
    prev_bar = np.clip(prev_bar, 0.0, 1.0)
    normalised = prev_bar / np.sum(prev_bar, axis=1, keepdims=True)
    y_new = (1 - w_target) * normalised + y_target * w_target
    return y_new

class Environment:
    """Empty attribute holder; graph handles and the session are attached to instances after creation."""

In [None]:
def make_fgsm(sess, env, X_data, epochs=1, eps=0.01, batch_size=None):
    """
    Generate adversarial inputs by running env.x_fgsm batch by batch.

    :param sess: session used to run the op.
    :param env: object exposing `x`, `adv_eps`, `adv_epochs` and `x_fgsm`.
    :param X_data: clean inputs; `X_data.shape[0]` is the sample count.
    :param epochs: value fed to `env.adv_epochs`.
    :param eps: value fed to `env.adv_eps`.
    :param batch_size: samples per `sess.run` call. When None, the
        notebook-level `batch_size` is looked up at call time (128 if it is
        not defined).
    :returns: array shaped like `X_data` holding the op's output.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    print('\nMaking adversarials via FGSM')

    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        feed_dict = {env.x: X_data[start:end], env.adv_eps: eps,
                     env.adv_epochs: epochs}
        adv = sess.run(env.x_fgsm, feed_dict=feed_dict)
        X_adv[start:end] = adv
    print()

    return X_adv


def make_jsma(sess, env, X_data, epochs=0.2, eps=1.0, batch_size=None):
    """
    Generate adversarial inputs by running env.x_jsma batch by batch.

    One target class is drawn uniformly from `range(n_classes)` for each
    batch and fed to `env.adv_y`, so all samples of a batch share a target.

    :param sess: session used to run the op.
    :param env: object exposing `x`, `adv_y`, `adv_eps`, `adv_epochs` and
        `x_jsma`.
    :param X_data: clean inputs; `X_data.shape[0]` is the sample count.
    :param epochs: value fed to `env.adv_epochs`.
        NOTE(review): the default is the float 0.2 while the graph builder
        in this notebook declares `adv_epochs` as an int32 placeholder —
        confirm the default is usable; every caller here passes an integer.
    :param eps: value fed to `env.adv_eps`.
    :param batch_size: samples per `sess.run` call. When None, the
        notebook-level `batch_size` is looked up at call time (128 if it is
        not defined).
    :returns: array shaped like `X_data` holding the op's output.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    print('\nMaking adversarials via JSMA')

    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        feed_dict = {
            env.x: X_data[start:end],
            env.adv_y: np.random.choice(n_classes),
            env.adv_epochs: epochs,
            env.adv_eps: eps}
        adv = sess.run(env.x_jsma, feed_dict=feed_dict)
        X_adv[start:end] = adv
    print()

    return X_adv


def make_deepfool(sess, env, X_data, epochs=1, eps=0.01, batch_size=128):
    """
    Generate adversarial inputs by running env.x_deepfool batch by batch.

    :param sess: session used to run the op.
    :param env: object exposing `x`, `adv_eps`, `adv_epochs` and
        `x_deepfool`.
    :param X_data: clean inputs; `X_data.shape[0]` is the sample count.
    :param epochs: value fed to `env.adv_epochs`.
    :param eps: value fed to `env.adv_eps`.
    :param batch_size: samples per `sess.run` call.
    :returns: array shaped like `X_data` holding the op's output.
    """
    # The banner previously said "FGSM" (copy-paste slip), which made the
    # notebook log misleading when several attacks run back to back.
    print('\nMaking adversarials via DeepFool')

    n_sample = X_data.shape[0]
    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    for batch in range(n_batch):
        print(' batch {0}/{1}'.format(batch + 1, n_batch), end='\r')
        start = batch * batch_size
        end = min(n_sample, start + batch_size)
        feed_dict = {env.x: X_data[start:end], env.adv_epochs: epochs,
                     env.adv_eps: eps}
        adv = sess.run(env.x_deepfool, feed_dict=feed_dict)
        X_adv[start:end] = adv
    print()

    return X_adv

def make_cw(env, X_data, epochs=1, eps=0.1, batch_size=None):
    """
    Generate adversarial via CW optimization.

    Every batch fed to `env.x_fixed` has exactly `batch_size` rows: the last
    batch is shifted backwards so that it ends on the final sample, which
    means it overlaps the previous batch when the sample count is not a
    multiple of `batch_size`. The overlapping rows are simply recomputed.

    :param env: object exposing `sess`, `x_fixed`, `adv_eps`, `adv_y`,
        `cw_noise`, `cw_adv_train_op` and `cw_xadv`.
    :param X_data: clean inputs; `X_data.shape[0]` is the sample count.
    :param epochs: number of `env.cw_adv_train_op` runs per batch.
    :param eps: value fed to `env.adv_eps`.
    :param batch_size: rows per batch. When None, the notebook-level
        `batch_size` is looked up at call time (128 if it is not defined).
        The graph builder in this notebook gives `x_fixed` a fixed first
        dimension equal to that global, so other values will not match the
        placeholder.
    :returns: array shaped like `X_data` holding the `env.cw_xadv` output.
    :raises ValueError: if `X_data` is non-empty but has fewer rows than
        `batch_size`, since no full batch can be formed.
    """
    if batch_size is None:
        # A def-time default (`batch_size=batch_size`) raises NameError on a
        # fresh kernel because the global is assigned in a later cell.
        batch_size = globals().get('batch_size', 128)

    print('\nMaking adversarials via CW')

    n_sample = X_data.shape[0]
    if 0 < n_sample < batch_size:
        # `end - batch_size` would be negative and silently slice from the
        # end of the array instead of forming a full batch.
        raise ValueError(
            'make_cw needs at least batch_size={} samples, got {}'.format(
                batch_size, n_sample))

    n_batch = (n_sample + batch_size - 1) // batch_size
    X_adv = np.empty_like(X_data)

    for batch in range(n_batch):
        end = min(n_sample, (batch + 1) * batch_size)
        start = end - batch_size
        feed_dict = {
            env.x_fixed: X_data[start:end],
            env.adv_eps: eps,
            env.adv_y: np.random.choice(n_classes)}

        # reset the noise before every iteration
        env.sess.run(env.cw_noise.initializer)
        for epoch in range(epochs):
            env.sess.run(env.cw_adv_train_op, feed_dict=feed_dict)

        xadv = env.sess.run(env.cw_xadv, feed_dict=feed_dict)
        X_adv[start:end] = xadv

    return X_adv

In [None]:
# Training mode used when preparing the labels, and the tag appended to the
# checkpoint name of the label-updating run.
train_method, method = 'normal', '1.1'

# Label-update hyper-parameters consumed by update_y().
alpha, w_target = 10, 0.9

# Input geometry, number of classes and mini-batch size used by the graph
# builder and the batching helpers.
img_size, img_chan = 28, 1
n_classes = 10
batch_size = 128

In [None]:
def generate_graph(env, sess) :
    """
    Populate `env` with the placeholders, model outputs, training ops and
    adversarial-example ops, all created on `sess.graph`.

    Attributes set on `env`: x, y, training, ybar, logits, acc, loss,
    train_op, saver, adv_eps, adv_epochs, adv_y, x_fgsm, x_deepfool, x_jsma,
    x_fixed, cw_adv_train_op, cw_xadv, cw_noise.

    Reads the notebook-level img_size, img_chan, n_classes and batch_size.

    :param env: attribute container that receives the graph handles.
    :param sess: session whose graph is used; also forwarded to `model`.
    """
    with sess.graph.as_default() :
        with tf.variable_scope('model'):
            env.x = tf.placeholder(tf.float32, (None, img_size, img_size, img_chan),
                                   name='x')
            env.y = tf.placeholder(tf.float32, (None, n_classes), name='y')
            # Defaults to False, so feeds that omit it run in non-training mode.
            env.training = tf.placeholder_with_default(False, (), name='mode')

            env.ybar, env.logits = model(env.x, sess, logits=True, training=env.training)

            with tf.variable_scope('acc'):
                # Fraction of rows whose arg-max label matches the arg-max prediction.
                count = tf.equal(tf.argmax(env.y, axis=1), tf.argmax(env.ybar, axis=1))
                env.acc = tf.reduce_mean(tf.cast(count, tf.float32), name='acc')

            with tf.variable_scope('loss'):
                xent = tf.nn.softmax_cross_entropy_with_logits(labels=env.y,
                                                               logits=env.logits)
                env.loss = tf.reduce_mean(xent, name='loss')

            with tf.variable_scope('train_op'):
                optimizer = tf.train.MomentumOptimizer(learning_rate = 0.1,
                                                      momentum=0.1)
                env.train_op = optimizer.minimize(env.loss)

            # Created before the attack ops below.
            # NOTE(review): presumably this keeps attack-only variables out of
            # the checkpoint — confirm against tf.train.Saver's default var list.
            env.saver = tf.train.Saver()

        # Re-enter the same scope with AUTO_REUSE so the attack builders, which
        # call the model again through partial_model, can share its variables.
        with tf.variable_scope('model', reuse=tf.AUTO_REUSE):
            env.adv_eps = tf.placeholder(tf.float32, (), name='adv_eps')
            env.adv_epochs = tf.placeholder(tf.int32, (), name='adv_epochs')
            env.adv_y = tf.placeholder(tf.int32, (), name='adv_y')

            partial_model = partial(model, sess=sess)
            env.x_fgsm = fgm(partial_model, env.x, epochs=env.adv_epochs, eps=env.adv_eps)
            env.x_deepfool = deepfool(partial_model, env.x, epochs=env.adv_epochs, 
                                      eta=env.adv_eps,batch=True)
            env.x_jsma = jsma(partial_model, env.x, env.adv_y, eps=env.adv_eps,
                              epochs=env.adv_epochs)
            # Separate optimizer for the CW attack; rebinding the local name
            # does not affect env.train_op built above.
            optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
            # Unlike env.x, the CW input has a fixed leading dimension of
            # batch_size, so it must always be fed exactly batch_size rows.
            env.x_fixed = tf.placeholder(tf.float32, (batch_size, img_size, img_size, img_chan), name='x_fixed')
            env.cw_adv_train_op, env.cw_xadv, env.cw_noise = cw(partial_model, env.x_fixed,
                                                       y=env.adv_y, eps=env.adv_eps,
                                                       optimizer=optimizer)

In [None]:
print('Loading MNIST')

mnist = tf.keras.datasets.mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Add the channel axis and rescale the pixel values by 1/255 as float32.
X_train = X_train.reshape(-1, img_size, img_size, img_chan).astype(np.float32) / 255
X_test = X_test.reshape(-1, img_size, img_size, img_chan).astype(np.float32) / 255

# One-hot encode the integer class labels.
to_categorical = tf.keras.utils.to_categorical
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

# Untouched copy of the one-hot training labels.
y_train_origin = y_train.copy()

# Softened labels: every class gets `single_weight`, the true class gets
# `pw * single_weight`, and each row sums to one.
pw = 5
single_weight = 1/(n_classes-1+pw)
y_train_t_0 = y_train * (pw-1) * single_weight + single_weight

print('Spliting data')

# Only the non-'normal' training mode starts from the softened labels.
if train_method != 'normal':
    y_train = np.array(y_train_t_0)

# Shuffle inputs and labels with one shared permutation.
ind = np.random.permutation(X_train.shape[0])
X_train, y_train = X_train[ind], y_train[ind]

# Hold out the last VALIDATION_SPLIT fraction as the validation set.
VALIDATION_SPLIT = 0.1
n = int(X_train.shape[0] * (1-VALIDATION_SPLIT))
X_train, X_valid = X_train[:n], X_train[n:]
y_train, y_valid = y_train[:n], y_train[n:]
y_origin = np.array(y_train)

In [None]:
# The teacher lives in its own graph and session.
teacher_sess = tf.InteractiveSession(graph=tf.Graph(), config=config)
teacher_env = Environment()
teacher_env.sess = teacher_sess

generate_graph(env=teacher_env, sess=teacher_sess)
teacher_sess.run(tf.global_variables_initializer())
teacher_sess.run(tf.local_variables_initializer())

print('Training Teacher')
# load=True: train() restores the saved checkpoint instead of training.
y_data = train(
    sess=teacher_sess, env=teacher_env,
    X_data=X_train, y_data=y_train,
    X_valid=X_valid, y_valid=y_valid,
    epochs=30, load=True, shuffle=False,
    name='mnist_teacher_adding_deepfool', train_method="normal")

evaluate(sess=teacher_sess, env=teacher_env, X_data=X_test, y_data=y_test)

In [None]:
# Twenty attack strengths per attack; the four lists are walked in lock-step,
# so index k of every result list belongs to index k of every candidate list.
eps_candidate_cw = [1.5**i * 1e-4 for i in range(3, 23)]
eps_candidate_fgsm = [(0.001*i+0.0015) for i in range(0, 20)]
eps_candidate_jsma = [(0.1*i+0.15) for i in range(0, 20)]
eps_candidate_deepfool = [i * 1e-1 - 1.1 for i in range(1, 21)]

# Teacher accuracy on adversarial test inputs, one entry per sweep step.
T_x_fgsm_res = []
T_x_jsma_res = []
T_X_cw_res = []
T_X_deepfool_res = []

for (eps_cw,eps_fgsm,eps_jsma,eps_deepfool) in zip(eps_candidate_cw,eps_candidate_fgsm,eps_candidate_jsma,eps_candidate_deepfool):
    x_fgsm_advs = make_fgsm(teacher_sess, teacher_env, X_test, eps=eps_fgsm, epochs=10)
    result = evaluate(teacher_sess, teacher_env, x_fgsm_advs, y_test)
    T_x_fgsm_res.append(result[1])
    x_jsma_advs = make_jsma(teacher_sess, teacher_env, X_test, eps=eps_jsma, epochs=40)
    result = evaluate(teacher_sess, teacher_env, x_jsma_advs, y_test)
    T_x_jsma_res.append(result[1])
    X_cw = make_cw(teacher_env, X_test, eps=eps_cw, epochs=100)
    result = evaluate(teacher_sess, teacher_env, X_cw, y_test)
    T_X_cw_res.append(result[1])
    x_deepfool_advs = make_deepfool(teacher_sess, teacher_env, X_test, eps=eps_deepfool, epochs=10)
    # Score the DeepFool samples with the teacher that produced them; the
    # democracy-learning session does not exist yet when this cell runs.
    result = evaluate(teacher_sess, teacher_env, x_deepfool_advs, y_test)
    T_X_deepfool_res.append(result[1])

In [None]:
print('Generating label for student')
# Label the existing train/validation splits with the teacher's predictions.
# X_train and X_valid are deliberately left untouched: re-permuting and
# re-splitting them here, without doing the same to y_train / y_valid, left
# the later cells training on inputs and labels that no longer lined up.
# Leaving them alone also makes this cell safe to re-run.
y_train_logit = predict(teacher_sess, teacher_env, X_train)
y_valid_logit = predict(teacher_sess, teacher_env, X_valid)

In [None]:
# The student gets its own graph and session, independent of the teacher's.
student_sess = tf.InteractiveSession(graph=tf.Graph(), config=config)
student_env = Environment()
student_env.sess = student_sess

generate_graph(env=student_env, sess=student_sess)
student_sess.run(tf.global_variables_initializer())
student_sess.run(tf.local_variables_initializer())

print('Training Student')
# load=True: train() restores the saved checkpoint instead of training.
y_data = train(
    sess=student_sess, env=student_env,
    X_data=X_train, y_data=y_train_logit,
    X_valid=X_valid, y_valid=y_valid_logit,
    epochs=30, load=True, shuffle=False,
    name='mnist_student_adding_deepfool', train_method="normal")
evaluate(sess=student_sess, env=student_env, X_data=X_test, y_data=y_test)

In [None]:
# Student accuracy on adversarial test inputs, one entry per sweep step.
S_x_fgsm_res = []
S_x_jsma_res = []
S_X_cw_res = []
S_X_deepfool_res = []

for (eps_cw,eps_fgsm,eps_jsma,eps_deepfool) in zip(eps_candidate_cw,eps_candidate_fgsm,eps_candidate_jsma,eps_candidate_deepfool):
    x_fgsm_advs = make_fgsm(student_sess, student_env, X_test, eps=eps_fgsm, epochs=10)
    result = evaluate(student_sess, student_env, x_fgsm_advs, y_test)
    S_x_fgsm_res.append(result[1])
    x_jsma_advs = make_jsma(student_sess, student_env, X_test, eps=eps_jsma, epochs=40)
    result = evaluate(student_sess, student_env, x_jsma_advs, y_test)
    S_x_jsma_res.append(result[1])
    X_cw = make_cw(student_env, X_test, eps=eps_cw, epochs=100)
    result = evaluate(student_sess, student_env, X_cw, y_test)
    S_X_cw_res.append(result[1])
    x_deepfool_advs = make_deepfool(student_sess, student_env, X_test, eps=eps_deepfool, epochs=10)
    # Score the DeepFool samples with the student that produced them; the
    # democracy-learning session does not exist yet when this cell runs.
    result = evaluate(student_sess, student_env, x_deepfool_advs, y_test)
    S_X_deepfool_res.append(result[1])

In [None]:
# Third model, again in its own graph and session.
democracy_learning_sess = tf.InteractiveSession(config=config, graph=tf.Graph())
democracy_learning_env = Environment()
democracy_learning_env.sess = democracy_learning_sess

generate_graph(democracy_learning_env, democracy_learning_sess)
democracy_learning_sess.run(tf.global_variables_initializer())
democracy_learning_sess.run(tf.local_variables_initializer())

print('\nTraining')
# Hyper-parameters that drive the label update during this run.
print(alpha)
print(w_target)
print(pw)

# train() has no `method` keyword (passing one raises TypeError); the mode is
# selected through `train_method`. Any value other than 'normal' turns on the
# per-batch label update, which is what distinguishes this run from the
# teacher and student runs.
y_data = train(democracy_learning_sess, democracy_learning_env, X_train, y_train, X_valid, y_valid,
               load=False, epochs=30, shuffle=False,
               name='mnist_method_adding_deepfool_near_normal_w_target_0.9_pw_30'+method,
               train_method=method)

evaluate(democracy_learning_sess, democracy_learning_env, X_test, y_test)

In [None]:
# Accuracy of the democracy-learning model on adversarial test inputs,
# one entry per sweep step and per attack.
x_fgsm_res, x_jsma_res, X_cw_res, X_deepfool_res = [], [], [], []

for (eps_cw, eps_fgsm, eps_jsma, eps_deepfool) in zip(
        eps_candidate_cw, eps_candidate_fgsm,
        eps_candidate_jsma, eps_candidate_deepfool):
    # FGSM
    x_fgsm_advs = make_fgsm(democracy_learning_sess, democracy_learning_env,
                            X_data=X_test, epochs=10, eps=eps_fgsm)
    result = evaluate(democracy_learning_sess, democracy_learning_env,
                      X_data=x_fgsm_advs, y_data=y_test)
    x_fgsm_res.append(result[1])

    # JSMA
    x_jsma_advs = make_jsma(democracy_learning_sess, democracy_learning_env,
                            X_data=X_test, epochs=40, eps=eps_jsma)
    result = evaluate(democracy_learning_sess, democracy_learning_env,
                      X_data=x_jsma_advs, y_data=y_test)
    x_jsma_res.append(result[1])

    # CW
    X_cw = make_cw(democracy_learning_env, X_data=X_test, epochs=100, eps=eps_cw)
    result = evaluate(democracy_learning_sess, democracy_learning_env,
                      X_data=X_cw, y_data=y_test)
    X_cw_res.append(result[1])

    # DeepFool
    x_deepfool_advs = make_deepfool(democracy_learning_sess, democracy_learning_env,
                                    X_data=X_test, epochs=10, eps=eps_deepfool)
    result = evaluate(democracy_learning_sess, democracy_learning_env,
                      X_data=x_deepfool_advs, y_data=y_test)
    X_deepfool_res.append(result[1])

In [None]:
# One figure per attack. The x axis is the position in the eps candidate
# list used by the sweeps; the y axis is the accuracy returned by evaluate().

fig1 = plt.figure(figsize=(20,10))
cw_plot = fig1.add_subplot(111)
cw_plot.plot(T_X_cw_res,label='teacher')
cw_plot.plot(S_X_cw_res,label='student')
cw_plot.plot(X_cw_res,label='our method')
cw_plot.set(title='CW', xlabel='eps candidate index', ylabel='accuracy')
cw_plot.legend(loc=3)

fig2 = plt.figure(figsize=(20,10))
fgsm_plot = fig2.add_subplot(111)
fgsm_plot.plot(T_x_fgsm_res,label='teacher')
fgsm_plot.plot(S_x_fgsm_res,label='student')
fgsm_plot.plot(x_fgsm_res,label='our method')
fgsm_plot.set(title='FGSM', xlabel='eps candidate index', ylabel='accuracy')
fgsm_plot.legend(loc=3)

fig3 = plt.figure(figsize=(20,10))
jsma_plot = fig3.add_subplot(111)
jsma_plot.plot(T_x_jsma_res,label='teacher')
jsma_plot.plot(S_x_jsma_res,label='student')
jsma_plot.plot(x_jsma_res,label='our method')
jsma_plot.set(title='JSMA', xlabel='eps candidate index', ylabel='accuracy')
jsma_plot.legend(loc=3)

# Own figure handle (fig3 keeps pointing at the JSMA figure), and the result
# lists are read under the names the sweep cells actually define: the
# DeepFool lists are spelled with a capital X.
fig4 = plt.figure(figsize=(20,10))
deepfool_plot = fig4.add_subplot(111)
deepfool_plot.plot(T_X_deepfool_res,label='teacher')
deepfool_plot.plot(S_X_deepfool_res,label='student')
deepfool_plot.plot(X_deepfool_res,label='our method')
deepfool_plot.set(title='DeepFool', xlabel='eps candidate index', ylabel='accuracy')
deepfool_plot.legend(loc=3)