In [42]:
import numpy as np
import math
import os
import keras
import copy
import tensorflow as tf
from cleverhans.utils_mnist import data_mnist
from cleverhans.utils import batch_indices
from cleverhans.attacks import DeepFool
from cleverhans.utils_keras import KerasModelWrapper

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten

In [50]:
# Problem dimensions shared by every cell below.
NUM_NOISES = 10       # how many advx_/indices_ file pairs (noise types) are loaded
NUM_PIXELS = 28 * 28  # 784 values per flattened MNIST image
NUM_IMAGES = 60000    # number of original (uncorrupted) training points
NUM_CLASSES = 10      # 10 classes, 0 to 9 for MNIST

In [None]:
from tensorflow.examples.tutorials.mnist import input_data

# one_hot=True: each label row is a one-hot encoding of the 10 classes
mnist = input_data.read_data_sets("data/MNIST", one_hot=True)

# Kojin's data includes both 'train' and 'validation', so the two splits are
# concatenated row-wise to line up with it.
images = np.concatenate((mnist.train.images, mnist.validation.images), axis=0)
# labels has size (60000, 10) = (NUM_IMAGES, NUM_CLASSES)
labels = np.concatenate((mnist.train.labels, mnist.validation.labels), axis=0)

In [47]:
# advx is of length 392, the changed part of the mnist data
# we need to recreate the full perturbed data for robust training
# we call these advf (adversarial full)

for i in range(NUM_NOISES):
    advx = np.load("../../data/advx_{}.npy".format(i))
    ind  = np.load("../../data/indices_{}.npy".format(i))
    # start from the clean images and overwrite only the perturbed pixel columns
    advf = images.copy()
    advf[:,ind] = advx
    # np.save writes a real .npy file; ndarray.dump would write a pickle, which
    # np.load refuses by default on recent NumPy versions (allow_pickle=False)
    np.save("../../data/advf_{}.npy".format(i), advf)

In [52]:
# Gather the full adversarial datasets into one array:
# advf_all[:, :, k] holds the dataset saved for noise type k.
advf_all = np.zeros((NUM_IMAGES, NUM_PIXELS, NUM_NOISES))
for noise_idx in range(NUM_NOISES):
    advf_path = "../../data/advf_{}.npy".format(noise_idx)
    advf_all[..., noise_idx] = np.load(advf_path)

In [63]:
# copied from logistic_noise_generator.ipynb, I didn't use this
def logistic_regression_model(input_ph,num_inputs, nb_classes=10):
    """Build an uncompiled Keras softmax-regression model.

    Args:
        input_ph: accepted for signature compatibility; never read here.
        num_inputs: length of each flat input vector.
        nb_classes: number of output units / classes.

    Returns:
        A Sequential model: one Dense layer followed by a softmax activation.
    """
    layers = [
        Dense(nb_classes, input_shape=(num_inputs,)),
        Activation('softmax'),
    ]
    return Sequential(layers)

In [56]:
# Composite Method: invoke the Bayesian (distributional) oracle NUM_ORACLE_ITER
# times, re-weighting the noise types between calls from the losses seen so far.

NUM_ORACLE_ITER = 2 # Big T in the paper
MINIBATCH_SIZE = 100  # don't really need minibatch in logistic regression
NUM_TRAINING_ITER = 100 # how many iterations to do the network training like SGD within the oracle

weights_distribution = np.full((NUM_ORACLE_ITER, NUM_NOISES), 1./NUM_NOISES) # each row is w_t, simplex vector over noises
losses = np.zeros((NUM_ORACLE_ITER, NUM_NOISES)) # has value L_i(x_t) for each i in noises and t in oracle_iter

# step size of the weight update; it does not depend on the iteration, so compute it once
eta = np.sqrt(np.log(NUM_NOISES)/(2*NUM_ORACLE_ITER))

# number of times the Bayesian oracle is invoked
for oracle_iter in range(NUM_ORACLE_ITER):
    # compute the weights for the distributional oracle for this iteration:
    # proportional to exp(eta * sum of the losses from all earlier iterations)
    cumulative_losses = losses[0:oracle_iter,:].sum(axis=0)
    # shifting by the max cancels out in the normalisation below, but keeps
    # np.exp from overflowing to inf (which would turn the weights into NaN)
    unnormalized_current_weights = np.exp(eta*(cumulative_losses - cumulative_losses.max()))
    weights_distribution[oracle_iter,:] = unnormalized_current_weights/np.sum(unnormalized_current_weights)

    # before creating a tensorflow session, first create a computational graph.
    # Each oracle call gets its own fresh graph so that placeholders, variables
    # and optimizer state from earlier iterations do not pile up in the default graph.
    with tf.Graph().as_default():
        # the oracle is logistic regression
        train_data   = tf.placeholder(tf.float32, shape=(MINIBATCH_SIZE, NUM_PIXELS, NUM_NOISES))
        train_labels = tf.placeholder(tf.float32, shape=(MINIBATCH_SIZE, NUM_CLASSES))
        logit_weight = tf.Variable(tf.zeros([NUM_PIXELS, NUM_CLASSES]))
        logit_bias   = tf.Variable(tf.zeros([NUM_CLASSES]))
        combined_loss = 0
        loss_total_list = []

        # in theory, this additional for-loop can be implemented as an extra layer on NN
        # but the logit_weight and logit_bias must be shared and I'm not sure how
        # I don't think this is any slower, but it's definitely clearer
        for noise_type in range(NUM_NOISES):
            # simple linear (the logistic part is in the cross_entropy_with_logits)
            unscaled_pred = (tf.matmul(train_data[:,:,noise_type], logit_weight)
                                 + logit_bias)
            # loss_vector_this_noise is a vector of length MINIBATCH_SIZE
            loss_vector_this_noise = tf.nn.softmax_cross_entropy_with_logits(
                            labels=train_labels,
                            logits=unscaled_pred,
                            )
            loss_total_this_noise = tf.reduce_sum(loss_vector_this_noise)
            loss_total_list.append(loss_total_this_noise)
            # the oracle minimises the weights_distribution-weighted sum of per-noise losses
            combined_loss += weights_distribution[oracle_iter,noise_type]*loss_total_this_noise
        loss_total_all_noises = tf.stack(loss_total_list)
        train_step = tf.train.AdamOptimizer(1e-4).minimize(combined_loss)

        # now that the computational graph is finished, start doing the computation
        # (the session must be opened inside the graph context to bind to this graph)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            for training_iter in range(NUM_TRAINING_ITER):
                minibatch_subset = np.random.choice(np.arange(NUM_IMAGES), size=MINIBATCH_SIZE, replace=False)
                minibatch_data = advf_all[minibatch_subset,:,:]
                minibatch_labels = labels[minibatch_subset,:]
                train_step.run(feed_dict={train_data:minibatch_data,train_labels:minibatch_labels})
            # training is done
            # keep the loss for this oracle iteration in 'losses'
            # NOTE: the loss is measured on the last training minibatch only
            losses[oracle_iter,:] = loss_total_all_noises.eval(
                feed_dict={train_data:minibatch_data,train_labels:minibatch_labels})
            
            

In [6]:
# Show the Keras version loaded in this kernel.
keras.__version__

'2.1.4'

In [7]:
# Show the TensorFlow version loaded in this kernel.
tf.__version__

'1.4.1'

In [5]:
# ignore this one, this is just copied from a keras tutorial
# I didn't use keras in my code
#
# Summary: trains a small fully-connected classifier on random data and
# evaluates it on a second random set.

import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD

# Generate dummy data
# 1000 training / 100 test rows of 20 random features, with random labels in
# 0..9 converted to one-hot rows by to_categorical
import numpy as np
x_train = np.random.random((1000, 20))
y_train = keras.utils.to_categorical(np.random.randint(10, size=(1000, 1)), num_classes=10)
x_test = np.random.random((100, 20))
y_test = keras.utils.to_categorical(np.random.randint(10, size=(100, 1)), num_classes=10)

model = Sequential()
# Dense(64) is a fully-connected layer with 64 hidden units.
# in the first layer, you must specify the expected input data shape:
# here, 20-dimensional vectors.
model.add(Dense(64, activation='relu', input_dim=20))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
# output layer: one softmax unit per class
model.add(Dense(10, activation='softmax'))

# SGD with Nesterov momentum and a small learning-rate decay
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'])

model.fit(x_train, y_train,
          epochs=20,
          batch_size=128)
# score is assigned but not displayed in this cell
score = model.evaluate(x_test, y_test, batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [46]:
# scratch: a 3x3 array with every entry equal to 9
temp = np.full(shape=(3, 3), fill_value=9)

In [60]:
# scratch: 3x3 array holding 1..9 in row-major order
temp = np.arange(1, 10).reshape(3, 3)

In [62]:
# scratch: indexing with a list of row indices selects those rows (here rows 0 and 2),
# the same pattern used to pull minibatch rows out of advf_all and labels
temp[[0,2],:]

array([[1, 2, 3],
       [7, 8, 9]])

In [59]:
# scratch: draw 3 values from {0, 1, 2, 3} ten times.
# replace=True allows repeats within a draw; the training loop samples its
# minibatch indices with replace=False instead.
for _ in range(10):
    print(np.random.choice(np.arange(4), size=3, replace=True))

[1 2 3]
[2 1 3]
[3 1 3]
[3 3 2]
[3 1 1]
[3 0 1]
[2 3 1]
[3 0 3]
[0 0 3]
[0 0 0]


In [68]:
# Show the TensorFlow version loaded in this kernel.
# NOTE(review): the output recorded here ('1.1.0') differs from the earlier
# tf.__version__ cell ('1.4.1'), so the cells were not all run in one environment.
tf.__version__

'1.1.0'

In [69]:
# Show the Keras version loaded in this kernel.
# NOTE(review): the output recorded here ('2.2.0') differs from the earlier
# keras.__version__ cell ('2.1.4'), so the cells were not all run in one environment.
keras.__version__

'2.2.0'