In [1]:
#importing
import theano
from theano import tensor as T
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
import numpy as np
import pickle as cPickle
import os
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
matplotlib.use('Agg')
#from pprint import pprint

because the backend has already been chosen;
matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.



In [2]:
#Definitions
# Convert into correct type for theano
def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

# Weights are shared theano variables
def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

# RMSProp to update weights
def RMSprop(cost, params, lr=0.001, rho=0.9, epsilon=1e-6):
    grads = T.grad(cost=cost, wrt=params)
    updates = []
    for p, g in zip(params, grads):
        acc = theano.shared(p.get_value() * 0.)
        acc_new = rho * acc + (1 - rho) * g ** 2
        gradient_scaling = T.sqrt(acc_new + epsilon)
        g = g / gradient_scaling
        updates.append((acc, acc_new))
        updates.append((p, p - lr * g))
    return updates

# Dropout regularization 
def dropout(X, p=0.):
    if p > 0:
        retain_prob = 1 - p
        X *= srng.binomial(X.shape, p=retain_prob, dtype=theano.config.floatX)
        X /= retain_prob
    return X

# Neural network model, 3 fully connected layers
def model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
	# Input layer: dropout + relu 
    X = dropout(X, p_drop_input)
    h = T.nnet.relu(T.dot(X, w_h))

	# Hidden layer: dropout + relu 
    h = dropout(h, p_drop_hidden)
    h2 = T.nnet.relu(T.dot(h, w_h2))
	
	# Output layer: dropout + softmax 
    h2 = dropout(h2, p_drop_hidden)
    py_x = T.nnet.softmax(T.dot(h2, w_o))
    return h, h2, py_x

def one_hot(x,n):
	if type(x) == list:
		x = np.array(x)
	x = x.flatten()
	o_h = np.zeros((len(x),n))
	o_h[np.arange(len(x)),x] = 1
	return o_h

def mnist(ntrain=60000,ntest=10000,onehot=True):
	data_dir = os.path.join(datasets_dir,'mnist/')
	fd = open(os.path.join(data_dir,'train-images-idx3-ubyte'))
	loaded = np.fromfile(file=fd,dtype=np.uint8)
	trX = loaded[16:].reshape((60000,28*28)).astype(float)

	fd = open(os.path.join(data_dir,'train-labels-idx1-ubyte'))
	loaded = np.fromfile(file=fd,dtype=np.uint8)
	trY = loaded[8:].reshape((60000))

	fd = open(os.path.join(data_dir,'t10k-images-idx3-ubyte'))
	loaded = np.fromfile(file=fd,dtype=np.uint8)
	teX = loaded[16:].reshape((10000,28*28)).astype(float)

	fd = open(os.path.join(data_dir,'t10k-labels-idx1-ubyte'))
	loaded = np.fromfile(file=fd,dtype=np.uint8)
	teY = loaded[8:].reshape((10000))

	trX = trX/255.
	teX = teX/255.

	trX = trX[:ntrain]
	trY = trY[:ntrain]

	teX = teX[:ntest]
	teY = teY[:ntest]

	if onehot:
		trY = one_hot(trY, 10)
		teY = one_hot(teY, 10)
	else:
		trY = np.asarray(trY)
		teY = np.asarray(teY)

	return trX,teX,trY,teY

def plot_mnist_digit(image1, image2, name1, name2):
    global count_attack
    image1 = np.reshape(image1,[1,784])
    image2 = np.reshape(image2,[1,784])
    #print 'test image confidence' , np.mean(predict_conf(image1)), 'adversarial image confidence', np.mean(predict_conf(image2))
    if (predict(image1) != predict(image2)):
        count_attack = count_attack + 1

In [3]:
#Loading MNIST data
datasets_dir = 'media/datasets/'
srng = RandomStreams()
TRAINING = True

print('MNIST data Loaded')
trX, teX, trY, teY = mnist(onehot=True)

# Initialize theano variables for X, Y, and shared variables for weights
X = T.fmatrix()
Y = T.fmatrix()

if TRAINING:
    # For training of the net, we initialize weights to random values
    w_h = init_weights((784, 625))
    w_h2 = init_weights((625, 625))
    w_o = init_weights((625, 10))
    params = [w_h, w_h2, w_o]
else:
    # To run experiments, just read weights we learned before
    print('Loading model...')
    with open('LearnedParamsL1_2.model','rb') as fp:
        params = cPickle.load(fp)
    w_h, w_h2, w_o = params

# Dropout model for training
noise_h, noise_h2, noise_py_x = model(X, w_h, w_h2, w_o, 0.2, 0.5)
# Use all-weights model for prediction
h, h2, py_x = model(X, w_h, w_h2, w_o, 0., 0.)
y_x = T.argmax(py_x, axis=1)

# To find confidence of test set use the following value of y_x
y_x1 = T.max(py_x, axis = 1)
# Define cost and update theano expressions

l1 = abs(w_h).sum() + abs(w_h2).sum() + abs(w_o).sum()
l2 = (w_h**2).sum() + (w_h2**2).sum() + (w_o**2).sum()

MNIST data Loaded


In [9]:
#l1 and l2 coefficients & Training Data
#=================== Parameters to chnge ===============================#
l1coef = [0.0,0.00001,0.0001]
l2coef = [0.0,0.00001,0.0001]
#=======================================================================#

for i in l1coef:
    for j in l2coef:
        print("l1coef = %f, l2coef = %f" %(i,j))
        cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y)) + i * l1 + j * l2
        updates = RMSprop(cost, params, lr=0.001)

        # Define train and predict theano functions
        train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)

        predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
        predict_conf = theano.function(inputs=[X], outputs=y_x1, allow_input_downcast=True)
        print('Training MNIST data...')
        if TRAINING:
            # Train in 50 epochs
            for k in range(50):
                # Select minibatch and train
                for start, end in zip(range(0, len(trX), 128), range(128, len(trX), 128)):
                    cost = train(trX[start:end], trY[start:end])
                # Show test set accuracy. Its cost is not used for optimization,
                # it is just to show progress.
                print(k, ':  ', np.mean(np.argmax(teY, axis=1) == predict(teX)))
                # In each step save the learned weights
                with open('LearnedParamsL1.model','wb') as fp:
                    cPickle.dump(params,fp)
            print("Accuracy is:    ",np.mean(np.argmax(teY, axis=1) == predict(teX)))
            print("Confidence is:    ", np.mean(predict_conf(teX)))

l1coef = 0.000000, l2coef = 0.000000
Training MNIST data...
0 :   0.9816
1 :   0.9831
2 :   0.9824
3 :   0.9824
4 :   0.9833
5 :   0.983
6 :   0.9824
7 :   0.9836
8 :   0.983
9 :   0.9841
10 :   0.9838
11 :   0.9832
12 :   0.9847
13 :   0.9847
14 :   0.9835
15 :   0.9836
16 :   0.9839
17 :   0.9836
18 :   0.9823
19 :   0.9849
20 :   0.9845
21 :   0.9843
22 :   0.9831
23 :   0.9839
24 :   0.9837
25 :   0.9839
26 :   0.9842
27 :   0.9842
28 :   0.9839
29 :   0.985
30 :   0.9844
31 :   0.9837
32 :   0.9832
33 :   0.9845
34 :   0.984
35 :   0.9845
36 :   0.9838
37 :   0.9833
38 :   0.9846
39 :   0.9846
40 :   0.9839
41 :   0.9847
42 :   0.9839
43 :   0.9846
44 :   0.9845
45 :   0.9858
46 :   0.9847
47 :   0.9849
48 :   0.9857
49 :   0.9846
Accuracy is:     0.9846
Confidence is:     0.992364662768
l1coef = 0.000000, l2coef = 0.000010
Training MNIST data...
0 :   0.9836
1 :   0.9856
2 :   0.9843
3 :   0.985
4 :   0.9841
5 :   0.9852
6 :   0.985
7 :   0.986
8 :   0.9849
9 :   0.9847
10 :   0.