In [1]:
import os
import sys
import time
import struct
import numpy as np
from scipy import misc
from tqdm import tqdm
import pickle
import matplotlib.pyplot as plt
%matplotlib inline

import theano
import theano.tensor as T

import lasagne
import lasagne.layers as L
import lasagne.regularization as R
import lasagne.nonlinearities as NL
import lasagne.objectives as O
import lasagne.init as I

Couldn't import dot_parser, loading of dot files will not be possible.


Using gpu device 0: GeForce GTX TITAN X (CNMeM is disabled, cuDNN 5103)


# Step 0) Load MNIST digits dataset

In [2]:
def LoadMNIST(filename):
    """Read an IDX-format file (the container format of the MNIST files).

    The file starts with four bytes: two zero bytes, a type code and the
    number of dimensions. They are followed by one big-endian int32 per
    dimension and then by the array data. The detected dtype and the shape
    are printed as a side effect.

    Parameters
    ----------
    filename : str
        Path of the IDX file to read.

    Returns
    -------
    numpy.ndarray
        The stored array, with the dtype and shape declared in the header.

    Raises
    ------
    ValueError
        If the header is shorter than four bytes or declares a type code
        that is not part of the IDX format.
    """
    # IDX type code -> numpy dtype. Multi-byte values are stored big-endian.
    idx_dtypes = {
        0x08: '>u1',  # unsigned byte
        0x09: '>i1',  # signed byte
        0x0B: '>i2',  # short
        0x0C: '>i4',  # int
        0x0D: '>f4',  # float
        0x0E: '>f8',  # double
    }
    with open(filename, 'rb') as f:
        header = bytearray(f.read(4))  # magic number: 0, 0, type code, #dims
        if len(header) < 4:
            raise ValueError("%s: truncated IDX header" % (filename,))
        typecode = header[2]
        if typecode not in idx_dtypes:
            # Guessing a dtype here would silently return garbage values.
            raise ValueError("%s: unknown IDX type code 0x%02X" % (filename, typecode))
        datatype = idx_dtypes[typecode]
        print(datatype)

        size = np.fromfile(f, '>i4', header[3])  # one entry per dimension
        # Plain Python ints so the element count below cannot overflow int32.
        dshape = tuple(int(d) for d in size)
        count = 1
        for d in dshape:
            count *= d
        print(dshape)
        data = np.fromfile(f, datatype, count)  # the array payload
        return data.reshape(dshape)

In [3]:
# Directory holding the four MNIST IDX files. It defaults to the original
# location; set the MNIST_DIR environment variable to point the notebook at
# another copy of the dataset without editing this cell.
mnist_dir = os.environ.get('MNIST_DIR', '/home/styvesg/Documents/PostDoc/Programs/JupyterNotebooks/DataSet')

mnist_trn_img = os.path.join(mnist_dir, 'train-images.idx3-ubyte')  # training images
mnist_trn_lab = os.path.join(mnist_dir, 'train-labels.idx1-ubyte')  # training labels
mnist_val_img = os.path.join(mnist_dir, 't10k-images.idx3-ubyte')   # validation images
mnist_val_lab = os.path.join(mnist_dir, 't10k-labels.idx1-ubyte')   # validation labels

In [33]:
# Images: cast to float32 and insert a singleton channel axis after the
# sample axis, giving (samples, 1, rows, cols).
mnist_trn_data = np.expand_dims(LoadMNIST(mnist_trn_img).astype(np.float32), axis=1)
mnist_val_data = np.expand_dims(LoadMNIST(mnist_val_img).astype(np.float32), axis=1)
# Labels: kept exactly as stored in the files (one class id per sample).
mnist_trn_label = LoadMNIST(mnist_trn_lab)
mnist_val_label = LoadMNIST(mnist_val_lab)

>u1
(60000, 28, 28)
>u1
(10000, 28, 28)
>u1
(60000,)
>u1
(10000,)


# Some helpful functions

In [5]:
def conv(_in, *args, **kwargs):
    """Stack a ``L.Conv2DLayer`` on ``_in`` with this notebook's fixed options.

    The layer is always created with ``untie_biases=False``,
    ``flip_filters=True`` and ``theano.tensor.nnet.conv2d`` as the
    convolution implementation. Every other positional and keyword argument
    is handed to ``L.Conv2DLayer`` unchanged.
    """
    layer_cls = L.Conv2DLayer
    return layer_cls(
        _in,
        *args,
        untie_biases=False,
        flip_filters=True,
        convolution=theano.tensor.nnet.conv2d,
        **kwargs
    )

def batch_norm(_in, *args, **kwargs):
    """Apply ``L.batch_norm`` to ``_in`` with ``beta=None`` and ``gamma=None``.

    Any further positional or keyword arguments are forwarded to
    ``L.batch_norm`` as given.
    """
    normalize = L.batch_norm
    return normalize(_in, *args, beta=None, gamma=None, **kwargs)

def avg(_in, *args, **kwargs):
    """Stack a ``L.Pool2DLayer`` on ``_in`` in ``'average_exc_pad'`` mode.

    ``ignore_border=True`` is always set; the remaining positional and
    keyword arguments go to ``L.Pool2DLayer`` unchanged.
    """
    pool_cls = L.Pool2DLayer
    return pool_cls(
        _in,
        *args,
        ignore_border=True,
        mode='average_exc_pad',
        **kwargs
    )

def flatten(_in, **kwargs):
    """Stack a ``L.FlattenLayer`` on ``_in``, forwarding all keyword arguments."""
    flat_layer = L.FlattenLayer(_in, **kwargs)
    return flat_layer

def sigmoid(_in, **kwargs):
    """Stack a sigmoid ``L.NonlinearityLayer`` on ``_in``.

    Keyword arguments (for example ``name``) are forwarded to the layer
    constructor; they used to be accepted and then silently discarded.
    """
    return L.NonlinearityLayer(_in, nonlinearity=NL.sigmoid, **kwargs)

def tanh(_in, **kwargs):
    """Stack a tanh ``L.NonlinearityLayer`` on ``_in``.

    Keyword arguments (for example ``name``) are forwarded to the layer
    constructor; they used to be accepted and then silently discarded.
    """
    return L.NonlinearityLayer(_in, nonlinearity=NL.tanh, **kwargs)

In [6]:
def print_lasagne_net(_net, skipnoparam=True):
    """Print a summary of every layer returned by ``L.get_all_layers(_net)``.

    For each layer the object, its name, its type and its output shape are
    printed, followed by the name and shape of each of its parameters.

    Parameters
    ----------
    _net : lasagne layer
        Layer (normally the output layer) whose network is summarised.
    skipnoparam : bool
        When true, layers that have neither parameters nor a name are left
        out of the listing.
    """
    for layer in L.get_all_layers(_net):
        out = layer.output_shape
        par = layer.get_params()
        if skipnoparam and len(par) == 0 and layer.name is None:
            continue
        print("Layer\t: %s\nName\t: %s\nType\t: %s" % (layer, layer.name, type(layer)))
        print("Shape\t: %s" % (out,))
        if len(par) > 0:
            print("Params")
            for p in par:
                # str() keeps unnamed parameters (name is None) printable:
                # None rejects the '<10' format spec on Python 3.
                print("        |-- {:<10}: {:}".format(str(p.name), p.get_value().shape,))
        print("\n")

In [7]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    """Yield successive ``(inputs, targets)`` batches of exactly ``batchsize`` samples.

    ``inputs`` and ``targets`` must have the same length. With ``shuffle``
    set, a fresh random order of the samples is drawn with
    ``np.random.shuffle`` before batching. Samples left over after the last
    full batch are not yielded.
    """
    n_samples = len(inputs)
    assert n_samples == len(targets)

    order = None
    if shuffle:
        order = np.arange(n_samples)
        np.random.shuffle(order)

    last_start = n_samples - batchsize + 1
    for lo in range(0, last_start, batchsize):
        hi = lo + batchsize
        # Fancy-index with the shuffled ids, or take a plain contiguous slice.
        pick = order[lo:hi] if shuffle else slice(lo, hi)
        yield inputs[pick], targets[pick]
        
        
def embedding(x, c=10):
    """One-hot encode integer class ids.

    Returns a float32 array of shape ``(len(x), c)`` that is zero everywhere
    except for a 1 at column ``x[i]`` of row ``i``.
    """
    n_rows = len(x)
    onehot = np.zeros(shape=(n_rows, c), dtype=np.float32)
    row_ids = np.arange(n_rows)
    onehot[row_ids, x] = 1
    return onehot

# Step 1) Define the classifier net

In [8]:
# Data geometry
npc = 1     # number of image channels
npx = 28    # image width and height, in pixels
ny = 10     # number of classes

# Optimisation hyper-parameters
lr = 0.002  # initial learning rate for Adam
l2 = 0.002  # L2 weight-decay coefficient

In [13]:
_X = T.tensor4() # symbolic 4-D tensor for the input images; Classifier feeds it to an InputLayer of shape (None, npc, npx, npx)
_Y = T.matrix()  # symbolic matrix of targets, one row per sample; the training loop supplies one-hot rows built by embedding()

def Classifier(__X):
    """Build the digit classifier on the symbolic input ``__X``; return its output layer.

    Layer stack, in order: input -> dropout -> 7x7 conv (stride 3) with batch
    norm -> dropout -> 3x3 conv with batch norm -> 2x2 max-pool -> flatten ->
    dense(256) with batch norm -> dense(ny) with a sigmoid output. All
    weights are drawn from ``I.Normal(0.02)`` and no layer has a bias.
    """
    net = L.InputLayer((None, npc, npx, npx), input_var=__X, name='X')

    # First convolutional stage.
    net = L.DropoutLayer(net, p=0.2, rescale=True)
    net = conv(net, num_filters=64, filter_size=7, stride=3, pad=0,
               W=I.Normal(0.02), b=None, nonlinearity=NL.rectify)
    net = batch_norm(net)

    # Second convolutional stage, followed by spatial pooling.
    net = L.DropoutLayer(net, p=0.2, rescale=True)
    net = conv(net, num_filters=128, filter_size=3, stride=1, pad=0,
               W=I.Normal(0.02), b=None, nonlinearity=NL.rectify)
    net = batch_norm(net)
    net = L.MaxPool2DLayer(net, pool_size=2)

    # Fully connected head: one hidden layer, then one output unit per class.
    net = L.FlattenLayer(net, outdim=2)
    net = L.DenseLayer(net, 256, W=I.Normal(0.02), b=None, nonlinearity=NL.rectify)
    net = batch_norm(net)
    return L.DenseLayer(net, ny, W=I.Normal(0.02), b=None, nonlinearity=NL.sigmoid)

In [40]:
# Build the classifier on the symbolic input _X and print a summary of its
# layers: output shape plus the shape of every parameter. With
# skipnoparam=True, unnamed layers that own no parameters are left out.
_cls = Classifier(_X)
print_lasagne_net(_cls, skipnoparam=True)

Layer	: <lasagne.layers.input.InputLayer object at 0x7f0a85e09ad0>
Name	: X
Type	: <class 'lasagne.layers.input.InputLayer'>
Shape	: (None, 1, 28, 28)


Layer	: <lasagne.layers.conv.Conv2DLayer object at 0x7f0a85d5dbd0>
Name	: None
Type	: <class 'lasagne.layers.conv.Conv2DLayer'>
Shape	: (None, 64, 8, 8)
Params
        |-- W         : (64, 1, 7, 7)


Layer	: <lasagne.layers.normalization.BatchNormLayer object at 0x7f0aa4650350>
Name	: None
Type	: <class 'lasagne.layers.normalization.BatchNormLayer'>
Shape	: (None, 64, 8, 8)
Params
        |-- mean      : (64,)
        |-- inv_std   : (64,)


Layer	: <lasagne.layers.conv.Conv2DLayer object at 0x7f0adefeea50>
Name	: None
Type	: <class 'lasagne.layers.conv.Conv2DLayer'>
Shape	: (None, 128, 6, 6)
Params
        |-- W         : (128, 64, 3, 3)


Layer	: <lasagne.layers.normalization.BatchNormLayer object at 0x7f0adefee9d0>
Name	: None
Type	: <class 'lasagne.layers.normalization.BatchNormLayer'>
Shape	: (None, 128, 6, 6)
Params
        |-- m

# Step 2) Compile the theano expressions

In [41]:
tfX = np.float32
# The learning rate lives in a shared variable so that its value can be
# changed after the functions below have been compiled.
lrt = theano.shared(tfX(lr))

cls_params = L.get_all_params(_cls, trainable=True)

# L2 penalty on the layer parameters; added to both losses below.
_cls_reg = l2 * R.regularize_layer_params(_cls, R.l2)

# Training graph: non-deterministic forward pass.
_cls_trn_pred = L.get_output(_cls, deterministic=False)
_cls_trn_loss = O.squared_error(_cls_trn_pred, _Y).mean() + _cls_reg

# Validation graph: deterministic forward pass.
_cls_val_pred = L.get_output(_cls, deterministic=True)
_cls_val_loss = O.squared_error(_cls_val_pred, _Y).mean() + _cls_reg

# Accuracy: fraction of rows whose arg-max prediction matches the arg-max target.
_pred_digit = T.argmax(_cls_val_pred, axis=1)
_true_digit = T.argmax(_Y, axis=1)
_cls_val_acc = T.mean(T.eq(_pred_digit, _true_digit), dtype=theano.config.floatX)

# Predicted class, re-encoded as a one-hot row.
_class = T.extra_ops.to_one_hot(T.argmax(_cls_val_pred, axis=1), ny)

cls_updates = lasagne.updates.adam(
    _cls_trn_loss,
    cls_params,
    learning_rate=lrt,
    beta1=0.5,
    epsilon=1e-12,
)

print('COMPILING')
t = time.time()
# The training function applies the Adam updates and also reports the
# deterministic-pass accuracy on the batch it was given.
cls_trn_fn = theano.function([_X, _Y], [_cls_trn_loss, _cls_val_acc], updates=cls_updates)
cls_val_fn = theano.function([_X, _Y], [_cls_val_loss, _cls_val_acc])
cls_pred_fn = theano.function([_X], _class)
print('%.2f seconds to compile theano functions' % (time.time() - t))

COMPILING
3.68 seconds to compile theano functions


# Reloading 

In [None]:
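# Uncomment to reload previously saved parameter values from file
# (params_dir and cls_filename are defined in the saving cell at the end of the notebook)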

# #cls_param_file = open(params_dir + cls_filename, 'rb')
# #cls_param_value = pickle.load(cls_param_file)
# #cls_param_file.close()
# #L.set_all_param_values(_cls, cls_param_value)  

# Training the model. 

- Train the first `niter` epochs at the initial learning rate `lr`, then decrease the learning rate by `perc_decay`% after every subsequent epoch.

- Record the history of the training and validation accuracies.

In [None]:
niter = 10        # number of epochs trained at the initial learning rate
perc_decay = 10.0 # percentage by which the learning rate is reduced after each later epoch
num_epochs = 50
batch_size = 500

lrt.set_value(tfX(lr))  # restart from the initial rate whenever this cell is re-run

trn_hist = []  # mean training accuracy of each epoch
val_hist = []  # mean validation accuracy of each epoch

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    trn_err = 0
    trn_acc = 0
    trn_batches = 0
    start_time = time.time()
    for xb, yb in tqdm(iterate_minibatches(mnist_trn_data, mnist_trn_label, batch_size, shuffle=True)):
        err, acc = cls_trn_fn(xb, embedding(yb))
        trn_err += err
        trn_acc += acc
        trn_batches += 1

    # And a full pass over the validation data:
    val_err = 0
    val_acc = 0
    val_batches = 0
    for xb, yb in iterate_minibatches(mnist_val_data, mnist_val_label, batch_size, shuffle=False):
        err, acc = cls_val_fn(xb, embedding(yb))
        val_err += err
        val_acc += acc
        val_batches += 1

    trn_hist.append(trn_acc / trn_batches)
    val_hist.append(val_acc / val_batches)
    # Then we print the results for this epoch:
    print("\n  Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time))
    print("  training loss:       {:.6f}".format(trn_err / trn_batches))
    # This line reports the training accuracy; it used to be mislabelled
    # "validation accuracy", which made the log show that label twice.
    print("  training accuracy:   {:.2f} %".format(trn_acc / trn_batches * 100))
    print("  validation loss:     {:.6f}".format(val_err / val_batches))
    print("  validation accuracy: {:.2f} %".format(val_acc / val_batches * 100))

    # `epoch` is 0-based, so epoch + 1 epochs are complete at this point. Start
    # decaying once `niter` epochs have run at the initial rate; the former
    # test `epoch > niter` kept the initial rate for niter + 2 epochs.
    if epoch + 1 >= niter:
        lrt.set_value(tfX(lrt.get_value() * (1.0 - perc_decay / 100.0)))
    sys.stdout.flush()

120it [00:02, 53.00it/s]


  Epoch 1 of 50 took 2.333s
  training loss:       0.028682
  validation accuracy: 93.05 %
  validation loss:     0.014054
  validation accuracy: 98.04 %



120it [00:02, 52.85it/s]


  Epoch 2 of 50 took 2.327s
  training loss:       0.014761
  validation accuracy: 98.40 %
  validation loss:     0.012355
  validation accuracy: 98.58 %



120it [00:02, 56.91it/s]


  Epoch 3 of 50 took 2.178s
  training loss:       0.013732
  validation accuracy: 98.77 %
  validation loss:     0.011852
  validation accuracy: 98.95 %



120it [00:02, 54.26it/s]


  Epoch 4 of 50 took 2.268s
  training loss:       0.013298
  validation accuracy: 98.95 %
  validation loss:     0.011632
  validation accuracy: 99.01 %



120it [00:02, 54.02it/s]


  Epoch 5 of 50 took 2.277s
  training loss:       0.012995
  validation accuracy: 99.08 %
  validation loss:     0.011083
  validation accuracy: 99.04 %



120it [00:02, 54.81it/s]


  Epoch 6 of 50 took 2.244s
  training loss:       0.012784
  validation accuracy: 99.15 %
  validation loss:     0.011124
  validation accuracy: 99.15 %



120it [00:02, 54.23it/s]


  Epoch 7 of 50 took 2.230s
  training loss:       0.012610
  validation accuracy: 99.25 %
  validation loss:     0.011171
  validation accuracy: 98.94 %



120it [00:02, 53.41it/s]


  Epoch 8 of 50 took 2.202s
  training loss:       0.012470
  validation accuracy: 99.28 %
  validation loss:     0.010958
  validation accuracy: 99.18 %



120it [00:02, 57.00it/s]


  Epoch 9 of 50 took 2.161s
  training loss:       0.012370
  validation accuracy: 99.34 %
  validation loss:     0.010956
  validation accuracy: 99.04 %



120it [00:02, 51.52it/s]


  Epoch 10 of 50 took 2.386s
  training loss:       0.012283
  validation accuracy: 99.35 %
  validation loss:     0.010867
  validation accuracy: 99.02 %



120it [00:02, 52.55it/s]


  Epoch 11 of 50 took 2.366s
  training loss:       0.012216
  validation accuracy: 99.37 %
  validation loss:     0.010701
  validation accuracy: 99.17 %



120it [00:02, 53.18it/s]


  Epoch 12 of 50 took 2.310s
  training loss:       0.012176
  validation accuracy: 99.42 %
  validation loss:     0.010843
  validation accuracy: 99.20 %



120it [00:02, 53.05it/s]


  Epoch 13 of 50 took 2.318s
  training loss:       0.012050
  validation accuracy: 99.46 %
  validation loss:     0.010617
  validation accuracy: 99.21 %



120it [00:02, 55.41it/s]


  Epoch 14 of 50 took 2.220s
  training loss:       0.011977
  validation accuracy: 99.49 %
  validation loss:     0.011097
  validation accuracy: 99.13 %



120it [00:02, 51.78it/s]


  Epoch 15 of 50 took 2.393s
  training loss:       0.011877
  validation accuracy: 99.54 %
  validation loss:     0.010496
  validation accuracy: 99.19 %



120it [00:02, 55.78it/s]


  Epoch 16 of 50 took 2.205s
  training loss:       0.011784
  validation accuracy: 99.58 %
  validation loss:     0.010414
  validation accuracy: 99.22 %



120it [00:02, 56.57it/s]


  Epoch 17 of 50 took 2.180s
  training loss:       0.011714
  validation accuracy: 99.57 %
  validation loss:     0.010506
  validation accuracy: 99.22 %



120it [00:02, 51.88it/s]


  Epoch 18 of 50 took 2.220s
  training loss:       0.011655
  validation accuracy: 99.61 %
  validation loss:     0.010448
  validation accuracy: 99.18 %



120it [00:02, 51.81it/s]


  Epoch 19 of 50 took 2.373s
  training loss:       0.011625
  validation accuracy: 99.63 %
  validation loss:     0.010566
  validation accuracy: 99.16 %



120it [00:02, 53.79it/s]


  Epoch 20 of 50 took 2.285s
  training loss:       0.011575
  validation accuracy: 99.66 %
  validation loss:     0.010433
  validation accuracy: 99.26 %



120it [00:02, 55.74it/s]


  Epoch 21 of 50 took 2.206s
  training loss:       0.011557
  validation accuracy: 99.67 %
  validation loss:     0.010490
  validation accuracy: 99.19 %



120it [00:02, 52.59it/s]


  Epoch 22 of 50 took 2.339s
  training loss:       0.011489
  validation accuracy: 99.69 %
  validation loss:     0.010564
  validation accuracy: 99.18 %



120it [00:02, 54.06it/s]


  Epoch 23 of 50 took 2.274s
  training loss:       0.011450
  validation accuracy: 99.69 %
  validation loss:     0.010448
  validation accuracy: 99.23 %



120it [00:02, 50.37it/s]


  Epoch 24 of 50 took 2.452s
  training loss:       0.011432
  validation accuracy: 99.71 %
  validation loss:     0.010398
  validation accuracy: 99.22 %



84it [00:01, 49.75it/s]

In [None]:
# Training and validation accuracy per epoch, drawn on one explicit axes object.
fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(trn_hist, 'b', label='trn', lw=2)
ax.plot(val_hist, 'r', label='val', lw=2)
# Accuracy of uniform random guessing over ny classes.
ax.axhline(1. / ny, color='k', linestyle='--', label='chance')
ax.set_xlabel('epoch')
ax.set_ylabel('accuracy')
ax.set_ylim(0.95, 1.0)
ax.legend(bbox_to_anchor=(1., 0.3))
ax.grid()

In [17]:
# os.getcwd() has no trailing separator, so `params_dir + cls_filename` used to
# produce "<cwd>cls_params.pkl" (a mangled name in the parent directory).
# Joining with '' appends the separator, which also keeps the
# `params_dir + cls_filename` expression of the reload cell valid.
params_dir = os.path.join(os.getcwd(), '')
cls_filename = "cls_params.pkl"

cls_param_values = L.get_all_param_values(_cls)
# `with` closes the file even when pickling raises.
with open(os.path.join(params_dir, cls_filename), 'wb') as cls_param_file:
    pickle.dump(cls_param_values, cls_param_file)