# lasagne
* lasagne is a library for neural network building and training
* it's a low-level library with almost seamless integration with theano

For a demo we shall solve the same digit recognition problem, but at a different scale
* images are now 28x28
* 10 different digits
* 50k samples

In [28]:
#!nvidia-smi

In [8]:
%env THEANOI_FLAGS=device_gpu,floatX=float32

env: THEANOI_FLAGS=device_gpu,floatX=float32


In [None]:
import numpy as np
import theano
import theano.tensor as T
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from mnist import load_dataset
X_train, y_train, X_val, y_val, X_test, y_test = load_dataset()
print (X_train.shape, y_train.shape)

In [43]:
input_X = T.tensor4("X")

#input dimension (None means "Arbitrary" and only works at the first axes [samples])
input_shape = [None, 1, 28, 28]

target_y = T.vector("target Y integer", dtype='int32')

Defining network architecture

In [44]:
import lasagne
from lasagne.layers import *

#Input layer (auxilary)
l1 = InputLayer(shape = input_shape, input_var=input_X)

#fully connected layer, that takes input layer and applies 50 neurons to it.
# nonlinearity here is sigmoid as in logistic regression
# you can give a name to each layer (optional)
##l2 = DenseLayer(l1,num_units=50,nonlinearity = lambda x: 1./(1.+2.71**-x))

# This OK:
#l2 = DenseLayer(l1,num_units=50, nonlinearity = lasagne.nonlinearities.sigmoid, name='dense1')
#l2 = DropoutLayer(l2, p=0.25)
#l3 = DenseLayer(l2, num_units=500, nonlinearity=lasagne.nonlinearities.tanh)
#l_out = DenseLayer(l3, num_units=10, nonlinearity=lasagne.nonlinearities.softmax)


# Alternative: Improve by convoluting
l2=Conv2DLayer(l1,32,(3,3),nonlinearity=lasagne.nonlinearities.sigmoid)
l3=Pool2DLayer(l2,(2,2))
l4=DenseLayer(l3,512,nonlinearity=lasagne.nonlinearities.sigmoid)
l4=DropoutLayer(l4,0.5)
l_out = DenseLayer(l4,num_units = 10,nonlinearity=lasagne.nonlinearities.softmax)

#fully connected output layer that takes dense_1 as input and has 10 neurons (1 for each digit)
#We use softmax nonlinearity to make probabilities add up to 1
#l_out = DenseLayer(l4,num_units = 10,nonlinearity=lasagne.nonlinearities.softmax)


In [45]:
#network prediction (theano-transformation)
#y_predicted = lasagne.layers.get_output(l_out)
y_predicted = lasagne.layers.get_output(l_out, deterministic=True)  # added deterministic=True

In [46]:
#theano.printing.debugprint(y_predicted)

In [47]:
#all network weights (shared variables)
all_weights = lasagne.layers.get_all_params(l_out)
print(all_weights)

[W, b, W, b, W, b]


### Than you could simply
* define loss function manually
* compute error gradient over all weights
* define updates
* But that's a whole lot of work and life's short
  * not to mention life's too short to wait for SGD to converge

Instead, we shall use Lasagne builtins

In [53]:
#Mean categorical crossentropy as a loss function - similar to logistic loss but for multiclass targets
loss = lasagne.objectives.categorical_crossentropy(y_predicted, target_y).mean()
# MH: tested additionally this, but it's extra
# MH: loss = T.nnet.categorical_crossentropy(y_predicted, target_y).mean()

#MH: Installed the latest lasagne with pip installl
accuracy = lasagne.objectives.categorical_accuracy(y_predicted, target_y).mean()
#MH: If not installed, newest version from https://github.com/Lasagne/Lasagne/blob/master/lasagne/objectives.py 

print('acc, loss:', accuracy, loss)

#This function computes gradient AND composes weight updates just like you did earlier
#updates_sgd = lasagne.updates.sgd(loss, all_weights,learning_rate=0.01)
#updates_sgd = lasagne.updates.rmsprop(loss, all_weights, learning_rate=0.01)
updates_sgd = lasagne.updates.adam(loss, all_weights, learning_rate=0.01)

acc, loss: mean mean


In [54]:
#function that computes loss and updates weights
train_fun = theano.function([input_X,target_y],[loss,accuracy],updates= updates_sgd)

#function that just computes accuracy
accuracy_fun = theano.function([input_X,target_y],accuracy)

### That's all, now let's train it!
* We got a lot of data, so it's recommended that you use SGD
* So let's implement a function that splits the training sample into minibatches

In [55]:
# An auxilary function that returns mini-batches for neural network training

#Parameters
# inputs - a tensor of images with shape (many, 1, 28, 28), e.g. X_train
# outputs - a vector of answers for corresponding images e.g. Y_train
#batch_size - a single number - the intended size of each batches

def iterate_minibatches(inputs, targets, batchsize):
    assert len(inputs) == len(targets)
    indices = np.arange(len(inputs))
    np.random.shuffle(indices)
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        excerpt = indices[start_idx:start_idx + batchsize]
        yield inputs[excerpt], targets[excerpt]

# Training loop

In [56]:
import time

#num_epochs = 100 #amount of passes through the data
num_epochs = 25 #amount of passes through the data

batch_size = 50 #number of samples processed at each function call

for epoch in range(num_epochs):
    # In each epoch, we do a full pass over the training data:
    train_err = 0
    train_acc = 0
    train_batches = 0
    start_time = time.time()
    for batch in iterate_minibatches(X_train, y_train,batch_size):
        inputs, targets = batch
        train_err_batch, train_acc_batch= train_fun(inputs, targets)
        train_err += train_err_batch
        train_acc += train_acc_batch
        train_batches += 1

    # And a full pass over the validation data:
    val_acc = 0
    val_batches = 0
    for batch in iterate_minibatches(X_val, y_val, batch_size):
        inputs, targets = batch
        val_acc += accuracy_fun(inputs, targets)
        val_batches += 1

    
    # Then we print the results for this epoch:
    print("Epoch {} of {} took {:.3f}s".format(
        epoch + 1, num_epochs, time.time() - start_time))

    print("  training loss (in-iteration):\t\t{:.6f}".format(train_err / train_batches))
    print("  train accuracy:\t\t{:.2f} %".format(
        train_acc / train_batches * 100))
    print("  validation accuracy:\t\t{:.2f} %".format(
        val_acc / val_batches * 100))

Epoch 1 of 25 took 508.592s
  training loss (in-iteration):		2.493938
  train accuracy:		9.92 %
  validation accuracy:		10.90 %
Epoch 2 of 25 took 426.320s
  training loss (in-iteration):		2.471374
  train accuracy:		10.05 %
  validation accuracy:		10.90 %


KeyboardInterrupt: 

In [34]:
test_acc = 0
test_batches = 0
for batch in iterate_minibatches(X_test, y_test, 500):
    inputs, targets = batch
    acc = accuracy_fun(inputs, targets)
    test_acc += acc
    test_batches += 1
print("Final results:")
print("  test accuracy:\t\t{:.2f} %".format(
    test_acc / test_batches * 100))

if test_acc / test_batches * 100 > 99:
    print ("Achievement unlocked: 80lvl Warlock!")
else:
    print ("We need more magic!")

Final results:
  test accuracy:		96.81 %
We need more magic!


# Now improve it!

* Moar layers!
* Moar units!
* Different nonlinearities!