In [1]:
%load_ext autoreload

%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
from datetime import datetime

def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    
    # As well as monitoring the error over training also monitor classification
    # accuracy i.e. proportion of most-probable predicted classes being equal to targets
    data_monitors={'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    # Use the created objects to initialise a new Optimiser instance.
    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Run the optimiser for 5 epochs (full passes through the training set)
    # printing statistics every epoch.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # Plot the change in the validation and training set error over training.
    fig_1 = plt.figure(figsize=(8, 4))
    ax_1 = fig_1.add_subplot(111)
    for k in ['error(train)', 'error(valid)']:
        ax_1.plot(np.arange(1, stats.shape[0]) * stats_interval, 
                  stats[1:, keys[k]], label=k)
    ax_1.legend(loc=0)
    ax_1.set_xlabel('Epoch number')

    # Plot the change in the validation and training set accuracy over training.
    fig_2 = plt.figure(figsize=(8, 4))
    ax_2 = fig_2.add_subplot(111)
    for k in ['acc(train)', 'acc(valid)']:
        ax_2.plot(np.arange(1, stats.shape[0]) * stats_interval, 
                  stats[1:, keys[k]], label=k)
    ax_2.legend(loc=0)
    ax_2.set_xlabel('Epoch number')
    
    plt.savefig(datetime.now().strftime("%Y/%m/%d %H:%M:%S") + ".pdf")
    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

# The below code will set up the data providers, random number
# generator and logger objects needed for training runs. As
# loading the data from file take a little while you generally
# will probably not want to reload the data providers on
# every training run. If you wish to reset their state you
# should instead use the .reset() method of the data providers.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider

# Seed a random number generator
seed = 10102016 
rng = np.random.RandomState(seed)
batch_size = 100
# Set up a logger object to print info about the training run to stdout
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the MNIST data set
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)



['inputs', 'targets']
['inputs', 'targets']


In [None]:
from mlp.layers import *
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import *
from mlp.optimisers import Optimiser

In [None]:
# The model set up code below is provided as a starting point.
# You will probably want to add further code cells for the
# different experiments you run.

from mlp.layers import *
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import *
from mlp.optimisers import Optimiser

#setup hyperparameters
learning_rate = 0.1
num_epochs = 50
stats_interval = 1
input_dim, output_dim, hidden_dim = 784, 47, 100
kernel_dim = 5

weights_init = GlorotUniformInit(rng=rng)
biases_init = ConstantInit(0.)
model = MultipleLayerModel([
    ReshapeLayer((1,28,28,)) , #(1, 28, 28)
    PrintLayer(),
    ConvolutionalLayer(1, 5, 28, 28, 5, 5) , # (5, 24, 24)
    PrintLayer(),
    ReluLayer(), # (5, 24, 24)
    PrintLayer(),
    MaxPoolingLayer() ,
    PrintLayer(),
    ReshapeLayer((5*12*12,)),
    PrintLayer(),
    AffineLayer(5*12*12, hidden_dim, weights_init, biases_init), 
    PrintLayer(),
    ReluLayer(),
    PrintLayer(),
    AffineLayer(hidden_dim, output_dim, weights_init, biases_init), # 47
    PrintLayer()
])

error = CrossEntropySoftmaxError()
# Use a basic gradient descent learning rule
learning_rule = GradientDescentLearningRule()

#Remember to use notebook=False when you write a script to be run in a terminal
_ = train_model_and_plot_stats(
    model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True)

PrintLayer fprop: (100, 1, 28, 28)
PrintLayer fprop: (100, 5, 24, 24)
PrintLayer fprop: (100, 5, 24, 24)
PrintLayer fprop: (100, 5, 12, 12)
PrintLayer fprop: (100, 720)
PrintLayer fprop: (100, 100)
PrintLayer fprop: (100, 100)
PrintLayer fprop: (100, 47)


A Jupyter Widget

A Jupyter Widget

PrintLayer bprop: (100, 47)
PrintLayer param: (100, 47)
PrintLayer bprop: (100, 100)
PrintLayer param: (100, 100)
PrintLayer bprop: (100, 100)
PrintLayer param: (100, 100)
PrintLayer bprop: (100, 720)
PrintLayer param: (100, 720)
PrintLayer bprop: (100, 5, 12, 12)
PrintLayer param: (100, 5, 12, 12)
PrintLayer bprop: (100, 5, 24, 24)
PrintLayer param: (100, 5, 24, 24)
PrintLayer bprop: (100, 5, 24, 24)
PrintLayer param: (100, 5, 24, 24)
PrintLayer bprop: (100, 1, 28, 28)
PrintLayer param: (100, 1, 28, 28)


Epoch 1: 330.4s to complete
    error(train)=3.85e+00, acc(train)=2.57e-02, error(valid)=3.85e+00, acc(valid)=2.53e-02


A Jupyter Widget

Epoch 2: 369.2s to complete
    error(train)=3.84e+00, acc(train)=2.93e-02, error(valid)=3.84e+00, acc(valid)=2.96e-02


A Jupyter Widget

Epoch 3: 251.0s to complete
    error(train)=3.82e+00, acc(train)=3.51e-02, error(valid)=3.82e+00, acc(valid)=3.59e-02


A Jupyter Widget

Epoch 4: 234.3s to complete
    error(train)=3.76e+00, acc(train)=5.51e-02, error(valid)=3.76e+00, acc(valid)=5.59e-02


A Jupyter Widget

Epoch 5: 228.2s to complete
    error(train)=3.57e+00, acc(train)=1.30e-01, error(valid)=3.58e+00, acc(valid)=1.32e-01


A Jupyter Widget

Epoch 6: 231.3s to complete
    error(train)=3.06e+00, acc(train)=2.50e-01, error(valid)=3.07e+00, acc(valid)=2.48e-01


A Jupyter Widget

Epoch 7: 230.4s to complete
    error(train)=2.29e+00, acc(train)=4.20e-01, error(valid)=2.28e+00, acc(valid)=4.23e-01


A Jupyter Widget

Epoch 8: 227.6s to complete
    error(train)=1.81e+00, acc(train)=5.16e-01, error(valid)=1.81e+00, acc(valid)=5.19e-01


A Jupyter Widget

Epoch 9: 229.3s to complete
    error(train)=1.59e+00, acc(train)=5.66e-01, error(valid)=1.58e+00, acc(valid)=5.68e-01


A Jupyter Widget

Epoch 10: 229.8s to complete
    error(train)=1.46e+00, acc(train)=5.94e-01, error(valid)=1.45e+00, acc(valid)=5.93e-01


A Jupyter Widget

Epoch 11: 232.9s to complete
    error(train)=1.38e+00, acc(train)=6.15e-01, error(valid)=1.38e+00, acc(valid)=6.15e-01


A Jupyter Widget

Epoch 12: 307.1s to complete
    error(train)=1.33e+00, acc(train)=6.28e-01, error(valid)=1.32e+00, acc(valid)=6.29e-01


A Jupyter Widget