# Coursework 1

This notebook is intended to be used as a starting point for your experiments. The instructions can be found in the instructions file located under spec/coursework1.pdf. The methods provided here are just helper functions. If you want more complex graphs, such as side-by-side comparisons of different experiments, you should learn more about matplotlib and implement them yourself. Before each experiment, remember to re-initialize the neural network weights and reset the data providers so that you get a properly initialized experiment. For each experiment, try to keep most hyperparameters the same, changing only the one under investigation, so that you can understand the effect of each.

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

def _plot_stat_curves(stats, keys, stat_names, stats_interval, y_label):
    """Plot the named statistics against epoch number on a new 8x4 figure.

    Row 0 of `stats` is skipped: plotting starts from row 1, which is placed
    at epoch `stats_interval`.

    Args:
        stats: 2-D array of statistics, one row per recorded interval.
        keys: Mapping from statistic name to its column index in `stats`.
        stat_names: Names of the statistics to draw; also used as legend labels.
        stats_interval: Number of epochs between consecutive rows of `stats`.
        y_label: Label for the y axis.

    Returns:
        The created `(figure, axes)` pair.
    """
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    epochs = np.arange(1, stats.shape[0]) * stats_interval
    for name in stat_names:
        ax.plot(epochs, stats[1:, keys[name]], label=name)
    ax.legend(loc=0)
    ax.set_xlabel('Epoch number')
    ax.set_ylabel(y_label)
    return fig, ax


def train_model_and_plot_stats(
        model, error, learning_rule, train_data, valid_data, num_epochs, stats_interval, notebook=True):
    """Train `model` with an `Optimiser` and plot its error and accuracy curves.

    `Optimiser` and `np` are not imported in this cell; they must already be
    available in the notebook namespace when this function is called.

    Args:
        model: Model to train.
        error: Error function handed to the optimiser.
        learning_rule: Learning rule handed to the optimiser.
        train_data: Data provider for the training set.
        valid_data: Data provider whose results are reported under the
            '(valid)' statistics.
        num_epochs: Number of epochs to train for.
        stats_interval: Number of epochs between recorded statistics.
        notebook: Forwarded unchanged to `Optimiser`.

    Returns:
        Tuple `(stats, keys, run_time, fig_1, ax_1, fig_2, ax_2)`, where
        `stats`, `keys` and `run_time` are the values returned by
        `Optimiser.train`, `fig_1`/`ax_1` hold the error plot and
        `fig_2`/`ax_2` hold the accuracy plot.
    """
    # As well as monitoring the error over training also monitor classification
    # accuracy i.e. proportion of most-probable predicted classes being equal to targets
    data_monitors = {'acc': lambda y, t: (y.argmax(-1) == t.argmax(-1)).mean()}

    # Use the created objects to initialise a new Optimiser instance.
    optimiser = Optimiser(
        model, error, learning_rule, train_data, valid_data, data_monitors, notebook=notebook)

    # Run the optimiser for `num_epochs` epochs (full passes through the
    # training set), recording statistics every `stats_interval` epochs.
    stats, keys, run_time = optimiser.train(num_epochs=num_epochs, stats_interval=stats_interval)

    # Plot the change in the validation and training set error over training.
    fig_1, ax_1 = _plot_stat_curves(
        stats, keys, ['error(train)', 'error(valid)'], stats_interval, 'Error')

    # Plot the change in the validation and training set accuracy over training.
    fig_2, ax_2 = _plot_stat_curves(
        stats, keys, ['acc(train)', 'acc(valid)'], stats_interval, 'Accuracy')

    return stats, keys, run_time, fig_1, ax_1, fig_2, ax_2

In [None]:
# The code below sets up the data providers, random number
# generator and logger objects needed for training runs. Because
# loading the data from file takes a little while, you will
# generally not want to reload the data providers on every
# training run. If you wish to reset their state, use the
# .reset() method of the data providers instead.
import numpy as np
import logging
from mlp.data_providers import MNISTDataProvider, EMNISTDataProvider

# Seed a random number generator. The same generator instance is passed to
# all three data providers created below.
seed = 11102019 
rng = np.random.RandomState(seed)
# Batch size passed to every data provider.
batch_size = 100
# Set up the root logger to print info about the training run. Any handlers
# already attached are replaced by a single stream handler, which writes to
# sys.stderr by default.
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logger.handlers = [logging.StreamHandler()]

# Create data provider objects for the 'train', 'valid' and 'test' splits of
# the EMNIST data set.
train_data = EMNISTDataProvider('train', batch_size=batch_size, rng=rng)
valid_data = EMNISTDataProvider('valid', batch_size=batch_size, rng=rng)
test_data = EMNISTDataProvider('test', batch_size=batch_size, rng=rng)

In [None]:
# The model set-up code below is the s2045458 MLP coursework multilayer model.
# The models use dropout layers and weight penalties.
# The hyperparameters investigated here are the learning rate, dropout
# probability, penalty type and penalty coefficient.

from mlp.layers import AffineLayer, SoftmaxLayer, SigmoidLayer, ReluLayer, DropoutLayer
from mlp.errors import CrossEntropySoftmaxError
from mlp.models import MultipleLayerModel
from mlp.initialisers import ConstantInit, GlorotUniformInit
from mlp.learning_rules import AdamLearningRule
from mlp.optimisers import Optimiser
from mlp.penalties import L1Penalty, L2Penalty
from mlp import DEFAULT_SEED


# Training schedule: how often (in epochs) statistics are recorded and how
# many epochs each experiment is trained for.
stats_interval = 1
num_epochs = 100

# Layer sizes used by every model built below.
input_dim = 784
output_dim = 47
hidden_dim = 128

# Parameter initialisers shared by the affine layers.
biases_init = ConstantInit(0.)
weights_init = GlorotUniformInit(rng=rng)

# Hyperparameter values under investigation. Each list holds the values that
# are actually run; the trailing comment records the wider grid considered.
dropout_probs = [0.8]  # range 0-1; passed to DropoutLayer as incl_prob
l2_coefficients = [0.001]  # [0.1,0.01,0.001,0.0001,0.00001]
l1_coefficients = [0.0001]  # [0.1,0.01,0.001,0.0001,0.00001]
learning_rates = [0.0001]  # [0.005,0.001,0.0005,0.0001]

# Adam learning rule with its default settings, used by the experiments that
# do not vary the learning rate.
learning_rule = AdamLearningRule()
# Error function shared by all experiments.
error = CrossEntropySoftmaxError()

# Learning rate experiment: train the network without penalties or dropout
# once per candidate Adam learning rate, saving the statistics and both
# figures of every run.
for lr in learning_rates:
    # Recreate the initialisers and build a new model for every run so that no
    # trained parameters carry over from the previous one.
    weights_init = GlorotUniformInit(rng=rng)
    biases_init = ConstantInit(0.)

    # Four affine + ReLU hidden layers followed by an affine output layer.
    layers = []
    for fan_in in [input_dim] + [hidden_dim] * 3:
        layers.extend([
            AffineLayer(fan_in, hidden_dim, weights_init, biases_init),
            ReluLayer(),
        ])
    layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
    model = MultipleLayerModel(layers)

    learning_rule_learning_rate = AdamLearningRule(learning_rate=lr)

    # Reset the data providers before each run, as the notebook instructions
    # ask, so that every experiment starts from a properly initialised state.
    train_data.reset()
    valid_data.reset()

    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model, error, learning_rule_learning_rate, train_data, valid_data,
        num_epochs, stats_interval, notebook=True)

    # All outputs of one run share a prefix naming the learning rate used.
    output_prefix = 'Learning_rate_' + str(lr)
    np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
    fig_1.savefig(output_prefix + '_error.pdf')
    fig_2.savefig(output_prefix + '_accuracy.pdf')

# L1 penalty coefficient experiment: train the network once per candidate L1
# coefficient with the default Adam learning rule, saving the statistics and
# both figures of every run.
for l1_coefficient in l1_coefficients:

    # Four affine + ReLU hidden layers followed by an affine output layer.
    # Every affine layer gets its own L1Penalty instance with this coefficient.
    layers = []
    for fan_in in [input_dim] + [hidden_dim] * 3:
        layers.extend([
            AffineLayer(fan_in, hidden_dim, weights_init, biases_init,
                        weights_penalty=L1Penalty(l1_coefficient)),
            ReluLayer(),
        ])
    layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init,
                              weights_penalty=L1Penalty(l1_coefficient)))
    model_L1 = MultipleLayerModel(layers)

    # Reset the data providers before each run, as the notebook instructions
    # ask, so that every experiment starts from a properly initialised state.
    train_data.reset()
    valid_data.reset()

    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model_L1, error, learning_rule, train_data, valid_data,
        num_epochs, stats_interval, notebook=True)

    # All outputs of one run share a prefix naming the coefficient used.
    output_prefix = 'L1Penalty_' + str(l1_coefficient)
    np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
    fig_1.savefig(output_prefix + '_error.pdf')
    fig_2.savefig(output_prefix + '_accuracy.pdf')

# L2 penalty coefficient experiment: train the network once per candidate L2
# coefficient with the default Adam learning rule, saving the statistics and
# both figures of every run.
for l2_coefficient in l2_coefficients:

    # Four affine + ReLU hidden layers followed by an affine output layer.
    # Every affine layer gets its own L2Penalty instance with this coefficient.
    layers = []
    for fan_in in [input_dim] + [hidden_dim] * 3:
        layers.extend([
            AffineLayer(fan_in, hidden_dim, weights_init, biases_init,
                        weights_penalty=L2Penalty(l2_coefficient)),
            ReluLayer(),
        ])
    layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init,
                              weights_penalty=L2Penalty(l2_coefficient)))
    model_L2 = MultipleLayerModel(layers)

    # Reset the data providers before each run, as the notebook instructions
    # ask, so that every experiment starts from a properly initialised state.
    train_data.reset()
    valid_data.reset()

    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model_L2, error, learning_rule, train_data, valid_data,
        num_epochs, stats_interval, notebook=True)

    # All outputs of one run share a prefix naming the coefficient used.
    output_prefix = 'L2Penalty_' + str(l2_coefficient)
    np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
    fig_1.savefig(output_prefix + '_error.pdf')
    fig_2.savefig(output_prefix + '_accuracy.pdf')

# Dropout experiment: train the network once per candidate dropout setting
# with the default Adam learning rule, saving the statistics and both figures
# of every run.
for dropout_prob in dropout_probs:

    # Four affine + ReLU + dropout hidden layers followed by an affine output
    # layer. `dropout_prob` is handed to DropoutLayer as `incl_prob`.
    layers = []
    for fan_in in [input_dim] + [hidden_dim] * 3:
        layers.extend([
            AffineLayer(fan_in, hidden_dim, weights_init, biases_init),
            ReluLayer(),
            DropoutLayer(rng=rng, incl_prob=dropout_prob),
        ])
    layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init))
    model_Dropout = MultipleLayerModel(layers)

    # Reset the data providers before each run, as the notebook instructions
    # ask, so that every experiment starts from a properly initialised state.
    train_data.reset()
    valid_data.reset()

    stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
        model_Dropout, error, learning_rule, train_data, valid_data,
        num_epochs, stats_interval, notebook=True)

    # All outputs of one run share a prefix naming the dropout setting used.
    output_prefix = 'Dropout_prob_' + str(dropout_prob)
    np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
    fig_1.savefig(output_prefix + '_error.pdf')
    fig_2.savefig(output_prefix + '_accuracy.pdf')

# L1 penalty and dropout experiment: train one network for every combination
# of learning rate, L1 coefficient and dropout setting.
# NOTE: test_data is passed in the validation slot of
# train_model_and_plot_stats, so the '(valid)' statistics and plot legends of
# these runs report performance on the test set.
for lr in learning_rates:
    for l1_coefficient in l1_coefficients:
        for dropout_prob in dropout_probs:
            # Four affine + ReLU + dropout hidden layers followed by an affine
            # output layer. Every affine layer gets its own L1Penalty instance.
            layers = []
            for fan_in in [input_dim] + [hidden_dim] * 3:
                layers.extend([
                    AffineLayer(fan_in, hidden_dim, weights_init, biases_init,
                                weights_penalty=L1Penalty(l1_coefficient)),
                    ReluLayer(),
                    DropoutLayer(rng=rng, incl_prob=dropout_prob),
                ])
            layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init,
                                      weights_penalty=L1Penalty(l1_coefficient)))
            model_Dropout = MultipleLayerModel(layers)

            learning_rule_learning_rate = AdamLearningRule(learning_rate=lr)

            # Reset the data providers before each run, as the notebook
            # instructions ask, so that every experiment starts from a
            # properly initialised state.
            train_data.reset()
            test_data.reset()

            stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
                model_Dropout, error, learning_rule_learning_rate, train_data, test_data,
                num_epochs, stats_interval, notebook=True)

            # All outputs of one run share a prefix naming its hyperparameters.
            output_prefix = ('A_Dropout_prob_' + str(dropout_prob)
                             + '_L1Penalty_' + str(l1_coefficient)
                             + '_LearningRate_' + str(lr))
            np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
            fig_1.savefig(output_prefix + '_error.pdf')
            fig_2.savefig(output_prefix + '_accuracy.pdf')

# L2 penalty and dropout experiment: train one network for every combination
# of learning rate, L2 coefficient and dropout setting.
# NOTE: test_data is passed in the validation slot of
# train_model_and_plot_stats, so the '(valid)' statistics and plot legends of
# these runs report performance on the test set.
for lr in learning_rates:
    for l2_coefficient in l2_coefficients:
        for dropout_prob in dropout_probs:
            # Four affine + ReLU + dropout hidden layers followed by an affine
            # output layer. Every affine layer gets its own L2Penalty instance.
            layers = []
            for fan_in in [input_dim] + [hidden_dim] * 3:
                layers.extend([
                    AffineLayer(fan_in, hidden_dim, weights_init, biases_init,
                                weights_penalty=L2Penalty(l2_coefficient)),
                    ReluLayer(),
                    DropoutLayer(rng=rng, incl_prob=dropout_prob),
                ])
            layers.append(AffineLayer(hidden_dim, output_dim, weights_init, biases_init,
                                      weights_penalty=L2Penalty(l2_coefficient)))
            model_Dropout = MultipleLayerModel(layers)

            learning_rule_learning_rate = AdamLearningRule(learning_rate=lr)

            # Reset the data providers before each run, as the notebook
            # instructions ask, so that every experiment starts from a
            # properly initialised state.
            train_data.reset()
            test_data.reset()

            stats, keys, run_time, fig_1, ax_1, fig_2, ax_2 = train_model_and_plot_stats(
                model_Dropout, error, learning_rule_learning_rate, train_data, test_data,
                num_epochs, stats_interval, notebook=True)

            # All outputs of one run share a prefix naming its hyperparameters.
            output_prefix = ('A_Dropout_prob_' + str(dropout_prob)
                             + '_L2Penalty_' + str(l2_coefficient)
                             + '_LearningRate_' + str(lr))
            np.savetxt(output_prefix + '_data.txt', stats, fmt='%f', delimiter=' ')
            fig_1.savefig(output_prefix + '_error.pdf')
            fig_2.savefig(output_prefix + '_accuracy.pdf')
