# Task 1 - Learn the basics of Keras and TensorFlow

Note that two different people worked on this task: one implemented the part on MLPs and the other implemented the part on CNNs. As a result, there are slight differences in how the hyperparameter exploration is coded, but the same concepts apply to both parts.

In [None]:
import tensorflow 
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import load_model
from tensorflow.keras.datasets import mnist 
from tensorflow.keras.models import Sequential
from keras.utils.np_utils import to_categorical
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import RMSprop
from functools import partial

import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import sys
import time

### MLPs
Here we studied the hyperparameters for MLPs.

In [None]:
# Parameters we will test:
# this function returns the values we will use for our reference model
def get_default_params():
  """Return the hyperparameters of the reference MLP as one tuple.

  The order is: nlay, k_reg, b_reg, width, act_fun, weights, dropout, loss,
  opt, n_epochs, bs.  The string 'none' marks "no regularizer".  Fresh list
  objects are built on every call.
  """
  return (
      3,                                    # nlay: number of layers
      'none',                               # k_reg: kernel_regularizer
      'none',                               # b_reg: bias_regularizer
      [100, 30, 10],                        # width: layer widths
      ['relu', 'relu', 'softmax'],          # act_fun: activation functions
      'glorot_uniform',                     # weights: weight initializer
      0,                                    # dropout: dropout rate
      'sparse_categorical_crossentropy',    # loss: loss function
      'sgd',                                # opt: optimizer
      40,                                   # n_epochs: number of epochs
      32,                                   # bs: batch size
  )

In [None]:
# Class to create and train our MLP
class MLP(object):
  """Helper that loads the data and builds, compiles and trains a custom MLP."""

  def __init__(self):
    pass


  def load_data(self, data, loss):
    """Load a dataset, scale the images and encode the labels if needed.

    Args:
      data: object whose ``load_data()`` returns
        ``(x_train, y_train), (x_test, y_test)``.
      loss: name of the loss function that will be used for training.

    Returns:
      ``x_train, y_train, x_test, y_test``.  The images are divided by 255.
      The labels are one-hot encoded with ``to_categorical`` unless ``loss``
      is 'sparse_categorical_crossentropy'.
    """
    (x_train, y_train), (x_test, y_test) = data.load_data()

    # Scaling the pixel intensities
    x_train, x_test = x_train / 255, x_test / 255

    # some loss functions require the y data to be categorical (one-hot encoding)
    if loss != 'sparse_categorical_crossentropy':
      y_train = to_categorical(y_train)
      y_test = to_categorical(y_test)

    return x_train, y_train, x_test, y_test


  @staticmethod
  def _per_layer(spec, n_dense):
    """Expand a regularizer spec into one entry per Dense layer.

    ``spec`` may be 'none'/None (no regularizer), a single identifier or
    regularizer object (applied to every Dense layer), or a list/tuple with
    exactly one entry per Dense layer.  'none' entries become None.
    """
    if isinstance(spec, (list, tuple)):
      if len(spec) != n_dense:
        raise ValueError(
            'expected %d regularizer entries (one per Dense layer), got %d'
            % (n_dense, len(spec)))
      entries = list(spec)
    else:
      # A plain string such as 'l1' must be treated as ONE identifier, not
      # iterated character by character.
      entries = [spec] * n_dense
    return [None if (e is None or (isinstance(e, str) and e == 'none')) else e
            for e in entries]


  def create_mlp(self, nlay, k_reg, b_reg, width, act_fun, weights, dropout):
    """Build an (uncompiled) Sequential MLP for 28x28 inputs.

    Args:
      nlay: number of Dense layers, output layer included.
      k_reg: kernel regularizer spec, see ``_per_layer``.
      b_reg: bias regularizer spec, see ``_per_layer``.
      width: units per Dense layer; ``width[-1]`` is used for the output layer.
      act_fun: activation per Dense layer; ``act_fun[-1]`` is the output's.
      weights: kernel initializer used for every Dense layer.
      dropout: rate of the Dropout layer placed before the output layer.

    Returns:
      The ``keras.Sequential`` model.
    """
    n_hidden = max(nlay - 1, 0)
    n_dense = n_hidden + 1
    k_regs = self._per_layer(k_reg, n_dense)
    b_regs = self._per_layer(b_reg, n_dense)

    model = keras.Sequential()
    model.add(layers.Flatten(input_shape=[28,28]))

    def add_dense(idx, units, activation):
      # Regularizers only take effect when given to the constructor: the
      # penalty is registered when the layer creates its weights, so setting
      # the attribute on an existing layer does nothing.
      bias_reg = b_regs[idx]
      model.add(layers.Dense(
          units, activation=activation,
          kernel_initializer=weights,
          kernel_regularizer=k_regs[idx],
          bias_regularizer=bias_reg,
          # The original code paired a bias regularizer with a 'ones' bias
          # initializer (presumably so the penalty starts out non-zero);
          # 'zeros' is the Dense default otherwise.
          bias_initializer='ones' if bias_reg is not None else 'zeros'))

    # Not including output layer so that dropout is added after last hidden layer only
    for i in range(n_hidden):
      add_dense(i, width[i], act_fun[i])

    # Adding dropout to last hidden layer
    model.add(Dropout(dropout))

    # Output layer
    add_dense(n_dense - 1, width[-1], act_fun[-1])

    return model


  def compile_model(self, model, loss, optimizer):
    """Compile ``model`` with the given loss/optimizer, tracking accuracy."""
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    return model


  def train_model(self, model, x_train, y_train, n_epochs, bs, callbacks=None):
    """Fit ``model`` with a 20% validation split and print the runtime.

    Args:
      model: compiled Keras model.
      x_train, y_train: training inputs and targets.
      n_epochs: maximum number of epochs.
      bs: batch size.
      callbacks: optional list of Keras callbacks.  When omitted, the
        module-level ``early_stop`` callback is used if one has been defined
        (backward compatible with ``make_model``); otherwise an
        ``EarlyStopping(monitor='val_loss', patience=5)`` is created.

    Returns:
      The ``History`` object returned by ``model.fit``.
    """
    if callbacks is None:
      stopper = globals().get('early_stop')
      if stopper is None:
        stopper = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
      callbacks = [stopper]

    start_time = time.time()
    history = model.fit(x_train, y_train, epochs=n_epochs,
                        validation_split=0.2, batch_size=bs,
                        callbacks=callbacks)
    print('Runtime: %s seconds' % (time.time() - start_time))

    return history

In [None]:
# Function to construct, train, and save our custom MLP model 
def make_model(filename, nlay, k_reg, b_reg, width, act_fun, weights,
               dropout, loss, opt, n_epochs, bs, dataset, save=True):
  """Construct, train and (optionally) save a custom MLP.

  Args:
    filename: base name (no extension) of the files written when saving.
    nlay, k_reg, b_reg, width, act_fun, weights, dropout: forwarded to
      ``MLP.create_mlp``.
    loss, opt: forwarded to ``MLP.compile_model``; ``loss`` also decides how
      ``MLP.load_data`` encodes the labels.
    n_epochs, bs: forwarded to ``MLP.train_model``.
    dataset: 'digits' (MNIST) or 'fashion' (Fashion MNIST).
    save: when true, write the training history to ``<savepath><filename>.csv``
      and the ``model.evaluate`` result on the test set to
      ``<savepath><filename>.npy`` plus ``<filename>.npy`` in the working
      directory.

  Returns:
    The trained model.

  Raises:
    ValueError: if ``dataset`` is neither 'digits' nor 'fashion'.

  Side effects:
    Rebinds the module-level name ``early_stop``, which ``MLP.train_model``
    reads.
  """
  global early_stop

  # Dataset will be either the MNIST or fashion MNIST.  Validate first so a
  # bad name fails loudly instead of exiting the interpreter with status 0.
  if dataset == 'digits':
    data = keras.datasets.mnist
    savepath = '/content/drive/MyDrive/Colab Notebooks/savefiles_mnist_digits/'
  elif dataset == 'fashion':
    data = keras.datasets.fashion_mnist
    savepath = '/content/drive/MyDrive/Colab Notebooks/savefiles_mnist_fashion/'
  else:
    raise ValueError('dataset must be "digits" or "fashion"')

  # Stopping criterion
  early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

  # Running previous codes
  mlp = MLP()
  X_train, Y_train, X_test, Y_test = mlp.load_data(data, loss)
  model = mlp.create_mlp(nlay, k_reg, b_reg, width, act_fun, weights, dropout)
  model = mlp.compile_model(model, loss, opt)
  history = mlp.train_model(model, X_train, Y_train, n_epochs, bs)

  # Saving
  if save:
    hist_df = pd.DataFrame(history.history)
    hist_df.to_csv(savepath + filename + '.csv')

    # Test case performance
    test_case = np.array(model.evaluate(X_test, Y_test))
    np.save(savepath + filename + '.npy', test_case)
    # NOTE: this second copy goes to the working directory under a name that
    # does not include the dataset, so running the same filename for both
    # datasets overwrites it.
    np.save(filename + '.npy', test_case)

  return model

In [None]:
# Creating functions to make our model for a range of values of each parameter

# Reference model
def test_reference(data):
  """Train and save the reference model built from the default hyperparameters.

  `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  name = 'defaults'
  print(name)
  make_model(filename=name, dataset=data, **defaults)
     
# Weights
def test_weights(data):
  """Train and save one model per kernel initializer (zeros, random normal, ones).

  Every other hyperparameter keeps its value from get_default_params();
  `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('weights_zeros', 'zeros'),
          ('weights_rand', 'random_normal'),
          ('weights_ones', 'ones'))
  for name, initializer in runs:
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, weights=initializer))
     
# Dropouts
def test_dropouts(data):
  """Train and save one model per dropout rate (0.2, 0.5, 0.9).

  Every other hyperparameter keeps its value from get_default_params();
  `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('dropout02', 0.2), ('dropout05', 0.5), ('dropout09', 0.9))
  for name, rate in runs:
    print(name)
    make_model(filename=name, dataset=data, **dict(defaults, dropout=rate))
    
# Optimizers    
def test_optimizer(data):
  """Train and save one model per optimizer (RMSprop, adam, nadam).

  Every other hyperparameter keeps its value from get_default_params();
  `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('opti_RMSprop', 'RMSprop'),
          ('opti_adam', 'adam'),
          ('opti_nadam', 'nadam'))
  for name, optimizer in runs:
    print(name)
    make_model(filename=name, dataset=data, **dict(defaults, opt=optimizer))

# Kernel regularizer    
def test_k_reg(data):
  """Train and save one model per kernel regularizer ('l1', 'l2').

  Each regularizer is handed to make_model as a single string.  Every other
  hyperparameter keeps its value from get_default_params(); `data` is
  forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('kreg_l1', 'l1'), ('kreg_l2', 'l2'))
  for name, regularizer in runs:
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, k_reg=regularizer))
    
# Bias regularizer 
def test_b_reg(data):
  """Train and save one model per bias regularizer ('l1', 'l2').

  Each regularizer is handed to make_model as a single string.  Every other
  hyperparameter keeps its value from get_default_params(); `data` is
  forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('breg_l1', 'l1'), ('breg_l2', 'l2'))
  for name, regularizer in runs:
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, b_reg=regularizer))

# Loss function
def test_loss(data):
  """Train and save one model per loss function.

  The losses tried are categorical cross-entropy, Poisson and
  Kullback-Leibler divergence.  Every other hyperparameter keeps its value
  from get_default_params(); `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('loss_cat', 'categorical_crossentropy'),
          ('loss_poisson', 'poisson'),
          ('loss_kl', 'kullback_leibler_divergence'))
  for name, loss_name in runs:
    print(name)
    make_model(filename=name, dataset=data, **dict(defaults, loss=loss_name))

# Batch Size
def test_bs(data):
  """Train and save one model per batch size (100, 500, 1000).

  Every other hyperparameter keeps its value from get_default_params();
  `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('bs_100', 100), ('bs_500', 500), ('bs_1000', 1000))
  for name, batch_size in runs:
    print(name)
    make_model(filename=name, dataset=data, **dict(defaults, bs=batch_size))


# Widths
def test_widths(data):
  """Train and save one model per set of layer widths.

  Each width list has three entries; MLP.create_mlp uses the last entry as
  the width of the output layer.  Every other hyperparameter keeps its value
  from get_default_params(); `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('width_50_50_50', [50, 50, 50]),
          ('width_10_30_100', [10, 30, 100]),
          ('width_500_250_50', [500, 250, 50]))
  for name, layer_widths in runs:
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, width=layer_widths))

# Activation Functions
def test_act_fun(data):
  """Train and save one model per output-layer activation.

  The two hidden layers stay 'relu'; only the last activation changes
  (softplus, sigmoid, exponential).  Every other hyperparameter keeps its
  value from get_default_params(); `data` is forwarded to make_model as
  `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  runs = (('act_plus', 'softplus'),
          ('act_sig', 'sigmoid'),
          ('act_exp', 'exponential'))
  for name, output_activation in runs:
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, act_fun=['relu', 'relu', output_activation]))

# Number of layers
def test_nlay(data):
  """Train and save one model per network depth (1, 10 and 100 layers).

  Every layer is 10 units wide; all layers use 'relu' except the last one,
  which uses 'softmax'.  Every other hyperparameter keeps its value from
  get_default_params(); `data` is forwarded to make_model as `dataset`.
  """
  keys = ('nlay', 'k_reg', 'b_reg', 'width', 'act_fun', 'weights', 'dropout',
          'loss', 'opt', 'n_epochs', 'bs')
  defaults = dict(zip(keys, get_default_params()))
  for depth in (1, 10, 100):
    name = 'nlay_%d' % depth
    activations = ['relu'] * (depth - 1) + ['softmax']
    print(name)
    make_model(filename=name, dataset=data,
               **dict(defaults, nlay=depth, width=[10] * depth,
                      act_fun=activations))

In [None]:
# Running our code for both the MNIST and Fashion MNIST datasets
def run_tests():
  """Run every hyperparameter experiment, first on 'digits', then on 'fashion'."""
  experiments = (
      test_reference,
      test_weights,
      test_dropouts,
      test_optimizer,
      test_k_reg,
      test_b_reg,
      test_loss,
      test_bs,
      test_widths,
      test_act_fun,
      test_nlay,
  )
  for dataset in ('digits', 'fashion'):
    for experiment in experiments:
      experiment(dataset)

run_tests()

### CNNs
Here we studied the hyperparameters for CNNs.

In [None]:
#useful functions to create, train CNN's

#load up the data and normalise it 
def load_data(data):
    """Load a dataset and scale its images by 1/255.

    `data` must expose `load_data()` returning
    `(x_train, y_train), (x_test, y_test)`.  Returns
    `x_train, y_train, x_test, y_test`; the labels are passed through untouched.
    """
    train_split, test_split = data.load_data()
    images_train, labels_train = train_split
    images_test, labels_test = test_split
    return images_train / 255, labels_train, images_test / 255, labels_test

#compile the model
def compile_model(model, loss = "sparse_categorical_crossentropy", optimizer = 'sgd'):
    """Compile `model` with the given loss and optimizer, tracking accuracy.

    Returns the same model object so calls can be chained.
    """
    settings = {"loss": loss, "optimizer": optimizer, "metrics": ["accuracy"]}
    model.compile(**settings)
    return model

#train the model
def train_model(model, x_train, y_train, num_epochs = 20, val_split = 0.2, bs = 32,
                callbacks = None):
    """Fit `model` and return the Keras training history.

    Parameters:
    model      -- compiled Keras model
    x_train    -- training inputs
    y_train    -- training targets
    num_epochs -- maximum number of epochs
    val_split  -- fraction of the training data held out for validation
    bs         -- batch size
    callbacks  -- optional list of Keras callbacks.  When omitted, the
                  module-level `early_stop` callback is used if one has been
                  defined (the behaviour existing callers rely on); otherwise
                  an EarlyStopping(monitor='val_loss', patience=5) is created,
                  so the function no longer fails with a NameError.
    """
    if callbacks is None:
        stopper = globals().get('early_stop')
        if stopper is None:
            stopper = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
        callbacks = [stopper]

    history = model.fit(x_train, y_train, epochs = num_epochs,
                        validation_split = val_split, batch_size = bs,
                        callbacks = callbacks)
    return history

#reshape the image arrays into the 4-D form used for the CNN (adds a trailing channel axis)
def cnn_data(x_train, x_test, dim = 28):
    """Reshape both image arrays to `(n_samples, dim, dim, 1)`.

    The sample count is taken from each array's first axis.  Returns the
    reshaped `x_train, x_test`.
    """
    def with_channel(images):
        return images.reshape(images.shape[0], dim, dim, 1)

    return with_channel(x_train), with_channel(x_test)

#create a CNN
def create_cnn(in_shape = [28, 28, 1], nclay = 7, nlay = 3,
              carch = ['maxp', 'conv', 'conv', 'maxp', 'conv', 'conv', 'maxp'],
              f = [64, 128, 128, 256, 256],
              ps = 2, ks = 3, width = [128, 64, 10],
              act_fun = ['relu', 'relu', 'softmax'],
              dropout = 0.5):
    """Build an (uncompiled) Sequential CNN.

    The model always starts with one convolution (`f[0]` filters, kernel
    size 7) that receives `in_shape`.  It then adds one layer per entry of
    `carch`, a Flatten layer, and `nlay` Dense layers.

    Parameters:
    in_shape -- input shape given to the first convolution
    nclay    -- number of hidden convolutional layers; accepted for interface
                compatibility but not used by this function
    nlay     -- number of Dense layers appended after Flatten
    carch    -- order of the layers after the first convolution; each entry
                is 'conv' (convolution) or 'maxp' (max pooling)
    f        -- number of filters per convolution: `f[0]` for the first one,
                then one entry per 'conv' in `carch`; values are cast to int,
                so NumPy float arrays are accepted
    ps       -- pooling size
    ks       -- kernel size of every convolution except the first
    width    -- number of units of each Dense layer
    act_fun  -- activation of each Dense layer
    dropout  -- accepted for interface compatibility but not used: no Dropout
                layer is added (NOTE(review): confirm whether dropout between
                the Dense layers was intended)

    Returns the `keras.Sequential` model.

    Raises ValueError when `carch` contains an unknown entry, when `f` has
    fewer entries than there are convolutions, or when `width` / `act_fun`
    have fewer than `nlay` entries.  The default lists are only read, never
    modified.
    """
    # Validate up front so a bad configuration fails with a clear message
    # instead of an IndexError halfway through building the model.
    unknown = [kind for kind in carch if kind not in ('conv', 'maxp')]
    if unknown:
        raise ValueError("carch entries must be 'conv' or 'maxp', got %r" % (unknown,))

    n_conv = 1 + sum(1 for kind in carch if kind == 'conv')
    if len(f) < n_conv:
        raise ValueError('f must provide %d filter counts (first convolution + one '
                         "per 'conv' in carch), got %d" % (n_conv, len(f)))
    if nlay > len(width) or nlay > len(act_fun):
        raise ValueError('width and act_fun must each provide at least nlay=%d entries'
                         % nlay)

    # Callers pass float arrays such as np.zeros(5) + 64; filter counts must be ints.
    filters = [int(count) for count in f[:n_conv]]

    DefaultConv2D = partial(layers.Conv2D,
                            kernel_size = ks, activation='relu', padding="SAME")
    model = keras.Sequential()

    # First convolution: larger 7x7 kernel, carries the input shape.
    model.add(DefaultConv2D(filters = filters[0], kernel_size = 7,
                            input_shape = in_shape, padding = 'same'))
    c_counter = 1
    for kind in carch:
        if kind == 'conv':
            model.add(DefaultConv2D(filters = filters[c_counter]))
            c_counter += 1
        else:
            model.add(layers.MaxPooling2D(pool_size = ps))

    model.add(layers.Flatten())
    for i in range(nlay):
        model.add(layers.Dense(width[i], activation=act_fun[i]))
    return model

The following cell shows an example of how we trained a number of CNNs by varying a single hyperparameter (here, the number of filters in each convolutional layer). The same process was applied to the other hyperparameters, so the code for those is very similar.

In [None]:
# Train one CNN per filter count on Fashion MNIST, then on the MNIST digits.
# For every run the training history is written to '<prefix>=<filters>.csv'
# and the model.evaluate result on the test set to '<prefix>=<filters>.npy'.

F = [32, 64, 128, 256]  #number of filters that we want to study

early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5) #define a stopping condition
fashion_mnist = keras.datasets.fashion_mnist   #load up the data sets
digit_mnist = keras.datasets.mnist

#first train the models on the fashion set, then repeat the same procedure for the digit set
for dataset, prefix in ((fashion_mnist, 'cnn_fash_f'), (digit_mnist, 'cnn_f')):
    X_train, Y_train, X_test, Y_test = load_data(dataset) #define a training and test set
    X_train, X_test = cnn_data(X_train, X_test)

    for i in F:
        #number of filters for each layer; in this analysis all the conv. layers
        #have the same number of filters (plain ints, which is what Conv2D expects)
        filters = [i] * 5
        model = create_cnn(f = filters) #create the model
        model.summary()   #summary() prints itself; wrapping it in print() adds a stray "None"
        model = compile_model(model)   #compile the model
        history = train_model(model, X_train, Y_train, num_epochs = 40)   #train the model

        #save the training history
        hist = pd.DataFrame(history.history)
        hist_csv_file = '{}={}.csv'.format(prefix, i)
        hist.to_csv(hist_csv_file)

        #save the accuracy on the test set
        test_acc = np.array(model.evaluate(X_test, Y_test))
        test_name = '{}={}.npy'.format(prefix, i)
        np.save(test_name, test_acc)