# MNIST

## Data loading and manipulation

Import packages and load data

In [26]:
import os
import time

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard

tf.keras.backend.clear_session()

(X_train_data, y_train_data), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
print(X_train_data.shape)

(60000, 28, 28)


Create validation set and normalise pixel intensities

In [27]:
X_train, X_val, y_train, y_val = train_test_split(X_train_data, y_train_data, 
                                                  test_size=0.2, 
                                                  random_state=42)

X_train, X_val, X_test = (X_train/255.0), (X_val/255.0), (X_test/255.0)

num_classes = 10
rows, cols = X_train.shape[1], X_train.shape[2]

## Base model

Create model

In [None]:
base_model = Sequential([
             Flatten(input_shape=[rows, cols]),
             Dense(300, activation='relu'),
             Dense(100, activation='relu'),
             Dense(num_classes, activation='softmax')
])

base_model.summary()

Compile model

In [None]:
base_model.compile(loss="sparse_categorical_crossentropy",
                   optimizer='sgd', metrics=['accuracy'])

Fit model

In [None]:
base_trained_model = base_model.fit(X_train, y_train, 
                                    epochs=10,
                                    validation_data=(X_val, y_val))

Plot validation accuracy across epochs

In [None]:
print(base_trained_model.history)

In [None]:
pd.DataFrame(base_trained_model.history).plot(figsize=(8,5))
plt.grid(True)
plt.gca().set_ylim(0,1)
plt.show()

Evaluate model on test data

In [None]:
base_model.evaluate(X_test, y_test)

## Callbacks and Tensorboard

Create model

In [None]:
callback_model = Sequential([
                 Flatten(input_shape=[rows, cols]),
                 Dense(300, activation='relu'),
                 Dense(100, activation='relu'),
                 Dense(num_classes, activation='softmax')
])

Compile model

In [None]:
callback_model.compile(loss="sparse_categorical_crossentropy",
                       optimizer='sgd', metrics=['accuracy'])

Log file for TensorBoard

In [19]:
def get_run_logdir(logdir):
    run_id = time.strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(logdir, run_id) 

callback_root_logdir = os.path.join(os.curdir, "callback_logs")
callback_run_logdir = get_run_logdir(callback_root_logdir)

Callbacks for model saving, early stopping and TensorBoard

In [None]:
callbacks = [ModelCheckpoint("callback_model.h5"),
             EarlyStopping(patience=10, restore_best_weights=True),
             TensorBoard(callback_run_logdir)]

Fit model with large number of epochs to allow for early stopping

In [None]:
callback_trained_model = callback_model.fit(X_train, y_train, 
                                            epochs=100,
                                            validation_data=(X_val, y_val),
                                            callbacks=callbacks)

Run TensorBoard session

In [None]:
%reload_ext tensorboard
%tensorboard --logdir=./callback_logs --port=6008

## Varying learning rate

Callback class for increasing learning rate by a given factor

In [20]:
K = tf.keras.backend

class IncreasingLearningRate(tf.keras.callbacks.Callback):
    def __init__(self, factor):
        self.factor = factor
        self.rates = []
        self.losses = []
    def on_batch_end(self, batch, logs):
        self.rates.append(K.get_value(self.model.optimizer.lr))
        self.losses.append(logs["loss"])
        K.set_value(self.model.optimizer.lr, self.model.optimizer.lr * self.factor)

Create model

In [21]:
vlr_model = Sequential([
            Flatten(input_shape=[rows, cols]),
            Dense(300, activation='relu'),
            Dense(100, activation='relu'),
            Dense(num_classes, activation='softmax')
])

Compile model starting at 1e-3 learning rate

In [22]:
vlr_model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=SGD(lr=1e-3), 
                  metrics=['accuracy'])

TensorBoard logs

In [23]:
vlr_root_logdir = os.path.join(os.curdir, "vlr_logs")
vlr_run_logdir = get_run_logdir(vlr_root_logdir)

Callbacks including increase learning rate by 0.5% each epoch

In [24]:
callbacks = [ModelCheckpoint("vlr_model.h5"),
             EarlyStopping(patience=10, restore_best_weights=True),
             TensorBoard(callback_run_logdir),
             IncreasingLearningRate(factor=1.005)
            ]

Fit model with increasing learning rate

In [25]:
vlr_trained_model = vlr_model.fit(X_train, y_train,
                                  epochs=100,
                                  validation_data=(X_val, y_val),
                                  callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100

KeyboardInterrupt: 

This isnt working but the idea is to plot learning rate against loss to find an optimal learning rate.

In practice better optimisers such as the adam optimiser will vary their learnig rate themselves and produce great results

In [29]:
adam_model = Sequential([
             Flatten(input_shape=[rows, cols]),
             Dense(300, activation='relu'),
             Dense(100, activation='relu'),
             Dense(num_classes, activation='softmax')
])

adam_model.compile(loss="sparse_categorical_crossentropy",
#                   optimizer=SGD(lr=1e-3), 
                   optimizer='adam',
                   metrics=['accuracy'])

adam_root_logdir = os.path.join(os.curdir, "adam_logs")
adam_run_logdir = get_run_logdir(adam_root_logdir)

callbacks = [ModelCheckpoint("adam_model.h5"),
             EarlyStopping(patience=10, restore_best_weights=True),
             TensorBoard(callback_run_logdir),
#              IncreasingLearningRate(factor=1.005)
            ]

adam_trained_model = adam_model.fit(X_train, y_train,
                                  epochs=100,
                                  validation_data=(X_val, y_val),
                                  callbacks=callbacks)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
