<a href="https://colab.research.google.com/github/jphall663/GWU_ML/blob/main/notebook/lecture_8/Assignment_8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MNIST Data

1. Imports

In [None]:
import keras # overall package API for deep learning

# for data loading/prep
from keras.datasets import mnist
from keras.utils import np_utils

# for training
from keras.models import Sequential # for building sequential, not recurrent, networks
from keras.layers import Conv2D, MaxPooling2D, Activation, Flatten, Dense # for the components of a lenet5-like network

2. Try for reproducibility

In [None]:
import os

# `!PYTHONHASHSEED=0` only assigns the variable inside a short-lived subshell,
# so nothing in this kernel (or its children) ever sees it. Setting it through
# os.environ makes it part of this process's environment instead.
# NOTE: Python reads PYTHONHASHSEED when the interpreter starts, so this affects
# processes launched from here on; to pin hashing for the kernel itself the
# variable must be set before the kernel is started.
os.environ["PYTHONHASHSEED"] = "0"

In [None]:
import numpy as np
import tensorflow as tf
import random as python_random

# one seed value shared by every random number generator seeded below
SEED = 12345

# seed Python's `random` module, NumPy, and TensorFlow with the same value
python_random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

3. Load and format MNIST data

In [None]:
# load data: the train/validation split provided by keras.datasets.mnist
(x_train, y_train), (x_valid, y_valid) = mnist.load_data()

# normalize data: divide by 255 so intensities fall in [0, 1]
# (assumes 8-bit pixel values -- TODO confirm if the data source changes)
x_train = x_train/255
x_valid = x_valid/255

# ensure y's are treated as categories (one-hot encoded)
# the class count is taken from the training labels and passed explicitly so
# both splits are encoded to the same width, even if the validation labels
# happen to lack the highest class; keras.utils.to_categorical is the public
# entry point for the same helper the np_utils submodule exposes
num_classes = int(y_train.max()) + 1
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_valid = keras.utils.to_categorical(y_valid, num_classes=num_classes)

4. Examine data

In [None]:
# pixel intensities for first training image, rendered as text (2 decimals, wide lines)
first_image_text = np.array2string(x_train[0], precision=2, max_line_width=150)
print(first_image_text, end="\n\n")  # trailing blank line separates image from label

# label for first training image
print(y_train[0])

# Build Model

5. Initialize a sequential Keras model

In [None]:
# start from an empty Sequential model; layers are appended with model.add() below
model = keras.models.Sequential()

6. Define a smaller version of LeNet-5 that uses the standard 28x28 MNIST images

In [None]:
# Exercise template: layers are appended to `model` in order (C1, S2, C3, S4, C5, F6, Output).
# Every line tagged REQUIRES STUDENT INPUT, and every `?` in code or comments,
# must be completed before this cell will run.

# C1
# 1 image in, 6 feature maps out (1 image represented *in each of* the 6 feature maps)
# 28 x 28 x 1 inputs
# 1 image goes into 6 filters with (1 x 2 x 2 + 1) = 5 weights for 30 total weights in C1 layer
# resulting in ((28 - 2 + 0)/1) + 1 = 27 x 27 x 6 output volume
model.add(Conv2D(filters=6, kernel_size=(2,2), input_shape=(28,28,1)))

# S2
# 6 feature maps in, 6 activation maps out
# 27 x 27 x 6 input volume
# goes into 0 weights in S2 layer
# pooling implies kernel size 4, stride 2 and padding of 0.5 b/c of odd input size
# NOTE(review): the code passes pool_size=2; "kernel size 4" presumably counts the
# elements of a 2 x 2 window -- confirm
# resulting in a ((27 - 4 + 1)/2) + 1 = 13 x 13 x 6 output volume
model.add(MaxPooling2D(pool_size=2)) 
model.add(Activation()) # REQUIRES STUDENT INPUT

# C3
# 6 activation maps in, 16 feature maps out (6 activation maps represented *in each of* the 16 feature maps)
# 13 x 13 x 6 input volume
# 6 activation maps go into 16 filters with (6 x 5 x 5 + 1) = 151 weights for a total of 2416 weights in C3 layer
# resulting in ((13 - 5 + 0)/1) + 1 = 9 x 9 x 16 output volume
model.add(Conv2D(filters=16, kernel_size=(5,5)))

# S4
# 16 feature maps in, 16 activation maps out
# 9 x 9 x 16 input volume
# goes into 0 weights in S4 layer
# pooling implies kernel size 4, stride 2 and padding of 0.5 b/c of odd input size
# resulting in ((9 - ? + 1)/2) + 1 = ? x ? x 16 output volume
model.add(MaxPooling2D(pool_size=2))
model.add(Activation()) # REQUIRES STUDENT INPUT

# C5
# 16 activation maps in, 120 feature maps out (16 activation maps represented *in each of* the 120 feature maps)
# ? x ? x 16 input volume
# 16 activation maps go into 120 filters with (16 x ? x ? + 1) = ?7 weights for a total of ? weights in C5 layer
# resulting in a ((? - ? + 0)/1) + 1 = 1 x 1 x 120 output volume
model.add(Conv2D(filters=120, kernel_size=(?,?))) # REQUIRES STUDENT INPUT

# F6
# 120 input elements are flattened and connected to ? weights with biases
# resulting in 120 x ? + ? = ? weights in F6 and a ? x 1 x 1 output volume
model.add(Flatten())
model.add(Dense(?)) # REQUIRES STUDENT INPUT
model.add(Activation()) # REQUIRES STUDENT INPUT

# Output
# each of ? units in F6 feed into 10 softmax output units
# there are ? x ? + ? = ? weights in the output layer resulting in a ? x 1 x 1 output volume
# output unit with highest output "probability" is the prediction for the image
model.add(Dense(?)) # REQUIRES STUDENT INPUT
model.add(Activation('softmax'))

7. View a summary of the model architecture

In [None]:
# print Keras's layer-by-layer summary of `model`; use it to check the
# hand-computed output volumes and weight counts from the model-definition cell
model.summary()

8. Compile the model (set the loss, optimizer, and metrics used for training)

In [None]:
# configure training: categorical cross-entropy loss (labels are one-hot),
# the "sgd" optimizer with its default settings, and accuracy as the reported metric
model.compile(
    optimizer="sgd",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

9. Train model

In [None]:
# restart and run notebook for reproducible results
# running this cell multiple times will result in irreproducible results
# (each call continues training from the model's current weights)
#
# the result of fit() is bound to `history` instead of `_`: assigning to `_`
# overrides IPython's "last output" variable and throws the training record away
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=128,
)

10. Evaluate model performance on validation data

In [None]:
# score the trained model on the held-out validation images; the returned value
# is the cell's output (loss followed by the metrics given to compile())
model.evaluate(
    x=x_valid,
    y=y_valid,
    batch_size=128,
)