## Import libraries

In [None]:
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils
from keras import regularizers

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Fix the seed of NumPy's global random generator so NumPy-based draws repeat across runs.
# NOTE(review): this call seeds NumPy only; whether Keras weight initialisation and dropout
# also become deterministic depends on the backend in use -- confirm.
SEED = 123
np.random.seed(SEED)

## Define hyper-parameters

In [None]:
# ======= Hyper-parameters that can be tuned for model performance =======

# Number of units in each hidden Dense layer
N_HIDDEN = 512
# Rate handed to every Dropout layer
DROPOUT = 0.3
# Optimizer passed to model.compile(); alternatives tried: SGD(), RMSprop()
OPTIMIZER = Adam(learning_rate=0.001)
# Number of training instances observed before the optimizer performs a weight update
BATCH_SIZE = 128
# Number of passes over the training set. During each pass the optimizer adjusts the
# weights so that the objective function is minimized.
NB_EPOCH = 250

# ======= Settings that are not tuned for model performance =======

NB_CLASSES = 10  # Number of classes to predict
VALIDATION_SPLIT = 0.2  # Fraction of the training data held out for validation during fit()
VERBOSE = 1  # Whether to print out training/evaluation progress

## Pre-process data

In [None]:
# Load the MNIST train/test split (images plus integer class labels)
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Every image is flattened into a single vector of rows * columns pixel values
img_rows, img_cols = X_train.shape[1], X_train.shape[2]
RESHAPED = img_rows * img_cols

# Flatten, cast to float32 and divide the pixel values by 255
# (MNIST pixels are 8-bit intensities, so this rescales them to the [0, 1] range)
X_train = X_train.reshape(len(X_train), RESHAPED).astype('float32') / 255
X_test = X_test.reshape(len(X_test), RESHAPED).astype('float32') / 255

# One-hot encode the integer labels into matrices with NB_CLASSES columns
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# The number of columns (features) must match the model's input size
print('training set shape: ', X_train.shape)
print('test set shape: ', X_test.shape)

## Define model

In [None]:
# Fully connected network: two hidden ReLU layers followed by a softmax output layer.
# Dense / Activation / Dropout stay separate layers so that model.summary() lists the
# same layers, in the same order, as when they were added one by one.
model = Sequential([
    # Hidden layer 1, with an L2 penalty on its kernel weights. The penalty factor is
    # passed positionally because the keyword's name differs between Keras releases
    # (`l` in older ones, `l2` in newer ones).
    Dense(N_HIDDEN, input_dim=RESHAPED, kernel_regularizer=regularizers.l2(0.001)),
    Activation('relu'),
    Dropout(DROPOUT),  # Dropout on the output of hidden layer 1
    # Hidden layer 2
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),  # Dropout on the output of hidden layer 2
    # Output layer: one unit per class; softmax turns the scores into class probabilities
    Dense(NB_CLASSES),
    Activation('softmax'),
])

# Compile the model once it is defined.
# `loss` is the objective minimized during the gradient descent process; `metrics` lists
# what is additionally reported for evaluation purposes.
model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])

model.summary()

## Fit the model with training set data

In [None]:
# Train the model; the VALIDATION_SPLIT fraction of the training data is used for
# validation. The object returned by fit() is kept in `history`.
history = model.fit(
    x=X_train,
    y=Y_train,
    epochs=NB_EPOCH,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    verbose=VERBOSE,
)

## Evaluate the model with test set

In [None]:
def evaluation(model_obj, X_test, Y_test, verbose=None):
    """Evaluate a model on held-out data, print loss and accuracy, return the accuracy.

    Args:
        model_obj: Object exposing ``evaluate(X, Y, verbose=...)`` whose result is
            indexable, with the loss at position 0 and the accuracy at position 1.
        X_test: Features forwarded unchanged to ``model_obj.evaluate``.
        Y_test: Targets forwarded unchanged to ``model_obj.evaluate``.
        verbose: Verbosity forwarded to ``evaluate``. ``None`` (the default) falls
            back to the module-level ``VERBOSE`` setting, which keeps existing
            three-argument calls behaving as before.

    Returns:
        Element 1 of the evaluation result (the accuracy).
    """
    if verbose is None:
        # Resolved at call time so the function can be defined before VERBOSE exists.
        verbose = VERBOSE
    score = model_obj.evaluate(X_test, Y_test, verbose=verbose)
    test_loss, test_accuracy = score[0], score[1]
    print('Test score: ', test_loss)
    print('test accuracy: ', test_accuracy)
    return test_accuracy

# Test-set accuracy of the model trained above; the variable name records the
# configuration being compared (Adam optimizer, 512 hidden units, L2 regularisation).
Adam_acc_lr_0001_neuron512_l2001 = evaluation(model_obj=model, X_test=X_test, Y_test=Y_test)

## Accuracy scores for the optimizer and hyper-parameter configurations tried

In [None]:
# (label, variable name) for every configuration compared in this notebook. Each
# variable is filled in by running the notebook with the matching hyper-parameters and
# storing the result of evaluation() under that name.
ACCURACY_RESULTS = (
    ('SGD Accuracy: ', 'SGD_acc'),
    ('RMSprop Accuracy: ', 'RMSprop_acc'),
    ('Adam Accuracy: ', 'Adam_acc'),
    ('Adam lr0001 neurn512: ', 'Adam_acc_lr_0001_neuron512'),
    ('Adam lr0001 neurn512 l20001: ', 'Adam_acc_lr_0001_neuron512_l2001'),
)

# Printed in place of an accuracy whose run has not been performed in this session.
NOT_RUN = 'n/a (not evaluated in this session)'


def accuracy_report(namespace):
    """Pair each report label with the accuracy stored under its variable name.

    Args:
        namespace: Mapping from variable name to value (for example ``globals()``).

    Returns:
        A list of ``(label, value)`` tuples in report order. ``value`` is ``NOT_RUN``
        when the variable is missing from ``namespace``, so reporting never raises
        ``NameError`` for a configuration that was not run.
    """
    return [(label, namespace.get(var_name, NOT_RUN)) for label, var_name in ACCURACY_RESULTS]


for label, value in accuracy_report(globals()):
    print(label, value)