In [27]:
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, RMSprop, Adam
from keras.utils import np_utils
# Train and evaluate a two-hidden-layer, fully connected network with dropout
# on the MNIST digit images, then print the loss and accuracy on the test set.

# Seeds NumPy's global RNG for reproducibility.
# NOTE(review): this only seeds NumPy; whether the Keras backend draws its
# initial weights / dropout masks from this RNG depends on the backend - confirm.
np.random.seed(1671)

# --- network and training hyperparameters ---
NB_EPOCH = 40
BATCH_SIZE = 128
VERBOSE = 1
NB_CLASSES = 10  # number of outputs = number of digits
OPTIMIZER = Adam()
N_HIDDEN = 128  # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2  # how much TRAIN is reserved for VALIDATION
DROPOUT = 0.3  # rate passed to each Dropout layer
RESHAPED = 784  # each 28x28 image is flattened to 784 values

# --- data: split between train and test sets ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image into one row of RESHAPED values. The row count is
# inferred (-1) from the array itself instead of hard-coding 60000 / 10000,
# so the script does not silently depend on the exact dataset size.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# normalize pixel values by dividing by 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model ---
# two hidden layers of N_HIDDEN units, each followed by ReLU and dropout
# NB_CLASSES outputs
# final stage is softmax
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# --- training: VALIDATION_SPLIT of the training data is held out for validation ---
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)

# --- evaluation on the test set ---
# score[0] is the loss; score[1] is the 'accuracy' metric requested in compile()
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_22"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_65 (Dense)            (None, 128)               100480    
                                                                 
 activation_64 (Activation)  (None, 128)               0         
                                                                 
 dropout_40 (Dropout)        (None, 128)               0         
                                                                 
 dense_66 (Dense)            (None, 128)               16512     
                                                                 
 activation_65 (Activation)  (None, 128)               0         
                                                                 
 dropout_41 (Dropout)        (None, 128)               0         
                                                                 
 dense_67 (Den

**Changes Made**
I added dropout, which increased the accuracy to 96%.
Per the documentation, I switched the optimizer to RMSprop with 20 epochs, which dramatically increased the accuracy to 97.85%.
I changed the optimizer to Adam(), which decreased the accuracy to 97.78% at 20 epochs.
I changed the number of epochs to 30, and the accuracy increased to 97.96%.
Increasing the number of epochs further, to 40, increased the accuracy to 98.22%.
Increasing the number of hidden layers decreased the accuracy to 97.79%.
So it appears that the highest accuracy I could achieve was 98.22%, using 40 epochs, the Adam optimizer, a dropout rate of 0.3, and two hidden layers.


