In [7]:
#code from textbook (baseline)
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
# Baseline run: train a small fully-connected classifier on MNIST with SGD for
# NB_EPOCH epochs, then report loss and accuracy on the held-out test set.
np.random.seed(1671)  # for reproducibility
# NOTE(review): this seeds NumPy only; if the Keras backend draws initial weights
# from its own random generator, repeated runs may still differ -- confirm.

# --- network and training settings ---
NB_EPOCH = 20            # number of epochs passed to model.fit
BATCH_SIZE = 128         # batch size passed to model.fit
VERBOSE = 1
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # optimizer, constructed with its default arguments
N_HIDDEN = 128           # units in each of the two hidden Dense layers
VALIDATION_SPLIT = 0.2   # how much of TRAIN is reserved for VALIDATION

# --- data: already split into train and test sets by load_data() ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Every 28x28 image is flattened into a row of RESHAPED = 784 values.
RESHAPED = 784

# -1 lets NumPy infer the number of rows, so the cell no longer depends on the
# hard-coded 60000 / 10000 sample counts.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# normalize by the divisor 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN units (ReLU), then NB_CLASSES
# outputs; the final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# VALIDATION_SPLIT of the training data is held out for validation during fit.
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the loss and score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [8]:
#Experiment number 1 limited number of epoch iterations to 10
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
# Experiment 1: same network and data as the baseline cell, trained for only
# NB_EPOCH = 10 epochs, then evaluated on the held-out test set.
np.random.seed(1671)  # for reproducibility
# NOTE(review): this seeds NumPy only; if the Keras backend draws initial weights
# from its own random generator, repeated runs may still differ -- confirm.

# --- network and training settings ---
NB_EPOCH = 10            # number of epochs passed to model.fit
BATCH_SIZE = 128         # batch size passed to model.fit
VERBOSE = 1
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # optimizer, constructed with its default arguments
N_HIDDEN = 128           # units in each of the two hidden Dense layers
VALIDATION_SPLIT = 0.2   # how much of TRAIN is reserved for VALIDATION

# --- data: already split into train and test sets by load_data() ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Every 28x28 image is flattened into a row of RESHAPED = 784 values.
RESHAPED = 784

# -1 lets NumPy infer the number of rows, so the cell no longer depends on the
# hard-coded 60000 / 10000 sample counts.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# normalize by the divisor 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN units (ReLU), then NB_CLASSES
# outputs; the final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# VALIDATION_SPLIT of the training data is held out for validation during fit.
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the loss and score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_7 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_10 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_9 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [9]:
#Experiment number 2 increase number of epoch iterations to 75
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
# Experiment 2: same network and data as the baseline cell, trained for
# NB_EPOCH = 75 epochs, then evaluated on the held-out test set.
np.random.seed(1671)  # for reproducibility
# NOTE(review): this seeds NumPy only; if the Keras backend draws initial weights
# from its own random generator, repeated runs may still differ -- confirm.

# --- network and training settings ---
NB_EPOCH = 75            # number of epochs passed to model.fit
BATCH_SIZE = 128         # batch size passed to model.fit
VERBOSE = 1
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # optimizer, constructed with its default arguments
N_HIDDEN = 128           # units in each of the two hidden Dense layers
VALIDATION_SPLIT = 0.2   # how much of TRAIN is reserved for VALIDATION

# --- data: already split into train and test sets by load_data() ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Every 28x28 image is flattened into a row of RESHAPED = 784 values.
RESHAPED = 784

# -1 lets NumPy infer the number of rows, so the cell no longer depends on the
# hard-coded 60000 / 10000 sample counts.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# normalize by the divisor 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN units (ReLU), then NB_CLASSES
# outputs; the final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# VALIDATION_SPLIT of the training data is held out for validation during fit.
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the loss and score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             (None, 128)               100480    
_________________________________________________________________
activation_10 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_13 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_11 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_14 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_12 (Activation)   (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [10]:
#Experiment number 3 increase number of epoch iterations to 200
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD
from keras.utils import np_utils
# Experiment 3: same network and data as the baseline cell, trained for
# NB_EPOCH = 200 epochs, then evaluated on the held-out test set.
np.random.seed(1671)  # for reproducibility
# NOTE(review): this seeds NumPy only; if the Keras backend draws initial weights
# from its own random generator, repeated runs may still differ -- confirm.

# --- network and training settings ---
NB_EPOCH = 200           # number of epochs passed to model.fit
BATCH_SIZE = 128         # batch size passed to model.fit
VERBOSE = 1
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # optimizer, constructed with its default arguments
N_HIDDEN = 128           # units in each of the two hidden Dense layers
VALIDATION_SPLIT = 0.2   # how much of TRAIN is reserved for VALIDATION

# --- data: already split into train and test sets by load_data() ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Every 28x28 image is flattened into a row of RESHAPED = 784 values.
RESHAPED = 784

# -1 lets NumPy infer the number of rows, so the cell no longer depends on the
# hard-coded 60000 / 10000 sample counts.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# normalize by the divisor 255
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN units (ReLU), then NB_CLASSES
# outputs; the final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# VALIDATION_SPLIT of the training data is held out for validation during fit.
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# score[0] is the loss and score[1] the accuracy metric requested in compile().
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 128)               100480    
_________________________________________________________________
activation_13 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_14 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_15 (Activation)   (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

# Accuracy for Baseline (Epoch = 20): 94.78%

# Accuracy for Experiment 1 (Epoch = 10): 92.61%

# Accuracy for Experiment 2 (Epoch = 75): 96.89%

# Accuracy for Experiment 3 (Epoch = 200): 97.61%

**Analysis**
The number of epochs sets how many full passes over the training data the model makes during training. I started by halving the number of epochs (from 20 to 10) to see how that would affect the test accuracy. This lowered the accuracy by a little more than 2 percentage points (94.78% to 92.61%), demonstrating that the model needs more epochs to reach a higher accuracy. In the next experiment I increased the number of epochs to 75, which resulted in 96.89% test accuracy, a gain of just over 2 percentage points from the baseline. This told me I needed to increase the number of epochs substantially to increase the test accuracy. In the final experiment I increased the number of epochs to 200, raising the test accuracy to 97.61% (a gain of nearly 3 percentage points over the baseline). Watching the epochs run, I noticed that the accuracy wasn't increasing very quickly and would actually drop every second or third epoch. I infer that increasing the number of epochs much further would lead to overfitting, and the test accuracy would start to decrease.

