In [None]:
"""Initial experiment: two hidden layers of 128 neurons, Adam optimizer."""
from __future__ import print_function

import time  # used to measure how long training takes

import numpy as np
from keras.datasets import mnist
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

np.random.seed(1671)  # fixed NumPy seed, for reproducibility

# ---- hyperparameters -------------------------------------------------------
NB_EPOCH = 20
BATCH_SIZE = 40
VERBOSE = 1
NB_CLASSES = 10  # one output per digit
OPTIMIZER = Adam()  # algorithm that updates the model's weights during training
N_HIDDEN = 128  # neurons in each hidden layer
VALIDATION_SPLIT = 0.2  # share of the training data reserved for validation
DROPOUT = 0.3
RESHAPED = 784  # each 28x28 image is flattened into 784 values

# ---- data ------------------------------------------------------------------
# MNIST ships as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten 60000 x 28 x 28 -> 60000 x 784 (and 10000 x 784 for test), convert
# to float32, then divide by 255 so every pixel value lies between 0 and 1.
X_train = X_train.reshape(60000, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(10000, RESHAPED).astype('float32') / 255
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

# Turn the integer digit labels into NB_CLASSES-wide categorical vectors.
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

# ---- model -----------------------------------------------------------------
model = Sequential([
    # first hidden layer
    Dense(N_HIDDEN, input_shape=(RESHAPED,)),
    Activation('relu'),
    Dropout(DROPOUT),
    # second hidden layer
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),
    # output layer: one softmax probability per digit
    Dense(NB_CLASSES),
    Activation('softmax'),
])
model.summary()

# Choose the loss, the optimizer and the metric reported while training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# ---- train (timed) ---------------------------------------------------------
start_time = time.time()
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)
end_time = time.time()
final_time = end_time - start_time  # seconds spent inside model.fit

# ---- evaluate --------------------------------------------------------------
# score[0] is the loss on the test set, score[1] the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])
print(f"Test completion time: {final_time:.2f} seconds")

In [None]:
"""Experiment: hidden neurons increased to 200 (two hidden layers, Adam)."""
from __future__ import print_function

import time  # used to measure how long training takes

import numpy as np
from keras.datasets import mnist
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

np.random.seed(1671)  # fixed NumPy seed, for reproducibility

# ---- hyperparameters -------------------------------------------------------
NB_EPOCH = 20
BATCH_SIZE = 40
VERBOSE = 1
NB_CLASSES = 10  # one output per digit
OPTIMIZER = Adam()  # algorithm that updates the model's weights during training
N_HIDDEN = 200  # neurons in each hidden layer (the value under test here)
VALIDATION_SPLIT = 0.2  # share of the training data reserved for validation
DROPOUT = 0.3
RESHAPED = 784  # each 28x28 image is flattened into 784 values

# ---- data ------------------------------------------------------------------
# MNIST ships as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten 60000 x 28 x 28 -> 60000 x 784 (and 10000 x 784 for test), convert
# to float32, then divide by 255 so every pixel value lies between 0 and 1.
X_train = X_train.reshape(60000, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(10000, RESHAPED).astype('float32') / 255
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

# Turn the integer digit labels into NB_CLASSES-wide categorical vectors.
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

# ---- model -----------------------------------------------------------------
model = Sequential([
    # first hidden layer
    Dense(N_HIDDEN, input_shape=(RESHAPED,)),
    Activation('relu'),
    Dropout(DROPOUT),
    # second hidden layer
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),
    # output layer: one softmax probability per digit
    Dense(NB_CLASSES),
    Activation('softmax'),
])
model.summary()

# Choose the loss, the optimizer and the metric reported while training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# ---- train (timed) ---------------------------------------------------------
start_time = time.time()
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)
end_time = time.time()
final_time = end_time - start_time  # seconds spent inside model.fit

# ---- evaluate --------------------------------------------------------------
# score[0] is the loss on the test set, score[1] the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])
print(f"Test completion time: {final_time:.2f} seconds")

In [None]:
"""Experiment: hidden neurons decreased to 75 (two hidden layers, Adam)."""
from __future__ import print_function

import time  # used to measure how long training takes

import numpy as np
from keras.datasets import mnist
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

np.random.seed(1671)  # fixed NumPy seed, for reproducibility

# ---- hyperparameters -------------------------------------------------------
NB_EPOCH = 20
BATCH_SIZE = 40
VERBOSE = 1
NB_CLASSES = 10  # one output per digit
OPTIMIZER = Adam()  # algorithm that updates the model's weights during training
N_HIDDEN = 75  # neurons in each hidden layer (the value under test here)
VALIDATION_SPLIT = 0.2  # share of the training data reserved for validation
DROPOUT = 0.3
RESHAPED = 784  # each 28x28 image is flattened into 784 values

# ---- data ------------------------------------------------------------------
# MNIST ships as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten 60000 x 28 x 28 -> 60000 x 784 (and 10000 x 784 for test), convert
# to float32, then divide by 255 so every pixel value lies between 0 and 1.
X_train = X_train.reshape(60000, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(10000, RESHAPED).astype('float32') / 255
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

# Turn the integer digit labels into NB_CLASSES-wide categorical vectors.
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

# ---- model -----------------------------------------------------------------
model = Sequential([
    # first hidden layer
    Dense(N_HIDDEN, input_shape=(RESHAPED,)),
    Activation('relu'),
    Dropout(DROPOUT),
    # second hidden layer
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),
    # output layer: one softmax probability per digit
    Dense(NB_CLASSES),
    Activation('softmax'),
])
model.summary()

# Choose the loss, the optimizer and the metric reported while training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# ---- train (timed) ---------------------------------------------------------
start_time = time.time()
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)
end_time = time.time()
final_time = end_time - start_time  # seconds spent inside model.fit

# ---- evaluate --------------------------------------------------------------
# score[0] is the loss on the test set, score[1] the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])
print(f"Test completion time: {final_time:.2f} seconds")

In [None]:
"""Experiment: 200 hidden neurons with an additional (third) hidden layer, Adam."""
from __future__ import print_function

import time  # used to measure how long training takes

import numpy as np
from keras.datasets import mnist
from keras.layers.core import Dense, Activation, Dropout
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import np_utils

np.random.seed(1671)  # fixed NumPy seed, for reproducibility

# ---- hyperparameters -------------------------------------------------------
NB_EPOCH = 20
BATCH_SIZE = 40
VERBOSE = 1
NB_CLASSES = 10  # one output per digit
OPTIMIZER = Adam()  # algorithm that updates the model's weights during training
N_HIDDEN = 200  # neurons in each hidden layer
VALIDATION_SPLIT = 0.2  # share of the training data reserved for validation
DROPOUT = 0.3
RESHAPED = 784  # each 28x28 image is flattened into 784 values

# ---- data ------------------------------------------------------------------
# MNIST ships as (images, labels) pairs for the train and test splits.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten 60000 x 28 x 28 -> 60000 x 784 (and 10000 x 784 for test), convert
# to float32, then divide by 255 so every pixel value lies between 0 and 1.
X_train = X_train.reshape(60000, RESHAPED).astype('float32') / 255
X_test = X_test.reshape(10000, RESHAPED).astype('float32') / 255
print(len(X_train), 'train samples')
print(len(X_test), 'test samples')

# Turn the integer digit labels into NB_CLASSES-wide categorical vectors.
Y_train, Y_test = (
    np_utils.to_categorical(labels, NB_CLASSES) for labels in (y_train, y_test)
)

# ---- model -----------------------------------------------------------------
model = Sequential([
    # first hidden layer
    Dense(N_HIDDEN, input_shape=(RESHAPED,)),
    Activation('relu'),
    Dropout(DROPOUT),
    # second hidden layer
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),
    # third hidden layer (the change under test in this experiment)
    Dense(N_HIDDEN),
    Activation('relu'),
    Dropout(DROPOUT),
    # output layer: one softmax probability per digit
    Dense(NB_CLASSES),
    Activation('softmax'),
])
model.summary()

# Choose the loss, the optimizer and the metric reported while training.
model.compile(
    loss='categorical_crossentropy',
    optimizer=OPTIMIZER,
    metrics=['accuracy'],
)

# ---- train (timed) ---------------------------------------------------------
start_time = time.time()
history = model.fit(
    X_train,
    Y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_split=VALIDATION_SPLIT,
)
end_time = time.time()
final_time = end_time - start_time  # seconds spent inside model.fit

# ---- evaluate --------------------------------------------------------------
# score[0] is the loss on the test set, score[1] the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])
print(f"Test completion time: {final_time:.2f} seconds")

In [None]:
'''
   Increased hidden neurons to 200 with SGD optimization algorithm
'''
from __future__ import print_function
import numpy as np
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
# FIX: this experiment is the SGD run, so import SGD (the cell previously
# imported and used Adam, which made it a duplicate of the 200-neuron Adam run).
from keras.optimizers import SGD
from keras.utils import np_utils
np.random.seed(1671) # for reproducibility

# add this to see the difference in time it took to complete
import time


# parameters - same as the 200-neuron experiment except for the optimizer
NB_EPOCH = 20
BATCH_SIZE = 40
VERBOSE = 1
NB_CLASSES = 10 # number of outputs = number of digits

#  Set the optimization algorithm used to train the model's weights to Stochastic Gradient Descent (SGD).
#  This impacts how the neural network learns
OPTIMIZER = SGD()
N_HIDDEN = 200
VALIDATION_SPLIT=0.2 # how much TRAIN is reserved for VALIDATION
DROPOUT=0.3

# load the MNIST images together with their digit labels
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#X_train is 60000 rows of 28x28 values --> reshaped in 60000 x 784
RESHAPED = 784

# flatten each image into a single row of RESHAPED values and convert to
# float32 so the pixels can be scaled below
X_train = X_train.reshape(60000, RESHAPED)
X_test = X_test.reshape(10000, RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

# scale both the train and test images by dividing every pixel by 255,
# so each value lies between 0 and 1
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')


# convert the integer digit labels into NB_CLASSES-wide categorical vectors,
# the label format expected by the categorical_crossentropy loss used below
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# set up model
model = Sequential()

# first hidden layer
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))

# second hidden layer 
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))

# output: one softmax probability per digit
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# choose the loss function, the optimizer (SGD for this experiment)
# and the metric we are interested in
model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# start timer 
start_time = time.time()

# train on the training data with the configured batch size and number of
# epochs; VALIDATION_SPLIT of the training data is held out for validation
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# end timer - final_time is the number of seconds spent inside model.fit
end_time = time.time()
final_time = end_time - start_time

# evaluate on the held-out test set: score[0] is the loss, score[1] the accuracy
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)

print("Test score:", score[0])
print('Test accuracy:', score[1])
print(f"Test completion time: {final_time:.2f} seconds")

# Experiments 

- Initial 
- Increased hidden neurons to 200
- Decreased hidden neurons to 75
- Increased hidden neurons to 200 with an additional hidden layer
- Increased hidden neurons to 200 with SGD optimization algorithm

# Results 

|                                                              | Accuracy | Validation | Test | Time (minutes) |
|--------------------------------------------------------------|----------|------------|------|------|
| Initial                                                      |    98.1%      |  97.8%  | 97.9%       |      11.7     |
| Increased hidden neurons to 200                              |     98.6%     |    98%        |  98%    |   19   |
| Decreased hidden neurons to 75                               |       96.6%   |       97.3%     |   97.4%   |  7.1    |
| Increased hidden neurons to 200 with additional hidden layer |       98.4%   |     97.8%       |   98%   |   14.58   |
| Increased hidden neurons to 200 with SGD algorithm |       95.9%   |     96.9%       |   96.7%   |   8.1   |

# Conclusion

In my experiment, I worked with the `N_HIDDEN` parameter, representing the hidden neurons in the code. Initially, I set the hidden neurons to 128, following the guidance from Chapter 1, "Neural Network Foundations," in "Deep Learning with Keras." I used the code from this chapter for the neural network but made minor adjustments to include a variable measuring the completion time `final_time` for each test. I believed this data would clarify the performance of each experiment, especially since the accuracy, validation, and test results were closely matched.

My hypothesis was that increasing the hidden neurons would lead to higher accuracy, validation, and testing percentages but would also extend the training time. This proved correct, as we achieved 98% for accuracy, validation, and testing, and the training took 19 minutes to complete. Compared to the initial run and the run with fewer hidden neurons, this training took much longer. Since our results in terms of accuracy, validation, and testing were really good, I wanted to optimize our model further. The goal was to maintain 200 hidden neurons, keep the results at 98%, and reduce the training time.

I set up two experiments to try and reduce the training time. The first experiment was to add an additional hidden layer. I thought that spreading the work across another hidden layer would make it easier for the model to manage the complexity of so many hidden neurons in each epoch, so it could interpret the data more quickly. This approach did reduce the training time. However, it also lowered the results for accuracy, validation, and testing. So overall, this was not an ideal optimization.

The second experiment I tried was changing the optimization algorithm. In the initial script, we changed the algorithm from SGD to Adam because Adam is more complex than SGD, so it should theoretically train better on the data. However, I suspected that running a more complex algorithm to handle the loss between runs might be slowing things down. So, I tried using SGD, which has a more straightforward impact, hoping the results would remain the same and the training time would decrease. This hypothesis also turned out to be incorrect; the training time decreased, but the results for the other metrics were also lower.



