In [4]:
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Baseline experiment: two ReLU hidden layers trained with plain SGD on MNIST.

# Seed NumPy's global RNG so repeated runs start from the same NumPy state.
np.random.seed(1671)

# --- Network and training hyperparameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # stochastic gradient descent with Keras defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of TRAIN held out for validation
RESHAPED = 784           # length of one flattened 28x28 image

# --- Data: MNIST, already split between train and test ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a single RESHAPED-long row. The row count is
# inferred (-1) rather than hard-coded as 60000 / 10000, so the cell keeps
# working if the arrays are subsampled before this point.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize: MNIST pixels are 8-bit intensities, so dividing by 255 maps them
# into [0, 1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class labels to one-hot (binary class matrix) targets, as
# required by the categorical cross-entropy loss used below.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden layers of N_HIDDEN units each (ReLU),
# NB_CLASSES outputs,
# final stage is softmax.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# --- Train, holding out VALIDATION_SPLIT of the training set for validation ---
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# --- Evaluate on the untouched test set: score is [loss, accuracy] ---
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


60000 train samples
10000 test samples
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             (None, 128)               100480    
_________________________________________________________________
activation_10 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_11 (Dense)             (None, 128)               16512     
_________________________________________________________________
activation_11 (Activation)   (None, 128)               0         
_________________________________________________________________
dense_12 (Dense)             (None, 10)                1290      
_________________________________________________________________
activation_12 (Activation)   (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [None]:
#following book to establish accuracy rate data sets with two hidden layers
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Dropout experiment: same two-hidden-layer network as the baseline, with a
# Dropout layer after each hidden activation and a much longer training run.

# Seed NumPy's global RNG so repeated runs start from the same NumPy state.
np.random.seed(1671)

# --- Network and training hyperparameters ---
NB_EPOCH = 250           # number of passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # stochastic gradient descent with Keras defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of TRAIN held out for validation
DROPOUT = 0.3            # rate passed to each Dropout layer
RESHAPED = 784           # length of one flattened 28x28 image

# --- Data: MNIST, already split between train and test ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a single RESHAPED-long row. The row count is
# inferred (-1) rather than hard-coded as 60000 / 10000, so the cell keeps
# working if the arrays are subsampled before this point.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize: MNIST pixels are 8-bit intensities, so dividing by 255 maps them
# into [0, 1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class labels to one-hot (binary class matrix) targets, as
# required by the categorical cross-entropy loss used below.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden layers of N_HIDDEN units each (ReLU), each followed by Dropout,
# NB_CLASSES outputs,
# final stage is softmax.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# --- Train, holding out VALIDATION_SPLIT of the training set for validation ---
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# --- Evaluate on the untouched test set: score is [loss, accuracy] ---
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


In [3]:
#testing different batch sizes (50)
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Batch-size experiment (BATCH_SIZE = 50): two-hidden-layer network with
# Dropout, trained for 20 epochs.

# Seed NumPy's global RNG so repeated runs start from the same NumPy state.
np.random.seed(1671)

# --- Network and training hyperparameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 50          # samples per gradient update (value under test)
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # stochastic gradient descent with Keras defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of TRAIN held out for validation
DROPOUT = 0.3            # rate passed to each Dropout layer
RESHAPED = 784           # length of one flattened 28x28 image

# --- Data: MNIST, already split between train and test ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a single RESHAPED-long row. The row count is
# inferred (-1) rather than hard-coded as 60000 / 10000, so the cell keeps
# working if the arrays are subsampled before this point.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize: MNIST pixels are 8-bit intensities, so dividing by 255 maps them
# into [0, 1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class labels to one-hot (binary class matrix) targets, as
# required by the categorical cross-entropy loss used below.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden layers of N_HIDDEN units each (ReLU), each followed by Dropout,
# NB_CLASSES outputs,
# final stage is softmax.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# --- Train, holding out VALIDATION_SPLIT of the training set for validation ---
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# --- Evaluate on the untouched test set: score is [loss, accuracy] ---
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_7 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_7 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)

In [2]:
#Testing different batch sizes (100)
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Batch-size experiment (BATCH_SIZE = 100): two-hidden-layer network trained
# for 20 epochs.
#
# NOTE(review): unlike the batch-size 50 and 250 cells in this notebook, this
# cell neither imports nor adds Dropout layers, so its accuracy figures are not
# a like-for-like comparison with those runs. The architecture is left as it
# was when the recorded output was produced; add Dropout(0.3) after each hidden
# activation and re-run if a controlled comparison is wanted.

# Seed NumPy's global RNG so repeated runs start from the same NumPy state.
np.random.seed(1671)

# --- Network and training hyperparameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 100         # samples per gradient update (value under test)
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # stochastic gradient descent with Keras defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of TRAIN held out for validation
RESHAPED = 784           # length of one flattened 28x28 image

# --- Data: MNIST, already split between train and test ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a single RESHAPED-long row. The row count is
# inferred (-1) rather than hard-coded as 60000 / 10000, so the cell keeps
# working if the arrays are subsampled before this point.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize: MNIST pixels are 8-bit intensities, so dividing by 255 maps them
# into [0, 1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class labels to one-hot (binary class matrix) targets, as
# required by the categorical cross-entropy loss used below.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden layers of N_HIDDEN units each (ReLU),
# NB_CLASSES outputs,
# final stage is softmax.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# --- Train, holding out VALIDATION_SPLIT of the training set for validation ---
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# --- Evaluate on the untouched test set: score is [loss, accuracy] ---
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

60000 train samples
10000 test samples
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_4 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_5 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_6 (Activation)    (None, 10)                0         
Total params: 118,282
Trainable params: 118,282
Non-trainable params: 0
_________

In [1]:
#Testing different batch sizes (250)
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Dropout, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Batch-size experiment (BATCH_SIZE = 250): two-hidden-layer network with
# Dropout, trained for 20 epochs.

# Seed NumPy's global RNG so repeated runs start from the same NumPy state.
np.random.seed(1671)

# --- Network and training hyperparameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 250         # samples per gradient update (value under test)
VERBOSE = 1              # forwarded as `verbose=` to fit() and evaluate()
NB_CLASSES = 10          # number of outputs = number of digits
OPTIMIZER = SGD()        # stochastic gradient descent with Keras defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of TRAIN held out for validation
DROPOUT = 0.3            # rate passed to each Dropout layer
RESHAPED = 784           # length of one flattened 28x28 image

# --- Data: MNIST, already split between train and test ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every 28x28 image into a single RESHAPED-long row. The row count is
# inferred (-1) rather than hard-coded as 60000 / 10000, so the cell keeps
# working if the arrays are subsampled before this point.
X_train = X_train.reshape(-1, RESHAPED).astype('float32')
X_test = X_test.reshape(-1, RESHAPED).astype('float32')

# Normalize: MNIST pixels are 8-bit intensities, so dividing by 255 maps them
# into [0, 1].
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# Convert integer class labels to one-hot (binary class matrix) targets, as
# required by the categorical cross-entropy loss used below.
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- Model ---
# Two hidden layers of N_HIDDEN units each (ReLU), each followed by Dropout,
# NB_CLASSES outputs,
# final stage is softmax.
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer=OPTIMIZER,
              metrics=['accuracy'])

# --- Train, holding out VALIDATION_SPLIT of the training set for validation ---
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE, epochs=NB_EPOCH,
                    verbose=VERBOSE, validation_split=VALIDATION_SPLIT)

# --- Evaluate on the untouched test set: score is [loss, accuracy] ---
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])

Using TensorFlow backend.


60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)

To test how different parameters affect the training and testing accuracy, I adjusted the batch size across three tests. I first ran a test with a batch size of 50 and got a training accuracy of 94.73% and a testing accuracy of 96.36%, indicating that more epochs would be necessary to gain better accuracy at this batch size. I then tested a batch size of 100 and got a training accuracy of 95.13% and a testing accuracy of 94.98%. The training accuracy should be higher than the testing accuracy, so this is a step in the right direction, but it is still a lower score than it could be. (Note that the batch-size-100 run is the only one of the three built without Dropout layers, so this difference may be due to the missing dropout rather than to the batch size.) I then tested a batch size of 250 and got a training accuracy of 88.49% and a testing accuracy of 92.3%. This was a significant backslide compared to the smaller batch sizes. Compared with the test using a batch size of 128 and 250 epochs, where the training accuracy is 91.54% and the testing accuracy is 94.25%, these results indicate that more epochs are needed to gain higher training accuracy, as a larger batch size alone is not enough and weakens the results.