In [3]:
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation 
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Baseline experiment: a two-hidden-layer MLP trained on MNIST with plain SGD.

# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own RNG
# state -- confirm before relying on bit-identical results.
np.random.seed(1671)

# --- network and training hyper-parameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded to fit()/evaluate() to control logging
NB_CLASSES = 10          # number of outputs = number of digit classes
OPTIMIZER = SGD()        # stochastic gradient descent with library defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of the training data reserved for validation

# --- data loading and preprocessing ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # X_train is 60000 rows of 28x28 values
RESHAPED = 784  # 28 * 28 pixels flattened into a single vector
# Flatten every image to a RESHAPED-long row. The row count is taken from the
# arrays themselves rather than hard-coding 60000 / 10000.
X_train = X_train.reshape(X_train.shape[0], RESHAPED)
X_test = X_test.reshape(X_test.shape[0], RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# normalize pixel values by the 8-bit maximum
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one column per class)
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN ReLU units, NB_CLASSES outputs,
# final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# --- training ---
model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=NB_EPOCH,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

# --- evaluation on the held-out test set ---
# score[0] is reported as the test score (loss), score[1] as the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


Using TensorFlow backend.


Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz
60000 train samples
10000 test samples
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 10)                1290      
_________________________________________________________________
activation_3 (Activation)    (None, 10)                0         
Total param

In [5]:
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import SGD 
from keras.utils import np_utils 
# Experiment 2: same MLP as the baseline, with Dropout after each hidden layer
# and a much longer training schedule.

# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own RNG
# state -- confirm before relying on bit-identical results.
np.random.seed(1671)

# --- network and training hyper-parameters ---
NB_EPOCH = 250           # number of passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded to fit()/evaluate() to control logging
NB_CLASSES = 10          # number of outputs = number of digit classes
OPTIMIZER = SGD()        # stochastic gradient descent with library defaults
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of the training data reserved for validation
DROPOUT = 0.3            # rate passed to each Dropout layer

# --- data loading and preprocessing ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # X_train is 60000 rows of 28x28 values
RESHAPED = 784  # 28 * 28 pixels flattened into a single vector
# Flatten every image to a RESHAPED-long row. The row count is taken from the
# arrays themselves rather than hard-coding 60000 / 10000.
X_train = X_train.reshape(X_train.shape[0], RESHAPED)
X_test = X_test.reshape(X_test.shape[0], RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# normalize pixel values by the 8-bit maximum
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one column per class)
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN ReLU units, each followed by
# Dropout; NB_CLASSES outputs; final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# --- training ---
model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=NB_EPOCH,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

# --- evaluation on the held-out test set ---
# score[0] is reported as the test score (loss), score[1] as the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


60000 train samples
10000 test samples
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_5 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_6 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)

KeyboardInterrupt: 

In [6]:
from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import RMSprop, Adam
from keras.utils import np_utils 
# Experiment 3: dropout MLP trained with RMSprop and a higher dropout rate.

# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own RNG
# state -- confirm before relying on bit-identical results.
np.random.seed(1671)

# --- network and training hyper-parameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 128         # samples per gradient update
VERBOSE = 1              # forwarded to fit()/evaluate() to control logging
NB_CLASSES = 10          # number of outputs = number of digit classes
OPTIMIZER = RMSprop()    # changed optimizer (library defaults)
N_HIDDEN = 128           # units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of the training data reserved for validation
DROPOUT = 0.5            # increased dropout rate to see the effect on results

# --- data loading and preprocessing ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # X_train is 60000 rows of 28x28 values
RESHAPED = 784  # 28 * 28 pixels flattened into a single vector
# Flatten every image to a RESHAPED-long row. The row count is taken from the
# arrays themselves rather than hard-coding 60000 / 10000.
X_train = X_train.reshape(X_train.shape[0], RESHAPED)
X_test = X_test.reshape(X_test.shape[0], RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# normalize pixel values by the 8-bit maximum
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one column per class)
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN ReLU units, each followed by
# Dropout; NB_CLASSES outputs; final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# --- training ---
model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=NB_EPOCH,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

# --- evaluation on the held-out test set ---
# score[0] is reported as the test score (loss), score[1] as the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


60000 train samples
10000 test samples
Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_8 (Dense)              (None, 128)               100480    
_________________________________________________________________
activation_8 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_9 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_9 (Activation)    (None, 128)               0         
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_10 (Dense

In [8]:
#### from __future__ import print_function 
import numpy as np 
from keras.datasets import mnist 
from keras.models import Sequential 
from keras.layers.core import Dense, Activation, Dropout
from keras.optimizers import Adam
from keras.utils import np_utils 
# Experiment 4: dropout MLP trained with Adam, a larger batch size and wider
# hidden layers.

# Seed NumPy's global RNG so NumPy-driven randomness repeats between runs.
# NOTE(review): this seeds NumPy only; the Keras backend may keep its own RNG
# state -- confirm before relying on bit-identical results.
np.random.seed(1671)

# --- network and training hyper-parameters ---
NB_EPOCH = 20            # number of passes over the training data
BATCH_SIZE = 180         # altered batch size (samples per gradient update)
VERBOSE = 1              # forwarded to fit()/evaluate() to control logging
NB_CLASSES = 10          # number of outputs = number of digit classes
OPTIMIZER = Adam()       # changed optimizer (library defaults)
N_HIDDEN = 180           # also altered: units in each hidden Dense layer
VALIDATION_SPLIT = 0.2   # fraction of the training data reserved for validation
DROPOUT = 0.3            # rate passed to each Dropout layer

# --- data loading and preprocessing ---
(X_train, y_train), (X_test, y_test) = mnist.load_data()  # X_train is 60000 rows of 28x28 values
RESHAPED = 784  # 28 * 28 pixels flattened into a single vector
# Flatten every image to a RESHAPED-long row. The row count is taken from the
# arrays themselves rather than hard-coding 60000 / 10000.
X_train = X_train.reshape(X_train.shape[0], RESHAPED)
X_test = X_test.reshape(X_test.shape[0], RESHAPED)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
# normalize pixel values by the 8-bit maximum
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one column per class)
Y_train = np_utils.to_categorical(y_train, NB_CLASSES)
Y_test = np_utils.to_categorical(y_test, NB_CLASSES)

# --- model: two hidden layers of N_HIDDEN ReLU units, each followed by
# Dropout; NB_CLASSES outputs; final stage is softmax ---
model = Sequential()
model.add(Dense(N_HIDDEN, input_shape=(RESHAPED,)))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(N_HIDDEN))
model.add(Activation('relu'))
model.add(Dropout(DROPOUT))
model.add(Dense(NB_CLASSES))
model.add(Activation('softmax'))
model.summary()

# --- training ---
model.compile(loss='categorical_crossentropy', optimizer=OPTIMIZER, metrics=['accuracy'])
history = model.fit(X_train, Y_train,
                    batch_size=BATCH_SIZE,
                    epochs=NB_EPOCH,
                    verbose=VERBOSE,
                    validation_split=VALIDATION_SPLIT)

# --- evaluation on the held-out test set ---
# score[0] is reported as the test score (loss), score[1] as the accuracy metric.
score = model.evaluate(X_test, Y_test, verbose=VERBOSE)
print("Test score:", score[0])
print('Test accuracy:', score[1])


60000 train samples
10000 test samples
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_14 (Dense)             (None, 180)               141300    
_________________________________________________________________
activation_14 (Activation)   (None, 180)               0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 180)               0         
_________________________________________________________________
dense_15 (Dense)             (None, 180)               32580     
_________________________________________________________________
activation_15 (Activation)   (None, 180)               0         
_________________________________________________________________
dropout_8 (Dropout)          (None, 180)               0         
_________________________________________________________________
dense_16 (Dense

In the second cell, I increased the number of epochs from 20 to 250 and added a dropout layer (rate 0.3) after each hidden layer, just like in the book. This gave the network more time to learn and resulted in a much higher accuracy rate.

In the third trial, I changed the optimizer from SGD to RMSprop and increased the dropout rate from 0.3 to 0.5. This produced an interesting result. At epoch #15 the accuracy reached a peak of 95.50% and then dropped slightly. It then went up slightly again, but never over 95%, and it continued to oscillate like this until training finished. The model must have peaked and then stopped learning as fast. I believe that with more epochs the accuracy would slowly keep increasing. However, this would take considerable resources for a very small improvement.

For the final trial, I again changed the optimizer, this time to Adam, and changed the BATCH_SIZE and N_HIDDEN variables from 128 to 180. The first epoch already had a very high accuracy of 84%, and the accuracy continued to increase quickly until it hit 97%; the program then finished with a very high 98.81%. The sudden jump from 84% to 95% in the first 2 epochs can be attributed to the better optimizer as well as the increased batch size. I also read that increasing the number of hidden units can increase accuracy: a larger N_HIDDEN adds more neurons to each hidden layer (it makes the layers wider rather than adding more layers to the network).