In [2]:
import numpy
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout

Using TensorFlow backend.


### Load MNIST Dataset

In [3]:
# Fetch the MNIST train/test splits through Keras (the first call downloads the archive).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Downloading data from https://s3.amazonaws.com/img-datasets/mnist.npz

### Flatten each 28×28 image into a 784-element vector

In [4]:
# Features per image = product of every non-batch dimension.
# Using shape[1:] rather than shape[1] * shape[2] keeps this cell re-runnable:
# once X_train is already flat, shape[1:] is (784,) and the product is unchanged,
# whereas indexing shape[2] on a 2-D array raises IndexError.
num_pixels = int(numpy.prod(X_train.shape[1:]))

# Collapse each image into one float32 row vector (float so it can be rescaled later).
X_train = X_train.reshape(X_train.shape[0], -1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], -1).astype('float32')

In [13]:
# Sanity-check the flattened arrays: sample count and (samples, features) shapes.
for caption, value in (
    ('No. Training examples: ', X_train.shape[0]),
    ('X_train shape: ', X_train.shape),
    ('X_test shape: ', X_test.shape),
):
    print(caption, value)

No. Training examples:  60000
X_train shape:  (60000, 784)
X_test shape:  (10000, 784)


### Encode outputs to one-hot

In [14]:
# Convert the class-index label vectors into one-hot rows with 10 columns (digits 0-9).
# The ndim guard makes this cell safe to re-run: labels that are already encoded
# (2-D) are left untouched instead of being passed through to_categorical again.
if y_train.ndim == 1:
    y_train = keras.utils.to_categorical(y_train, num_classes=10)
if y_test.ndim == 1:
    y_test = keras.utils.to_categorical(y_test, num_classes=10)

### Normalize inputs from 0-255 to 0-1

In [15]:
# Rescale pixel intensities from 0-255 to 0-1, in place.
# The division is applied only while the array is still on the raw scale, so
# re-running this cell does not shrink the inputs a second time.
if X_train.max() > 1.0:
    X_train /= 255
if X_test.max() > 1.0:
    X_test /= 255

## Shallow NN with a 100-unit hidden layer and a softmax output layer

### Build model

In [16]:
# Create the (initially empty) Sequential container for the shallow network.
model = Sequential()

In [19]:
# hidden layer
# Rebuild the network from scratch inside this cell. Calling model.add() on a
# model created in a different cell appends the layers again on every re-run,
# silently producing a deeper network than intended.
model = Sequential()

# hidden layer: 100 ReLU units over the flattened pixels, with an L2 weight penalty
model.add(Dense(100, activation='relu',
                input_dim=num_pixels,
                kernel_initializer='VarianceScaling',
                kernel_regularizer=keras.regularizers.l2(0.01)))

# output layer: one softmax probability per digit class
model.add(Dense(10, activation='softmax'))

### Configure the learning process with a choice of:
- **Optimizer**
- **Loss function**
- **List of Metrics**

In [21]:
# Adam optimizer with categorical cross-entropy (the targets are one-hot);
# accuracy is tracked as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

### Train the model

In [27]:
# Train for 60 passes over the training set in mini-batches of 256;
# verbose=2 prints one summary line per epoch.
model.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=60,
    verbose=2,
)

Epoch 1/60
0s - loss: 0.1941 - acc: 0.9675
Epoch 2/60
0s - loss: 0.1934 - acc: 0.9671
Epoch 3/60
0s - loss: 0.1881 - acc: 0.9686
Epoch 4/60
0s - loss: 0.1878 - acc: 0.9681
Epoch 5/60
0s - loss: 0.1858 - acc: 0.9693
Epoch 6/60
0s - loss: 0.1847 - acc: 0.9685
Epoch 7/60
0s - loss: 0.1837 - acc: 0.9691
Epoch 8/60
0s - loss: 0.1820 - acc: 0.9694
Epoch 9/60
0s - loss: 0.1788 - acc: 0.9698
Epoch 10/60
0s - loss: 0.1796 - acc: 0.9700
Epoch 11/60
0s - loss: 0.1803 - acc: 0.9696
Epoch 12/60
0s - loss: 0.1779 - acc: 0.9702
Epoch 13/60
0s - loss: 0.1764 - acc: 0.9706
Epoch 14/60
0s - loss: 0.1758 - acc: 0.9706
Epoch 15/60
0s - loss: 0.1725 - acc: 0.9715
Epoch 16/60
0s - loss: 0.1726 - acc: 0.9710
Epoch 17/60
0s - loss: 0.1725 - acc: 0.9711
Epoch 18/60
0s - loss: 0.1716 - acc: 0.9712
Epoch 19/60
0s - loss: 0.1685 - acc: 0.9718
Epoch 20/60
0s - loss: 0.1691 - acc: 0.9718
Epoch 21/60
0s - loss: 0.1693 - acc: 0.9721
Epoch 22/60
0s - loss: 0.1687 - acc: 0.9712
Epoch 23/60
0s - loss: 0.1649 - acc: 0.97

<keras.callbacks.History at 0x7fc1e5c544a8>

### Evaluate model on the test set

In [28]:
# evaluate() returns [loss, accuracy] for the metrics configured in compile();
# report the accuracy entry as a percentage.
score = model.evaluate(
    X_test,
    y_test,
    batch_size=256,
)
print("Test set accuracy: %.2f%%" % (100 * score[1]))



## Trying the same network, replacing L2 regularization with Dropout

In [49]:
# Same architecture as the first model, but regularised with Dropout(0.5) after
# the hidden layer instead of an L2 kernel penalty.
model_2 = Sequential([
    # hidden layer
    Dense(
        100,
        activation='relu',
        input_dim=num_pixels,
        kernel_initializer='VarianceScaling',
    ),
    Dropout(0.5),
    # output layer
    Dense(10, activation='softmax'),
])

# Identical optimizer / loss / metric configuration to the first model.
model_2.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

# Identical training schedule: 60 epochs, batches of 256, one log line per epoch.
model_2.fit(
    X_train,
    y_train,
    batch_size=256,
    epochs=60,
    verbose=2,
)

# Held-out performance; index 1 of the result is the accuracy metric.
scores = model_2.evaluate(X_test, y_test, batch_size=256)
print("Test set accuracy: %.2f%%" % (100 * scores[1]))