# MNIST in a bit more complext network

#### Load dependencies and make reproducible

In [1]:
import numpy as np
np.random.seed(42)

import os
import keras
from time import time
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LeakyReLU, BatchNormalization
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


#### Load MNIST data

In [2]:
(X_train, y_train), (X_test, y_test) = mnist.load_data()

#### Basic Preprocessing
Flatten the input and then normalise it

In [3]:
X_train = X_train.reshape(60000, 784).astype('float32')
X_test = X_test.reshape(10000, 784).astype('float32')

In [4]:
X_train /= 255
X_test /= 255

#### One-hot encode the output

In [5]:
n_classes = 10
y_train = keras.utils.to_categorical(y_train, n_classes)
y_test = keras.utils.to_categorical(y_test, n_classes)

#### Neural Network architecture

The `dropout` function Dropout consists in randomly setting a fraction rate of input units to 0 at each update during training time, which helps prevent overfitting.

The `BatchNormalization` just normalizes the activations of the previous layer at each batch


In [6]:
model = Sequential()

model.add(Dense(128, input_shape = (784,), activation = 'relu'))
model.add(Dropout(0.2))

model.add(BatchNormalization())
model.add(Dense(256, activation = 'relu'))
model.add(Dropout(0.3))

model.add(BatchNormalization())
model.add(Dense(1024, activation = 'relu'))
model.add(Dropout(0.5))

model.add(BatchNormalization())
model.add(Dense(10, activation = 'softmax'))

In [7]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 128)               100480    
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 128)               512       
_________________________________________________________________
dense_2 (Dense)              (None, 256)               33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
batch_normalization_2 (Batch (None, 256)               1024      
_________________________________________________________________
dense_3 (Dense)              (None, 1024)              263168    
__________

#### Callbacks

In [8]:
modelCheckpoint = ModelCheckpoint(monitor='val_acc', filepath='model_output/weights-deepnet-mnist.hdf5',
                                               save_best_only=True, mode='max')
earlyStopping = EarlyStopping(monitor='val_acc', mode='max', patience=20)


if not os.path.exists('model_output'):
    os.makedirs('model_output')
    
tensorboard = TensorBoard("logs/02_model-deepnet-mnist")

#### Configure model

In [9]:
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

#### Train!

In [10]:
history = model.fit(X_train, y_train, batch_size = 128, epochs = 5, verbose = 1,
          validation_split = 0.1, callbacks=[modelCheckpoint, earlyStopping, tensorboard])

Train on 54000 samples, validate on 6000 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


#### Test Predictions

In [12]:
saved_model = keras.models.load_model('model_output/weights-deepnet-mnist.hdf5')
predictions = saved_model.predict_classes(X_test, verbose = 2)

#### Test Final Accuracy

In [13]:
final_loss, final_acc = model.evaluate(X_test, y_test, verbose = 1)
print("Final loss: {0:.4f}, final accuracy: {1:.4f}".format(final_loss, final_acc))

Final loss: 0.0837, final accuracy: 0.9743
