# MNIST classification using Keras

## Load the dataset

In [1]:
import tensorflow as tf
from tensorflow import keras

# Fetch MNIST through Keras; load_data() yields a (train, test) pair, each of
# which is itself an (images, labels) pair.
mnist_train, mnist_test = keras.datasets.mnist.load_data()
X_train_full, y_train_full = mnist_train
X_test_full, y_test_full = mnist_test

## Create a validation set & scale the data

In [2]:
import numpy as np

boundary = 50000  # rows before this index train the model; the rest validate it
scale = 255.0     # divisor applied to every pixel value

# Row selectors for carving the validation set out of the original training data.
train_rows = slice(None, boundary)
val_rows = slice(boundary, None)

# Features: rescale by `scale` and store as float32.
X_train = (X_train_full[train_rows] / scale).astype(np.float32)
X_val = (X_train_full[val_rows] / scale).astype(np.float32)
X_test = (X_test_full / scale).astype(np.float32)

# Labels: same row selection, stored as int8.
y_train = y_train_full[train_rows].astype(np.int8)
y_val = y_train_full[val_rows].astype(np.int8)
y_test = y_test_full.astype(np.int8)

# Sanity report: shape and dtype of every split, then the largest pixel value
# in each feature array (should be at most 1.0 after scaling).
splits = (
    ('\nTrain', X_train, y_train),
    ('\nVal', X_val, y_val),
    ('\nTest', X_test, y_test),
)

for split_heading, split_images, split_labels in splits:
    print(split_heading)
    print(split_images.shape, split_images.dtype)
    print(split_labels.shape, split_labels.dtype)

print('\nMax values:')
for _, split_images, _ in splits:
    print(split_images.max())


Train
(50000, 28, 28) float32
(50000,) int8

Val
(10000, 28, 28) float32
(10000,) int8

Test
(10000, 28, 28) float32
(10000,) int8

Max values:
1.0
1.0
1.0


## Build the neural network structure

In [10]:
num_neurons = 300  # units in each hidden layer

# Fully connected classifier: flatten each input image, pass it through three
# identical ReLU hidden layers, and finish with a 10-unit softmax layer that
# outputs one probability per class.
model = keras.models.Sequential(
    [keras.layers.Flatten(input_shape=X_train.shape[1:])]
    + [keras.layers.Dense(num_neurons, activation='relu') for _ in range(3)]
    + [keras.layers.Dense(10, activation='softmax')]
)

model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 300)               235500    
_________________________________________________________________
dense_1 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_2 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_3 (Dense)              (None, 10)                3010      
Total params: 419,110
Trainable params: 419,110
Non-trainable params: 0
_________________________________________________________________


## Compile the model

In [11]:
# The labels are integer class ids rather than one-hot vectors, hence the
# sparse variant of categorical cross-entropy.
# `learning_rate` is the supported keyword for the optimizer; the legacy `lr`
# alias is deprecated in tf.keras and rejected by newer Keras releases.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer=keras.optimizers.SGD(learning_rate=0.05),
              metrics=['accuracy'])

## Set up TensorBoard

### Set up the log files

In [12]:
import os
import time

# Parent directory for the TensorBoard logs: a 'my_logs' folder under the
# current directory. Each run gets its own sub-directory beneath it.
root_logdir = os.path.join(os.curdir, 'my_logs')  # Specify where to store the log files

def get_run_log_dir(logdir):
    """Return the path of a timestamped run directory under ``logdir``.

    The run name is built from the current local time in the form
    ``run_YYYY_MM_DD-HH_MM_SS``, so calls made in different seconds yield
    different paths and logs from earlier runs are kept.

    Parameters
    ----------
    logdir : str
        Parent directory under which the run directory path is formed.

    Returns
    -------
    str
        ``logdir`` joined with the timestamped run name. The directory itself
        is not created here.
    """
    run_name = time.strftime('run_%Y_%m_%d-%H_%M_%S', time.localtime())
    run_dir = os.path.join(logdir, run_name)
    return run_dir

## Set up callbacks

In [13]:
# Checkpoint to 'best_mnist_model.h5', keeping only the best model seen so far
# (`monitor` is left at the callback's default).
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    filepath='best_mnist_model.h5',
    save_best_only=True,
)

# Send TensorBoard logs to a run directory named after the current time.
run_logdir = get_run_log_dir(root_logdir)
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=run_logdir)

## Train the model

In [15]:
# Fit on the training split for 30 epochs, using the held-out validation split
# for validation; the callbacks handle TensorBoard logging and checkpointing.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=30,
    validation_data=(X_val, y_val),
    callbacks=[tensorboard_cb, checkpoint_cb],
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


## Save the model (including weights)

In [16]:
# Write the model as it stands at this point in the notebook to a single file.
model_filename = 'mnist_keras_model.h5'
model.save(filepath=model_filename)

## Load the best checkpointed model (including weights)

In [17]:
# Load the checkpoint file written by the ModelCheckpoint callback
# (save_best_only=True), not the 'mnist_keras_model.h5' file saved after
# training. This rebinds `model`, so later cells use the loaded checkpoint.
model = keras.models.load_model('best_mnist_model.h5')

## Evaluate the model

In [20]:
# Print the evaluation result (loss followed by accuracy) for the train,
# validation and test splits, in that order.
for eval_images, eval_labels in ((X_train, y_train), (X_val, y_val), (X_test, y_test)):
    print(model.evaluate(eval_images, eval_labels, verbose=0))

[0.01641685505398549, 0.99536]
[0.07758548712676856, 0.9784]
[0.06852413096951786, 0.9782]
