# Objective: Use Keras to create a deep neural network that classifies handwritten digits #

We will be using the MNIST dataset. 

## Step 0: Load the data ##

In [3]:
import os
import struct
import numpy as np
 
def load_mnist(path, kind='train'):
    """Load one MNIST split (images and labels) from raw IDX files in `path`.

    Parameters
    ----------
    path : str
        Directory containing the uncompressed data files.
    kind : str
        File-name prefix selecting the split. The files read are
        ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``.

    Returns
    -------
    images : numpy.ndarray of uint8, shape (num_images, rows * cols)
        One flattened image per row; the dimensions are taken from the
        image file header instead of being hard-coded.
    labels : numpy.ndarray of uint8, shape (num_labels,)
        One class label per image.

    Raises
    ------
    ValueError
        If the number of labels differs from the image count declared in
        the image header, or if the pixel payload does not match the
        declared dimensions.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # Skip the 8-byte header (two big-endian uint32 values); everything
        # after it is one uint8 label per item.
        struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # 16-byte header: four big-endian uint32 values, the last three being
        # the image count and the per-image row/column counts.
        _magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    if len(labels) != num:
        raise ValueError(
            'label/image count mismatch: %d labels but %d images declared'
            % (len(labels), num))

    # reshape raises ValueError itself if the payload is truncated or padded.
    images = pixels.reshape(num, rows * cols)

    return images, labels

In [4]:
'''
Load the training split (the train-* files in the mnist folder)
'''
X_train, y_train = load_mnist(path='mnist', kind='train')

# shape[:2] is the (rows, columns) pair consumed by the two %d fields
print('Rows: %d, columns: %d' % X_train.shape[:2])

Rows: 60000, columns: 784


In [5]:
'''
Load the testing split (the t10k-* files in the mnist folder)
'''
X_test, y_test = load_mnist(path='mnist', kind='t10k')

# shape[:2] is the (rows, columns) pair consumed by the two %d fields
print('Rows: %d, columns: %d' % X_test.shape[:2])

Rows: 10000, columns: 784


In [6]:
'''
Peek at the first ten rows of the training images
'''
X_train[0:10]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

## Step 1: Data preprocessing ## 

In [7]:
'''
Convert the MNIST image arrays to 32-bit floats
'''
import theano

# Set Theano's float type once, then reuse the same value as the cast target
theano.config.floatX = 'float32'
float_dtype = theano.config.floatX

# Re-bind both splits to their float32 copies
X_train, X_test = X_train.astype(float_dtype), X_test.astype(float_dtype)

In [9]:
'''
Convert class labels into one-hot format
'''
from keras.utils import np_utils
import pprint

print('First 3 labels: ', y_train[:3])

# One-hot encode using to_categorical(): one row per label, one column per class
y_train_ohe = np_utils.to_categorical(y_train)

# Written as a single-argument print() call rather than a Python 2 print
# statement: it prints the same text on Python 2 and still parses on Python 3.
print('\nFirst 3 labels (one-hot):\n')
pprint.pprint(y_train_ohe[:3])

('First 3 labels: ', array([5, 0, 4], dtype=uint8))

First 3 labels (one-hot):

array([[ 0.,  0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.],
       [ 1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.,  0.,  0.,  0.,  0.,  0.]])


## Step 2: Set up Deep NN using Keras ##

* Set the hyperbolic tangent (tanh) as the activation function of the first two layers.
* Set softmax for the output layer classification.

In [11]:
'''
Build and compile the feed-forward network with Keras
'''
# Necessary imports
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import SGD

# Seed NumPy's global RNG before any layer is created
# (presumably so the 'uniform' weight initialisation is repeatable).
np.random.seed(1)

n_features = X_train.shape[1]      # number of columns (pixels) per image
n_classes = y_train_ohe.shape[1]   # number of one-hot columns
n_hidden = 50                      # width of both tanh layers

# Feedforward neural network, assembled layer by layer
model = Sequential()

# First layer: takes one input per pixel column and applies tanh
model.add(Dense(input_dim=n_features,
                output_dim=n_hidden,
                init='uniform',
                activation='tanh'))

# Hidden layer: its input size equals the previous layer's output size
model.add(Dense(input_dim=n_hidden,
                output_dim=n_hidden,
                init='uniform',
                activation='tanh'))

# Output layer: one unit per class, softmax activation
model.add(Dense(input_dim=n_hidden,
                output_dim=n_classes,
                init='uniform',
                activation='softmax'))

# Optimizer: stochastic gradient descent with momentum.
# NOTE(review): in Keras' SGD, `decay` is documented as learning-rate decay
# per update, not weight decay -- confirm against the installed version.
sgd = SGD(lr=0.001,
          momentum=.9,
          decay=1e-7)

# categorical_crossentropy is the generalization of binary cross entropy
# for multiclass classification problems.
model.compile(optimizer=sgd,
              loss='categorical_crossentropy')

## Step 3: Train the model ##

In [13]:
'''
Fit the model to the training data with .fit
'''
fit_options = dict(
    nb_epoch=50,           # 50 passes over the training data
    batch_size=300,        # 300 training samples per batch
    verbose=1,
    validation_split=0.1,  # hold out 10% of the training samples for validation, to watch for overfitting
    show_accuracy=True,    # presumably reports accuracy alongside the loss -- confirm
)

# Kept as the cell's last expression so the returned History object is displayed
model.fit(X_train, y_train_ohe, **fit_options)

Train on 54000 samples, validate on 6000 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x123db1c10>

## Step 4: Make predictions ##

In [14]:
'''
Predict a class for every training image with predict_classes
'''
y_train_pred = model.predict_classes(X_train, verbose=0)

print('First 3 predictions: ', y_train_pred[0:3])

('First 3 predictions: ', array([5, 0, 4]))


## Step 5: Performance evaluation ##

In [20]:
'''
Accuracy on training set
'''
print y_train_pred
print y_train
print X_train.shape[0]
from __future__ import division

# Computer accuracy
train_acc = np.sum(y_train == y_train_pred, axis=0) / X_train.shape[0]
print('Training accuracy: %.2f%%' % (train_acc * 100))

[5 0 4 ..., 5 6 8]
[5 0 4 ..., 5 6 8]
60000
Training accuracy: 94.60%


In [21]:
'''
Accuracy on testing set
'''
y_test_pred = model.predict_classes(X_test, verbose=0)

# The mean of the element-wise matches is the fraction of correct predictions.
# Unlike np.sum(...) / n, it does not depend on an earlier cell having enabled
# true division, so the cell also gives the right answer on a fresh Python 2 kernel.
test_acc = np.mean(y_test == y_test_pred)
print('Test accuracy: %.2f%%' % (test_acc * 100))

Test accuracy: 93.95%
