# Objective: Use Keras to build a deep neural network that classifies handwritten digits #

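We will be using the MNIST dataset: 60,000 training images and 10,000 test images of handwritten digits (0-9), each 28x28 pixels and stored as a flattened row of 784 values.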

## Step 0: Load the data ##

In [1]:
import os
import struct
import numpy as np
 
def load_mnist(path, kind='train'):
    """Load MNIST images and labels stored as IDX files under `path`.

    Parameters
    ----------
    path : str
        Directory holding the uncompressed ``<kind>-labels-idx1-ubyte`` and
        ``<kind>-images-idx3-ubyte`` files.
    kind : str
        File-name prefix selecting the split, e.g. ``'train'`` or ``'t10k'``.

    Returns
    -------
    images : numpy.ndarray
        ``uint8`` array of shape ``(n_images, rows * cols)``; each row is one
        flattened image. For standard MNIST this is ``(n_images, 784)``.
    labels : numpy.ndarray
        ``uint8`` array of shape ``(n_images,)``.

    Raises
    ------
    ValueError
        If a file does not start with the expected IDX magic number, or if
        the amount of data does not match the counts declared in the headers.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # Label header: big-endian magic number followed by the item count.
        magic, n = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError('%s is not an IDX1 label file (magic=%d)'
                             % (labels_path, magic))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    if labels.size != n:
        raise ValueError('%s declares %d labels but contains %d'
                         % (labels_path, n, labels.size))

    with open(images_path, 'rb') as imgpath:
        # Image header: magic number, image count, rows and columns per image.
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError('%s is not an IDX3 image file (magic=%d)'
                             % (images_path, magic))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    if num != n:
        raise ValueError('image count (%d) does not match label count (%d)'
                         % (num, n))
    if pixels.size != num * rows * cols:
        raise ValueError('%s declares %d images of %dx%d but contains %d bytes'
                         % (images_path, num, rows, cols, pixels.size))

    # Use the dimensions from the header instead of assuming 28x28 images.
    images = pixels.reshape(num, rows * cols)

    return images, labels

In [5]:
# Training data: read the 'train' split from the local 'mnist' directory and
# report how many rows and columns the image matrix has.
X_train, y_train = load_mnist('mnist', kind='train')
print('Rows: %d, columns: %d' % X_train.shape[:2])

Rows: 60000, columns: 784


In [6]:
# Testing data: read the 't10k' split from the local 'mnist' directory and
# report how many rows and columns the image matrix has.
X_test, y_test = load_mnist('mnist', kind='t10k')
print('Rows: %d, columns: %d' % X_test.shape[:2])

Rows: 10000, columns: 784


In [7]:
# Investigate data: display the first ten entries of the training image array.
X_train[:10]

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

## Step 1: Data preprocessing ## 

In [4]:
# Cast the MNIST image arrays into 32-bit floating point format.
import theano

# Set Theano's floatX option to 'float32'; the same setting is reused below
# as the target dtype for the casts.
theano.config.floatX = 'float32'

# Convert the training and testing images in one unpacking expression.
X_train, X_test = (
    pixels.astype(theano.config.floatX) for pixels in (X_train, X_test)
)

In [12]:
'''
Convert class labels into one-hot format
'''
# np_utils supplies the to_categorical() helper used for the encoding.
# NOTE(review): the output recorded for this cell shows an ImportError
# (pywrap_tensorflow) raised while loading the TensorFlow backend, so this
# import did not succeed in the recorded run -- confirm the TensorFlow
# install (or switch the Keras backend) before relying on np_utils.
from keras.utils import np_utils

Using TensorFlow backend.


ImportError: cannot import name pywrap_tensorflow

In [None]:
# Show the first three entries of y_train before encoding.
print('First 3 labels: ', y_train[:3])

# One-hot encode the labels with to_categorical(). The result is bound to a
# new name (y_train_ohe); y_train itself is not reassigned here.
y_train_ohe = np_utils.to_categorical(y_train) 

# Show the first three entries of the encoded result for comparison.
print('\nFirst 3 labels (one-hot):\n', y_train_ohe[:3])