In [14]:
# Import dependencies
from __future__ import print_function
import mxnet as mx
import numpy as np
from mxnet import nd, autograd
print("Dependencies imported")


Dependencies imported


In [15]:
# Select the compute context used for every tensor below.
# This notebook runs on the CPU; replace with mx.gpu() to use a GPU with MXNet.
ctx = mx.cpu()


In [16]:
# Get the MNIST image dataset
# NOTE(review): `mnist` is not referenced by any later cell shown here; the
# DataLoaders are built from mx.gluon.data.vision.MNIST instead. Confirm
# whether this call is still needed.
mnist = mx.test_utils.get_mnist()


In [17]:
# Parameters for the neural network

# Number of inputs: each 28x28-pixel image is flattened into a single vector
num_inputs = 28 * 28

# Number of outputs: one score per digit class (0-9)
num_outputs = 10

# Batch size: how many images are processed together in a single batch
batch_size = 64


In [18]:
# Split the dataset into training data and test data

def transform(data, label):
    """Convert one sample to float32 and scale its pixel values by 1/255.

    Returns a (data, label) pair where both elements have been cast to
    float32; only the data is divided by 255.
    """
    pixels = data.astype(np.float32) / 255
    target = label.astype(np.float32)
    return pixels, target

# Batched loaders over the MNIST train/test splits; only the training split is shuffled.
train_data = mx.gluon.data.DataLoader(
    mx.gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(
    mx.gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size, shuffle=False)


In [19]:
# Number of neurons in each hidden layer
num_hidden = 256

# Scale passed to the random-normal initialiser for all weights and biases
weight_scale = 0.01


In [20]:
# First hidden layer: (num_inputs x num_hidden) weight matrix and one bias per hidden unit,
# both drawn from a random normal with the configured scale
w_hd_1 = nd.random_normal(
    shape=(num_inputs, num_hidden),
    scale=weight_scale,
    ctx=ctx)
b_hd_1 = nd.random_normal(
    shape=num_hidden,
    scale=weight_scale,
    ctx=ctx)

In [21]:
# Second hidden layer: (num_hidden x num_hidden) weight matrix and one bias per hidden unit,
# both drawn from a random normal with the configured scale
w_hd_2 = nd.random_normal(
    shape=(num_hidden, num_hidden),
    scale=weight_scale,
    ctx=ctx)
b_hd_2 = nd.random_normal(
    shape=num_hidden,
    scale=weight_scale,
    ctx=ctx)


In [22]:
# Output layer: (num_hidden x num_outputs) weight matrix and one bias per output class,
# both drawn from a random normal with the configured scale
w_output = nd.random_normal(
    shape=(num_hidden, num_outputs),
    scale=weight_scale,
    ctx=ctx)
b_output = nd.random_normal(
    shape=num_outputs,
    scale=weight_scale,
    ctx=ctx)


In [23]:
# Gather every trainable tensor, layer by layer, so the optimizer can iterate over them
params = [
    w_hd_1, b_hd_1,
    w_hd_2, b_hd_2,
    w_output, b_output,
]

# Attach a gradient buffer to each parameter so autograd can fill in param.grad
for param in params:
    param.attach_grad()


In [24]:
# Define a ReLU activation function for the hidden layers
def relu(X):
    """Element-wise rectifier: the maximum of X and an all-zero array shaped like X."""
    floor = nd.zeros_like(X)
    return nd.maximum(X, floor)


In [25]:
# Softmax cross-entropy loss computed from the raw (linear) output-layer scores
def softmax_cross_entropy(yhat_linear, y):
    """Cross-entropy between softmax(yhat_linear) and the targets y.

    yhat_linear holds un-normalised scores; y is multiplied element-wise with
    their log-softmax (the training loop passes one-hot labels). The product
    is summed with nansum over every axis except axis 0 (exclude=True) and
    negated, so the first axis of the inputs is kept in the result.
    """
    log_probs = nd.log_softmax(yhat_linear)
    weighted = y * log_probs
    return -nd.nansum(weighted, axis=0, exclude=True)


In [26]:
# Neural network model
def net(X):
    """Forward pass through two ReLU hidden layers and a linear output layer.

    Returns the raw output-layer scores; no softmax is applied here.
    """
    # First hidden layer: affine transform followed by ReLU
    hidden_1 = relu(nd.dot(X, w_hd_1) + b_hd_1)

    # Second hidden layer: same pattern, fed by the first layer's activations
    hidden_2 = relu(nd.dot(hidden_1, w_hd_2) + b_hd_2)

    # Output layer stays linear
    return nd.dot(hidden_2, w_output) + b_output


In [27]:
# Optimizer
def SGD(params, lr):
    """Apply one gradient-descent step to every parameter in place.

    Each element of params must expose a .grad attribute; it is overwritten
    with (value - lr * grad). Nothing is returned.
    """
    for weight in params:
        step = lr * weight.grad
        # Slice assignment writes into the existing array instead of rebinding the name
        weight[:] = weight - step


In [28]:
# Evaluation metric
def evaluate_accuracy(data_iterator, net):
    """Return the fraction of examples in data_iterator that net labels correctly.

    data_iterator yields (data, label) batches. Each batch is moved to ctx and
    flattened to (batch, num_inputs) before being passed to net; the predicted
    class is the argmax over axis 1 of the network output.

    The result is a scalar obtained via asscalar(). An iterator that yields no
    batches leaves both counters at zero and raises ZeroDivisionError.
    """
    numerator = 0.
    denominator = 0.
    for data, label in data_iterator:
        # Use the shared num_inputs constant rather than repeating the literal 784
        data = data.as_in_context(ctx).reshape((-1, num_inputs))
        label = label.as_in_context(ctx)
        predictions = nd.argmax(net(data), axis=1)
        # Count of correct predictions in this batch
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()


In [29]:
# Epochs: number of full passes over the training data
epochs = 10

# Learning rate: step size multiplied with each gradient in the SGD update
learning_rate = 1e-3

# Smoothing constant: weight given to the newest batch loss in the moving average
smoothing_constant = 1e-2


In [30]:
# Train the neural network model
# Exponentially smoothed batch loss; seeded with the first batch's loss. Initialising it
# here also keeps the print below from hitting an undefined name if train_data is empty.
moving_loss = None
for e in range(epochs):
    for data, label in train_data:
        # Flatten each image and one-hot encode the labels using the shared size constants
        data = data.as_in_context(ctx).reshape((-1, num_inputs))
        label = label.as_in_context(ctx)
        label_one_hot = nd.one_hot(label, num_outputs)

        # Record the forward pass so autograd can compute gradients for params
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label_one_hot)
        loss.backward()
        SGD(params, learning_rate)

        # Keep a moving average of the losses
        curr_loss = nd.mean(loss).asscalar()
        if moving_loss is None:
            moving_loss = curr_loss
        else:
            moving_loss = ((1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

    # Report accuracy on both splits once per epoch
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, moving_loss, train_accuracy, test_accuracy))


Epoch 0. Loss: 0.475494587297, Train_acc 0.8905, Test_acc 0.8912
Epoch 1. Loss: 0.288055782388, Train_acc 0.924917, Test_acc 0.9249
Epoch 2. Loss: 0.196092822246, Train_acc 0.948167, Test_acc 0.9474
Epoch 3. Loss: 0.148628893256, Train_acc 0.962083, Test_acc 0.9584
Epoch 4. Loss: 0.121844707697, Train_acc 0.96965, Test_acc 0.9642
Epoch 5. Loss: 0.104030329089, Train_acc 0.974733, Test_acc 0.9678
Epoch 6. Loss: 0.0832342584997, Train_acc 0.97825, Test_acc 0.9706
Epoch 7. Loss: 0.0766369441815, Train_acc 0.9822, Test_acc 0.9718
Epoch 8. Loss: 0.06376962288, Train_acc 0.983583, Test_acc 0.9743
Epoch 9. Loss: 0.05726662875, Train_acc 0.986417, Test_acc 0.974


In [31]:
# Create an HTML canvas to evaluate the model
from IPython.display import HTML
import cv2
import numpy as np
import base64

def classify(img):
    """Predict the digit contained in a base64-encoded PNG data URL.

    img is a string that starts with 'data:image/png;base64,' followed by the
    base64 payload. The PNG is decoded with OpenCV, its alpha channel (index 3)
    is resized to 28x28, scaled by 1/255, flattened and passed through net.

    Returns the argmax class index as a Python int.
    Raises ValueError if the payload cannot be decoded as an image or the
    decoded image has no alpha channel.
    """
    encoded = img[len('data:image/png;base64,'):]
    raw_bytes = base64.b64decode(encoded)

    # np.frombuffer replaces np.fromstring, whose binary mode is deprecated in NumPy
    decoded = cv2.imdecode(np.frombuffer(raw_bytes, np.uint8), -1)
    if decoded is None:
        raise ValueError("could not decode image data")
    if decoded.ndim != 3 or decoded.shape[2] < 4:
        raise ValueError("expected an image with an alpha channel")

    # NOTE(review): presumably the canvas strokes live in the alpha channel - confirm
    alpha = cv2.resize(decoded[:, :, 3], (28, 28))
    pixels = nd.array(alpha).as_in_context(ctx).reshape((-1, 784)).astype(np.float32) / 255
    return int(nd.argmax(net(pixels), axis=1).asnumpy()[0])

# Display the contents of the local file mnist.html as the cell output.
# NOTE(review): presumably that page hosts the drawing canvas and calls classify() - confirm.
HTML(filename = "mnist.html")
