In [4]:
import mxnet as mx
import numpy as np
# Fetch MNIST; the result is indexed further down by 'train_data', 'train_label',
# 'test_data' and 'test_label'.
mnist = mx.test_utils.get_mnist()
# Seed every RNG the notebook relies on. mx.random.seed only covers MXNet's own
# generators; NDArrayIter(shuffle=True) shuffles with NumPy's global RNG, so that
# one must be seeded as well for the training order to be repeatable.
mx.random.seed(42)
np.random.seed(42)
# Train on a GPU when MXNet can see one, otherwise fall back to the CPU.
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()

We’ll configure the data iterator to feed examples in batches of 100. Keep in mind that each example is a 28x28 grayscale image and the corresponding label.

Image batches are commonly represented by a 4-D array with shape (batch_size, num_channels, height, width). For the MNIST dataset, since the images are grayscale, there is only one color channel.

In [12]:
# Mini-batch size, shared by both iterators here and by the Speedometer callbacks further down.
size = 100
# Training batches: images + labels from the training split, shuffling requested.
train = mx.io.NDArrayIter(
    data=mnist['train_data'],
    label=mnist['train_label'],
    batch_size=size,
    shuffle=True,
)
# Validation batches: images + labels from the test split, no shuffling requested.
val_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=size,
)


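The first approach makes use of a Multilayer Perceptron to solve this problem. We’ll define the MLP using MXNet’s symbolic interface (mx.sym): the cells below only describe the computation graph, which is executed once it is bound to a Module and trained.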

MLPs consist of several fully connected layers. A fully connected layer, or FC layer for short, is one where each neuron in the layer is connected to every neuron in its preceding layer. From a linear algebra perspective, an FC layer applies an affine transform to the n x m input matrix X and outputs a matrix Y of size n x k, where k is the number of neurons in the FC layer. k is also referred to as the hidden size. The output Y is computed according to the equation Y = X W + b. The FC layer has two learnable parameters, the m x k weight matrix W and the 1 x k bias vector b, which is added to every row of X W.

In [13]:
# Symbolic placeholder named 'data' for the input batch, flattened before it is
# handed to the fully connected layers.
data = mx.sym.flatten(data=mx.sym.var('data'))

In [14]:
# Hidden layer 1: 128 fully connected units followed by a ReLU activation.
fc1 = mx.sym.FullyConnected(
    data=data,
    num_hidden=128,
)
act1 = mx.sym.Activation(
    data=fc1,
    act_type='relu',
)

In [15]:
# Hidden layer 2: 64 fully connected units followed by a ReLU activation.
fc2 = mx.sym.FullyConnected(
    data=act1,
    num_hidden=64,
)
act2 = mx.sym.Activation(
    data=fc2,
    act_type='relu',
)

In [16]:
# Output layer: 10 units, one per digit class.
fc3 = mx.sym.FullyConnected(
    data=act2,
    num_hidden=10,
)
# Softmax over the 10 outputs, trained with cross-entropy loss.
# NOTE(review): the name 'softmax' presumably determines the label input name
# (softmax_label) that the iterators feed by default - confirm before renaming.
mlp = mx.sym.SoftmaxOutput(
    data=fc3,
    name='softmax',
)

In [18]:
import logging
# Lower the root logger's threshold so the INFO progress lines emitted by fit() are displayed.
logging.getLogger().setLevel(logging.DEBUG)
# Wrap the MLP symbol in a trainable module on the selected compute context.
mlp_model = mx.mod.Module(symbol=mlp, context=ctx)
mlp_model.fit(
    train_data=train,                           # training batches
    eval_data=val_iter,                         # validation batches, scored once per epoch
    eval_metric='acc',                          # report accuracy during training
    optimizer='sgd',                            # stochastic gradient descent
    optimizer_params={'learning_rate': 0.1},    # fixed learning rate
    # log throughput and running accuracy every 100 batches
    batch_end_callback=mx.callback.Speedometer(size, 100),
    num_epoch=10,                               # at most 10 passes over the training set
)

INFO:root:Epoch[0] Batch [100]	Speed: 62277.34 samples/sec	accuracy=0.107525
INFO:root:Epoch[0] Batch [200]	Speed: 65108.83 samples/sec	accuracy=0.113200
INFO:root:Epoch[0] Batch [300]	Speed: 65534.46 samples/sec	accuracy=0.111300
INFO:root:Epoch[0] Batch [400]	Speed: 63459.77 samples/sec	accuracy=0.114400
INFO:root:Epoch[0] Batch [500]	Speed: 63865.03 samples/sec	accuracy=0.135500
INFO:root:Epoch[0] Train-accuracy=0.218889
INFO:root:Epoch[0] Time cost=0.952
INFO:root:Epoch[0] Validation-accuracy=0.290300
INFO:root:Epoch[1] Batch [100]	Speed: 61513.32 samples/sec	accuracy=0.452772
INFO:root:Epoch[1] Batch [200]	Speed: 63460.83 samples/sec	accuracy=0.695200
INFO:root:Epoch[1] Batch [300]	Speed: 53051.63 samples/sec	accuracy=0.779100
INFO:root:Epoch[1] Batch [400]	Speed: 54493.21 samples/sec	accuracy=0.810700
INFO:root:Epoch[1] Batch [500]	Speed: 56648.24 samples/sec	accuracy=0.828600
INFO:root:Epoch[1] Train-accuracy=0.844343
INFO:root:Epoch[1] Time cost=1.066
INFO:root:Epoch[1] Validat

In [20]:
# Label-free iterator over the test images: predict() is only given the inputs.
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=None,
    batch_size=size,
)
# Expect one row of 10 class scores for each of the 10000 test images.
prob = mlp_model.predict(test_iter)
assert prob.shape == (10000, 10)

In [21]:
# Rebuild the test iterator, this time with labels, so the MLP can be scored.
test_iter = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=size,
)
# Accumulate the MLP's classification accuracy over the test iterator.
acc = mx.metric.Accuracy()
mlp_model.score(test_iter, acc)
print(acc)
# acc.get()[1] is the accuracy value; fail loudly if the MLP does not exceed 96%.
assert acc.get()[1] > 0.96, "Achieved accuracy (%f) is lower than expected (0.96)" % acc.get()[1]

EvalMetric: {'accuracy': 0.9701}


In [22]:
# LeNet-style CNN. Note: this cell rebinds `data`, `fc1` and `fc2`, names the MLP cells also used.
data = mx.sym.var('data')
# Stage 1: 5x5 convolution with 20 filters -> tanh -> 2x2 max pooling, stride 2.
conv1 = mx.sym.Convolution(data=data, num_filter=20, kernel=(5, 5))
tanh1 = mx.sym.Activation(data=conv1, act_type='tanh')
pool1 = mx.sym.Pooling(data=tanh1, kernel=(2, 2), stride=(2, 2), pool_type='max')
# Stage 2: 5x5 convolution with 50 filters -> tanh -> 2x2 max pooling, stride 2.
conv2 = mx.sym.Convolution(data=pool1, num_filter=50, kernel=(5, 5))
tanh2 = mx.sym.Activation(data=conv2, act_type='tanh')
pool2 = mx.sym.Pooling(data=tanh2, kernel=(2, 2), stride=(2, 2), pool_type='max')
# Flatten the pooled feature maps, then a 500-unit fully connected layer with tanh.
flatten = mx.sym.flatten(data=pool2)
fc1 = mx.sym.FullyConnected(data=flatten, num_hidden=500)
tanh3 = mx.sym.Activation(data=fc1, act_type='tanh')
# Output layer: 10 units, one per digit class.
fc2 = mx.sym.FullyConnected(data=tanh3, num_hidden=10)
# Softmax output with cross-entropy loss, named 'softmax' like the MLP's output.
lenet = mx.sym.SoftmaxOutput(data=fc2, name='softmax')

In [25]:
# Wrap the LeNet symbol in a trainable module on the selected compute context.
lenet_model = mx.mod.Module(symbol=lenet, context=ctx)
# Train with the same iterators, optimizer settings and epoch count as the MLP.
lenet_model.fit(
    train_data=train,                           # training batches
    eval_data=val_iter,                         # validation batches, scored once per epoch
    eval_metric='acc',                          # report accuracy during training
    optimizer='sgd',                            # stochastic gradient descent
    optimizer_params={'learning_rate': 0.1},    # fixed learning rate
    # log throughput and running accuracy every 100 batches
    batch_end_callback=mx.callback.Speedometer(size, 100),
    num_epoch=10,                               # at most 10 passes over the training set
)

INFO:root:Epoch[0] Batch [100]	Speed: 1101.60 samples/sec	accuracy=0.107327
INFO:root:Epoch[0] Batch [200]	Speed: 1106.22 samples/sec	accuracy=0.113200
INFO:root:Epoch[0] Batch [300]	Speed: 856.91 samples/sec	accuracy=0.111300
INFO:root:Epoch[0] Batch [400]	Speed: 898.94 samples/sec	accuracy=0.114400
INFO:root:Epoch[0] Batch [500]	Speed: 895.57 samples/sec	accuracy=0.107500
INFO:root:Epoch[0] Train-accuracy=0.116869
INFO:root:Epoch[0] Time cost=63.378
INFO:root:Epoch[0] Validation-accuracy=0.113500
INFO:root:Epoch[1] Batch [100]	Speed: 865.64 samples/sec	accuracy=0.178218
INFO:root:Epoch[1] Batch [200]	Speed: 828.52 samples/sec	accuracy=0.659200
INFO:root:Epoch[1] Batch [300]	Speed: 850.66 samples/sec	accuracy=0.867000
INFO:root:Epoch[1] Batch [400]	Speed: 861.26 samples/sec	accuracy=0.903700
INFO:root:Epoch[1] Batch [500]	Speed: 855.82 samples/sec	accuracy=0.924200
INFO:root:Epoch[1] Train-accuracy=0.937677
INFO:root:Epoch[1] Time cost=70.732
INFO:root:Epoch[1] Validation-accuracy=0.9

In [26]:
# Class scores for every test image, produced from a label-free iterator.
test_iter = mx.io.NDArrayIter(mnist['test_data'], None, size)
prob = lenet_model.predict(test_iter)
# Same sanity check the MLP cell performs: one row of 10 class scores per test image.
assert prob.shape == (10000, 10)
# Rebuild the iterator with labels so LeNet can be scored.
test_iter = mx.io.NDArrayIter(mnist['test_data'], mnist['test_label'], size)
# Accumulate LeNet's classification accuracy over the test iterator.
acc = mx.metric.Accuracy()
lenet_model.score(test_iter, acc)
print(acc)
# acc.get()[1] is the accuracy value; fail loudly if LeNet does not exceed 98%.
assert acc.get()[1] > 0.98, "Achieved accuracy (%f) is lower than expected (0.98)" % acc.get()[1]

EvalMetric: {'accuracy': 0.9895}
