In [1]:
# Reference: https://mxnet.apache.org/api/python/docs/tutorials/packages/gluon/image/mnist.html

import mxnet as mx
import numpy as np

# Fix the random seeds so that repeated runs are comparable.
# mx.random.seed seeds MXNet's own random number generators.
mx.random.seed(42)
# NumPy's global generator is seeded as well.
# NOTE(review): in MXNet 1.x, NDArrayIter(shuffle=True) appears to shuffle its
# indices with NumPy's global RNG, which mx.random.seed does not touch --
# confirm against the installed MXNet version.
np.random.seed(42)

# Download (if necessary) and load MNIST; the returned mapping is indexed below
# with the keys 'train_data', 'train_label', 'test_data' and 'test_label'.
mnist = mx.test_utils.get_mnist()

In [6]:
batch_size = 100

# Training iterator: shuffle the samples so that examples with the same label
# are not fed to the network consecutively.
train_data = mx.io.NDArrayIter(
    data=mnist['train_data'],
    label=mnist['train_label'],
    batch_size=batch_size,
    shuffle=True,
)

# Validation iterator: built from the test split, left in its original order.
val_data = mx.io.NDArrayIter(
    data=mnist['test_data'],
    label=mnist['test_label'],
    batch_size=batch_size,
)

In [7]:
from __future__ import print_function
import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag

### Multilayer Perceptron

In [8]:
# Multilayer perceptron: two ReLU hidden layers (128 and 64 units) followed by
# a 10-unit output layer with no activation.
net = nn.Sequential()

# Layers are created inside the network's name scope, in order.
with net.name_scope():
    net.add(
        nn.Dense(128, activation='relu'),
        nn.Dense(64, activation='relu'),
        nn.Dense(10),
    )

In [10]:
# Ask MXNet which GPUs it can see. The result is printed for inspection and is
# used in the next cell to choose the compute context (an empty result is falsy).
gpus = mx.test_utils.list_gpus()
print(gpus)

range(0, 0)


In [11]:
# Choose the list of contexts to train on: a single GPU when one was detected,
# otherwise two CPU contexts so each batch is split across both.
if gpus:
    ctx = [mx.gpu()]
else:
    ctx = [mx.cpu(0), mx.cpu(1)]

# Initialize the parameters with Xavier initialization on every chosen context.
net.initialize(
    mx.init.Xavier(magnitude=2.24),
    ctx=ctx,
)

In [12]:
# SGD optimizer over all parameters of the MLP.
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.02},
)

In [13]:
%%time
epoch = 10

# Use Accuracy as the evaluation metric.
metric = mx.metric.Accuracy()
softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()

for i in range(epoch):
    
    # Reset the train data iterator.
    train_data.reset()
    
    # Loop over the train data iterator.
    for batch in train_data:
        
        # Splits train data into multiple slices along batch_axis
        # and copy each slice into a context.
        data = gluon.utils.split_and_load(batch.data[0], ctx_list = ctx, batch_axis = 0)
        
        # Splits train labels into multiple slices along batch_axis
        # and copy each slice into a context.
        label = gluon.utils.split_and_load(batch.label[0], ctx_list = ctx, batch_axis = 0)
        outputs = []
        
        # Inside training scope
        with ag.record():
            for x, y in zip(data, label):
                z = net(x)
                
                # Computes softmax cross entropy loss.
                loss = softmax_cross_entropy_loss(z, y)
                
                # Backpropagate the error for one iteration.
                loss.backward()
                outputs.append(z)

        # Updates internal evaluation
        metric.update(label, outputs)
        
        # Make one step of parameter update. Trainer needs to know the
        # batch size of data to normalize the gradient by 1/batch_size.
        trainer.step(batch.data[0].shape[0])

    # Gets the evaluation result.
    name, acc = metric.get()
    
    # Reset evaluation result to initial state.
    metric.reset()
    print('training acc at epoch %d: %s=%f'%(i, name, acc))

training acc at epoch 0: accuracy=0.781367
training acc at epoch 1: accuracy=0.899733
training acc at epoch 2: accuracy=0.914417
training acc at epoch 3: accuracy=0.923533
training acc at epoch 4: accuracy=0.931533
training acc at epoch 5: accuracy=0.937000
training acc at epoch 6: accuracy=0.942367
training acc at epoch 7: accuracy=0.946567
training acc at epoch 8: accuracy=0.950117
training acc at epoch 9: accuracy=0.953367
CPU times: user 59.7 s, sys: 6.59 s, total: 1min 6s
Wall time: 44.3 s


In [14]:
# Evaluate the trained MLP on the validation iterator.
# Use Accuracy as the evaluation metric.
metric = mx.metric.Accuracy()

# Reset the validation data iterator.
val_data.reset()

# Loop over the validation data iterator.
for batch in val_data:

    # Splits validation data into multiple slices along batch_axis
    # and copy each slice into a context.
    data = gluon.utils.split_and_load(batch.data[0], ctx_list = ctx, batch_axis = 0)

    # Splits validation label into multiple slices along batch_axis
    # and copy each slice into a context.
    label = gluon.utils.split_and_load(batch.label[0], ctx_list = ctx, batch_axis = 0)
    outputs = []

    # Forward pass only: no autograd recording and no parameter update here.
    for x in data:
        outputs.append(net(x))

    # Updates internal evaluation
    metric.update(label, outputs)

print('validation acc: %s=%f'%metric.get())

# Guard against regressions: the MLP is expected to exceed 94% accuracy.
assert metric.get()[1] > 0.94

validation acc: accuracy=0.953200


### Convolutional Neural Network

In [16]:
import mxnet.ndarray as F

class Net(gluon.Block):
    """Small convolutional network for 10-class classification.

    Two convolution + max-pooling stages (20 and 50 filters, 5x5 kernels,
    2x2 pooling with stride 2) followed by two dense layers (500 and 10 units).
    Every stage, including the final one, is passed through tanh.
    """

    def __init__(self, **kwargs):
        """Create the layers; ``kwargs`` are forwarded to ``gluon.Block``."""
        super(Net, self).__init__(**kwargs)
        # Layers created inside name_scope inherit the name space of the
        # parent block.
        with self.name_scope():
            self.conv1 = nn.Conv2D(20, kernel_size=(5, 5))
            self.pool1 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
            self.conv2 = nn.Conv2D(50, kernel_size=(5, 5))
            self.pool2 = nn.MaxPool2D(pool_size=(2, 2), strides=(2, 2))
            self.fc1 = nn.Dense(500)
            self.fc2 = nn.Dense(10)

    def forward(self, x):
        """Run the network on input batch ``x`` and return the 10-unit output."""
        # First convolution stage: conv -> tanh -> max-pool.
        stage1 = F.tanh(self.conv1(x))
        stage1 = self.pool1(stage1)

        # Second convolution stage: conv -> tanh -> max-pool.
        stage2 = F.tanh(self.conv2(stage1))
        stage2 = self.pool2(stage2)

        # Flatten for the dense layers: 0 copies the size of the corresponding
        # dimension, -1 infers the size from the remaining dimensions.
        flat = stage2.reshape((0, -1))

        hidden = F.tanh(self.fc1(flat))
        # NOTE(review): tanh on the last layer bounds the values handed to the
        # softmax cross-entropy loss to [-1, 1] -- confirm this is intended.
        return F.tanh(self.fc2(hidden))

In [17]:
# Instantiate the CNN. This rebinds `net`, which previously referred to the MLP.
net = Net()

In [18]:
# Run on the GPU if one is available, otherwise fall back to the CPU.
# The context is kept in a one-element list because the training and
# validation loops pass it as `ctx_list`.
if mx.test_utils.list_gpus():
    ctx = [mx.gpu()]
else:
    ctx = [mx.cpu()]

In [19]:
# Initialize the CNN's parameters with Xavier initialization on the chosen context.
net.initialize(
    mx.init.Xavier(magnitude=2.24),
    ctx=ctx,
)

# SGD optimizer over all parameters of the CNN.
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.03},
)

In [20]:
# Fresh accuracy metric and loss for the CNN run. The number of epochs
# (`epoch`) is the value set in the MLP training cell.
metric = mx.metric.Accuracy()
softmax_cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()

for epoch_no in range(epoch):
    # Start every epoch from the beginning of the training iterator.
    train_data.reset()

    for batch in train_data:
        # Split inputs and labels along axis 0, one slice per context.
        inputs_per_ctx = gluon.utils.split_and_load(
            batch.data[0], ctx_list=ctx, batch_axis=0)
        labels_per_ctx = gluon.utils.split_and_load(
            batch.label[0], ctx_list=ctx, batch_axis=0)
        net_outputs = []

        # Record the forward pass so that gradients can be computed.
        with ag.record():
            for inputs, targets in zip(inputs_per_ctx, labels_per_ctx):
                scores = net(inputs)
                batch_loss = softmax_cross_entropy_loss(scores, targets)
                # Backpropagate the error for this slice.
                batch_loss.backward()
                net_outputs.append(scores)

        # Fold this batch's predictions into the running accuracy.
        metric.update(labels_per_ctx, net_outputs)

        # One parameter update. The trainer is given the batch size so that it
        # can normalize the gradient by 1/batch_size.
        trainer.step(batch.data[0].shape[0])

    # Read the accumulated accuracy for this epoch, then clear it for the next.
    result_name, result_value = metric.get()
    metric.reset()
    print('training acc at epoch %d: %s=%f'%(epoch_no, result_name, result_value))

training acc at epoch 0: accuracy=0.861983
training acc at epoch 1: accuracy=0.937683
training acc at epoch 2: accuracy=0.953133
training acc at epoch 3: accuracy=0.962083
training acc at epoch 4: accuracy=0.967867
training acc at epoch 5: accuracy=0.971417
training acc at epoch 6: accuracy=0.974233
training acc at epoch 7: accuracy=0.976317
training acc at epoch 8: accuracy=0.978400
training acc at epoch 9: accuracy=0.979900


In [21]:
# Evaluate the trained CNN on the validation iterator, using accuracy.
metric = mx.metric.Accuracy()

# Start from the beginning of the validation iterator.
val_data.reset()

for batch in val_data:
    # Split inputs and labels along axis 0, one slice per context.
    data = gluon.utils.split_and_load(batch.data[0], ctx_list=ctx, batch_axis=0)
    label = gluon.utils.split_and_load(batch.label[0], ctx_list=ctx, batch_axis=0)

    # Forward pass only: one output per data slice.
    outputs = [net(data_slice) for data_slice in data]

    # Fold this batch's predictions into the running accuracy.
    metric.update(label, outputs)

print('validation acc: %s=%f'%metric.get())

# Guard against regressions: the CNN is expected to exceed 98% accuracy.
assert metric.get()[1] > 0.98

validation acc: accuracy=0.982100
