# High-level MXNet Example

**In the interest of comparison, a common (custom) data generator, `yield_mb(X, y, batchsize=64, shuffle=False)`, was originally used for all other frameworks but not for MXNet. I have reproduced the MXNet example using this same generator (wrapping the results in the `mx.io.DataBatch` class) to test whether MXNet is faster than the other frameworks only because it was using its own data iterator. This does not appear to be the case.**

In [1]:
import os
import sys
import numpy as np
import mxnet as mx
import json
from common.params import *
from common.utils import *

In [2]:
# Log the software stack so the benchmark numbers below can be reproduced.
for label, value in (
    ("OS: ", sys.platform),
    ("Python: ", sys.version),
    ("Numpy: ", np.__version__),
    ("MXNet: ", mx.__version__),
):
    print(label, value)
# Queried last, after the version lines, because it is a function call
# rather than a plain attribute lookup.
print("GPU: ", get_gpu_name())

OS:  linux
Python:  3.6.4 |Anaconda, Inc.| (default, Jan 16 2018, 18:10:19) 
[GCC 7.2.0]
Numpy:  1.14.0
MXNet:  1.0.0
GPU:  ['Tesla P100-PCIE-16GB', 'Tesla P100-PCIE-16GB']


In [3]:
def create_symbol():
    """Build the CNN symbol graph used in this notebook.

    Layout: two blocks of (conv 3x3 -> relu -> conv 3x3 -> max-pool 2x2 ->
    relu -> dropout 0.25) with 50 and then 100 filters, followed by flatten,
    a 512-unit fully-connected layer with relu and dropout 0.5, and a final
    fully-connected layer with N_CLASSES outputs feeding a SoftmaxOutput.

    Returns:
        The SoftmaxOutput symbol (named "softmax") reading from the
        'data' and 'softmax_label' variables.
    """
    ops = mx.symbol
    net = ops.Variable('data')

    # size = [(old-size - kernel + 2*padding)/stride]+1
    # so a 3x3 kernel is padded with 1 on either side.
    # Operators are created in the same order as a hand-unrolled version so
    # that automatically generated layer names are unaffected.
    for n_filters in (50, 100):
        net = ops.Convolution(data=net, num_filter=n_filters, pad=(1,1), kernel=(3,3))
        net = ops.Activation(data=net, act_type="relu")
        net = ops.Convolution(data=net, num_filter=n_filters, pad=(1,1), kernel=(3,3))
        net = ops.Pooling(data=net, pool_type="max", kernel=(2,2), stride=(2,2))
        net = ops.Activation(data=net, act_type="relu")
        net = ops.Dropout(data=net, p=0.25)

    # Classifier head.
    net = ops.Flatten(data=net)
    net = ops.FullyConnected(data=net, num_hidden=512)
    net = ops.Activation(data=net, act_type="relu")
    net = ops.Dropout(data=net, p=0.5)
    logits = ops.FullyConnected(data=net, num_hidden=N_CLASSES)

    labels = ops.Variable('softmax_label')
    return ops.SoftmaxOutput(data=logits, label=labels, name="softmax")

In [4]:
def init_model(m):
    """Wrap a symbol in a bound, initialised mx.mod.Module ready for training.

    Args:
        m: The network symbol (e.g. the output of create_symbol()).

    Returns:
        A Module bound to BATCHSIZE x 3 x 32 x 32 inputs, with parameters
        initialised and an SGD optimizer (LR, MOMENTUM) attached.
    """
    # First GPU when the GPU flag is set, otherwise the CPU.
    device = [mx.gpu(0)] if GPU else mx.cpu()

    module = mx.mod.Module(context=device, symbol=m)
    module.bind(
        data_shapes=[('data', (BATCHSIZE, 3, 32, 32))],
        label_shapes=[('softmax_label', (BATCHSIZE,))],
    )

    # Glorot-uniform initializer
    module.init_params(initializer=mx.init.Xavier(rnd_type='uniform'))

    sgd_settings = (('learning_rate', LR), ('momentum', MOMENTUM))
    module.init_optimizer(optimizer='sgd', optimizer_params=sgd_settings)
    return module

In [5]:
%%time
# Load the CIFAR splits as channel-first arrays.
x_train, x_test, y_train, y_test = cifar_for_library(channel_first=True)

# MXNet's own mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True) is
# deliberately not used: for consistency with the other frameworks the shared
# custom generator supplies the batches, each wrapped as a DataBatch.
def wrapper_db(args):
    """Wrap an (X, y) mini-batch pair as an mx.io.DataBatch."""
    return mx.io.DataBatch(
        data=[mx.nd.array(args[0])],
        label=[mx.nd.array(args[1])],
    )

# Sanity-check the shapes and dtypes of every split.
print(*(arr.shape for arr in (x_train, x_test, y_train, y_test)))
print(*(arr.dtype for arr in (x_train, x_test, y_train, y_test)))

Preparing train set...
Preparing test set...
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 637 ms, sys: 617 ms, total: 1.25 s
Wall time: 1.25 s


In [6]:
%%time
# Build the network symbol with create_symbol(); binding and parameter
# initialisation are done separately by init_model().
sym = create_symbol()

CPU times: user 2.45 ms, sys: 89 µs, total: 2.54 ms
Wall time: 1.95 ms


In [7]:
%%time
# Initialise model: init_model() binds the symbol to a Module, initialises
# its parameters and attaches the SGD optimizer.
model = init_model(sym)

CPU times: user 2.51 s, sys: 883 ms, total: 3.4 s
Wall time: 3.7 s


In [8]:
results = {}
with Timer() as t:
    # Train for EPOCHS passes, reporting the running training accuracy.
    metric = mx.metric.create('acc')
    for epoch in range(EPOCHS):
        metric.reset()
        # Batches come from the shared yield_mb generator (not an MXNet
        # iterator), so each one is wrapped into a DataBatch on the fly.
        minibatches = yield_mb(x_train, y_train, BATCHSIZE, shuffle=True)
        for batch in (wrapper_db(mb) for mb in minibatches):
            model.forward(batch, is_train=True)
            # Accumulate accuracy from this forward pass before the weights move.
            model.update_metric(metric, batch.label)
            model.backward()
            model.update()
        print('Epoch %d, Training %s' % (epoch, metric.get()))
print('Training took %.03f sec.' % t.interval)
results['training duration'] = t.interval

Epoch 0, Training ('accuracy', 0.331145966709347)
Epoch 1, Training ('accuracy', 0.50238076184379)
Epoch 2, Training ('accuracy', 0.5883282650448144)
Epoch 3, Training ('accuracy', 0.6425056017925737)
Epoch 4, Training ('accuracy', 0.687560019206146)
Epoch 5, Training ('accuracy', 0.7175496158770807)
Epoch 6, Training ('accuracy', 0.7432778489116517)
Epoch 7, Training ('accuracy', 0.7682858514724712)
Epoch 8, Training ('accuracy', 0.7858314660691421)
Epoch 9, Training ('accuracy', 0.803377080665813)
Training took 52.344 sec.


In [9]:
%%time
# Score the test set in order (no shuffling) and keep the most likely class
# for each sample.
y_guess = np.argmax(
    model.predict(
        mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False)
    ).asnumpy(),
    axis=-1,
)

CPU times: user 273 ms, sys: 205 ms, total: 477 ms
Wall time: 308 ms


In [10]:
# Fraction of test samples whose predicted class equals the true label.
# Cast to a built-in float so the value prints and serialises plainly.
acc = float(np.mean(y_guess == y_test))
print("Accuracy: ", acc)
# Record the accuracy itself under this key (not the timer's interval), so
# the JSON report carries the real test score.
results['accuracy'] = acc

Accuracy:  0.7747


In [11]:
# Tag the report with the GPU model taken from the first device name,
# e.g. 'Tesla P100-PCIE-16GB' -> 'P100'.
gpu_model = get_gpu_name()[0].split(' ')[1].split('-')[0]
with open('mxnet_cnn_{}.json'.format(gpu_model), 'w') as f:
    f.write(json.dumps(results))