# High-level MXNet Example

**In the interest of comparison, a common (custom) data generator, `yield_mb(X, y, batchsize=64, shuffle=False)`, was originally used for all other frameworks, but not for MXNet. I have reproduced the MXNet example using this same generator (wrapping the results in the `mx.io.DataBatch` class) to test whether MXNet is faster than the other frameworks only because it was using its own data generator. This does not appear to be the case.**

In [3]:
# Parameters
import os  # imported here because this cell runs before the main import cell

# Training hyper-parameters
EPOCHS = 10
N_CLASSES=10
BATCHSIZE = 64
LR = 0.01
MOMENTUM = 0.9
GPU = True

# InfluxDB connection settings used by the logging cells below
LOGGER_URL='msdlvm.southcentralus.cloudapp.azure.com'
# Credentials can be supplied through the environment so that real secrets never
# have to be written into the notebook; the fallbacks are the original values.
# NOTE: the constant keeps its original (misspelled) name LOGGER_USRENAME because
# other cells reference it.
LOGGER_USRENAME=os.environ.get('LOGGER_USERNAME', 'admin')
LOGGER_PASSWORD=os.environ.get('LOGGER_PASSWORD', 'password')
LOGGER_DB='gpudata'
LOGGER_SERIES='gpu'

In [4]:
import os
from os import path
import sys
import numpy as np
import mxnet as mx
import codecs

from utils import cifar_for_library, yield_mb, create_logger, Timer
from nb_logging import NotebookLogger, output_to, error_to
from gpumon import db_log_context
import codecs

from influxdb import InfluxDBClient

In [5]:
# InfluxDB client for the logging database configured in the parameters cell
client = InfluxDBClient(host=LOGGER_URL,
                        port=8086,
                        username=LOGGER_USRENAME,
                        password=LOGGER_PASSWORD,
                        database=LOGGER_DB)

In [6]:
# Identifiers attached to every log record; read from the AZ_BATCH_* environment
# variables, with fixed fallbacks when those are not set.
node_id = os.environ.get('AZ_BATCH_NODE_ID', 'node')
task_id = os.environ.get('AZ_BATCH_TASK_ID', 'mxnet')
job_id = os.environ.get('AZ_BATCH_JOB_ID', 'mxnet')

In [7]:
# Build the callable used later as logger('log', <field>=<value>) to record the
# training duration and test accuracy, tagged with the node/task/job ids.
# NOTE(review): presumably it writes those records through `client` to InfluxDB -
# confirm in utils.create_logger.
logger = create_logger(client, node_id=node_id, task_id=task_id, job_id=job_id)

In [8]:
# Rebind sys.__stdout__ to a UTF-8 stream writer built on the binary buffer that
# detach() pulls out of the original text stream.
# NOTE(review): detach() leaves the original text wrapper unusable, so this cell
# is probably not safe to run twice in the same kernel - confirm before re-running.
sys.__stdout__ = codecs.getwriter("utf-8")(sys.__stdout__.detach())

In [9]:
# Construct a NotebookLogger from the notebook's stdout stream attributes
# (session, pub_thread, name) together with the terminal stream sys.__stdout__.
# NOTE(review): presumably this mirrors notebook output to the terminal as well -
# confirm in nb_logging.NotebookLogger.
nb_teminal_logger = NotebookLogger(sys.stdout.session, sys.stdout.pub_thread, sys.stdout.name, sys.__stdout__)

In [10]:
# Route standard output first, then standard error, through the notebook/terminal
# logger. The returned values are kept in rst_out / rst_err
# (presumably handles for restoring the original streams - confirm in nb_logging).
rst_out, rst_err = output_to(nb_teminal_logger), error_to(nb_teminal_logger)

INFO:gpumon.influxdb_gpu_logger:Logging GPU to Database msdlvm.southcentralus.cloudapp.azure.com
INFO:gpumon.influxdb_gpu_logger:['influxdb_gpu_logger.py', 'msdlvm.southcentralus.cloudapp.azure.com', '8086', 'admin', 'password', 'gpudata', 'gpu', '--task_id=mxnet', '--node_id=node', '--job_id=mxnet']


In [11]:
# Record the platform and library versions this run was produced with
for label, value in (("OS: ", sys.platform),
                     ("Python: ", sys.version),
                     ("Numpy: ", np.__version__),
                     ("MXNet: ", mx.__version__)):
    print(label, value)

OS:  linux
Python:  3.5.2 (default, Nov 23 2017, 16:37:01) 
[GCC 5.4.0 20160609]
Numpy:  1.13.3
MXNet:  1.0.0


In [12]:
# Directory holding the CIFAR-10 python batches, located under the dataset root
# named by the AZ_BATCHAI_INPUT_DATASET environment variable.
dataset_root = os.getenv('AZ_BATCHAI_INPUT_DATASET')
if dataset_root is None:
    # os.getenv returns None for an unset variable; without this check
    # path.join(None, ...) fails with a TypeError that does not name the variable.
    raise RuntimeError("Environment variable AZ_BATCHAI_INPUT_DATASET is not set; "
                       "it must name the directory that contains 'cifar-10-batches-py'.")
data_path = path.join(dataset_root, 'cifar-10-batches-py')

In [13]:
def create_symbol():
    """Build the symbolic CNN used for CIFAR-10 classification.

    Layout: two stages of (3x3 conv + relu) x 2 -> 2x2 max-pool -> dropout(0.25),
    with 50 and then 100 filters, followed by flatten -> dense(512) + relu ->
    dropout(0.5) -> dense(N_CLASSES) -> softmax output.

    Returns:
        The SoftmaxOutput symbol named "softmax", whose inputs are the variables
        'data' and 'softmax_label'.
    """
    ops = mx.symbol

    def conv_relu(inputs, filters):
        # size = [(old-size - kernel + 2*padding)/stride]+1
        # so a 3x3 kernel padded by 1 on either side keeps the spatial size
        conv = ops.Convolution(data=inputs, num_filter=filters, pad=(1,1), kernel=(3,3))
        return ops.Activation(data=conv, act_type="relu")

    def pool_drop(inputs, rate):
        # 2x2 max-pooling with stride 2, then dropout at the given rate
        pooled = ops.Pooling(data=inputs, pool_type="max", kernel=(2,2), stride=(2,2))
        return ops.Dropout(data=pooled, p=rate)

    # Operators are created in the same order as a straight-line definition
    # (conv, relu, conv, relu, pool, dropout per stage).
    net = ops.Variable('data')
    net = pool_drop(conv_relu(conv_relu(net, 50), 50), 0.25)
    net = pool_drop(conv_relu(conv_relu(net, 100), 100), 0.25)

    # Classifier head
    net = ops.Flatten(data=net)
    net = ops.FullyConnected(data=net, num_hidden=512)
    net = ops.Activation(data=net, act_type="relu")
    net = ops.Dropout(data=net, p=0.5)
    logits = ops.FullyConnected(data=net, num_hidden=N_CLASSES)

    labels = ops.Variable('softmax_label')
    return ops.SoftmaxOutput(data=logits, label=labels, name="softmax")

In [14]:
def init_model(m):
    """Wrap symbol `m` in an mx.mod.Module that is ready to train.

    The module is bound for batches of BATCHSIZE 3x32x32 inputs with one label
    per sample, its parameters are initialised with a uniform Xavier (Glorot)
    initializer, and an SGD optimizer is attached using LR and MOMENTUM.

    Args:
        m: the network symbol, with inputs named 'data' and 'softmax_label'.

    Returns:
        The bound and initialised mx.mod.Module.
    """
    # First GPU (as a one-element list) when GPU is set, otherwise the CPU
    ctx = [mx.gpu(0)] if GPU else mx.cpu()

    input_shape = (BATCHSIZE, 3, 32, 32)
    label_shape = (BATCHSIZE,)
    mod = mx.mod.Module(context=ctx, symbol=m)
    mod.bind(data_shapes=[('data', input_shape)],
             label_shapes=[('softmax_label', label_shape)])

    # Glorot-uniform initializer
    glorot_uniform = mx.init.Xavier(rnd_type='uniform')
    mod.init_params(initializer=glorot_uniform)

    sgd_settings = (('learning_rate', LR), ('momentum', MOMENTUM), )
    mod.init_optimizer(optimizer='sgd', optimizer_params=sgd_settings)
    return mod

In [15]:
%%time
# Data into format for library
x_train, x_test, y_train, y_test = cifar_for_library(data_path, channel_first=True)

# Load data-iterator
#train_iter = mx.io.NDArrayIter(x_train, y_train, BATCHSIZE, shuffle=True)
# Use custom iterator instead of mx.io.NDArrayIter() for consistency
# Wrap as DataBatch class
wrapper_db = lambda args: mx.io.DataBatch(data=[mx.nd.array(args[0])], label=[mx.nd.array(args[1])])

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)

Preparing train set...
Preparing test set...
Done.
(50000, 3, 32, 32) (10000, 3, 32, 32) (50000,) (10000,)
float32 float32 int32 int32
CPU times: user 1.1 s, sys: 724 ms, total: 1.83 s
Wall time: 1.82 s


In [16]:
%%time
# Load symbol
# Only the symbolic network definition is built here; binding to a device and
# parameter initialisation happen when the model is initialised.
sym = create_symbol()

CPU times: user 4 ms, sys: 0 ns, total: 4 ms
Wall time: 2.22 ms


In [None]:
%%time
# Initialise model
# Binds the symbol to the device selected by GPU, initialises the weights and
# attaches the SGD optimizer (see init_model).
model = init_model(sym)

CPU times: user 2.84 s, sys: 2.55 s, total: 5.39 s
Wall time: 13.5 s


In [None]:
# Time the whole training run, including entering and leaving the logging context
with Timer() as t:
    # Keep the database logging context open for the duration of training
    with db_log_context(LOGGER_URL, '8086', LOGGER_USRENAME, LOGGER_PASSWORD,
                        LOGGER_DB, LOGGER_SERIES,
                        node_id=node_id, task_id=task_id, job_id=job_id):
        # Train and log accuracy
        metric = mx.metric.create('acc')
        for epoch in range(EPOCHS):
            # Training accuracy is accumulated per epoch
            metric.reset()
            # A fresh, shuffled pass over the training set from the shared generator
            for minibatch in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):
                batch = wrapper_db(minibatch)
                model.forward(batch, is_train=True)
                # Accumulate the metric from this forward pass before the weights change
                model.update_metric(metric, batch.label)
                model.backward()
                model.update()
            print('Epoch %d, Training %s' % (epoch, metric.get()))
print('Training took %.03f sec.' % t.interval)
logger('log', training_duration=t.interval)

Epoch 0, Training ('accuracy', 0.32726472471190782)


In [None]:
%%time
y_guess = model.predict(mx.io.NDArrayIter(x_test, batch_size=BATCHSIZE, shuffle=False))
y_guess = np.argmax(y_guess.asnumpy(), axis=-1)

In [None]:
# Ground-truth labels for the test set (an alias of y_test, not a copy)
y_truth=y_test

In [24]:
# Test accuracy: the fraction of predictions equal to the true label.
# np.mean over the boolean match array is vectorised; the builtin sum() would
# iterate the NumPy array one element at a time in Python.
acc = np.mean(y_guess == y_truth)
print("Accuracy: ", acc)
# Record the final test accuracy through the run logger
logger('log', test_accuracy=acc)

Accuracy:  0.7688
