In [15]:
# notebook source: 
# https://github.com/zackchase/mxnet-the-straight-dope/blob/master/chapter04_convolutional-neural-networks/cnn-gluon.ipynb

In [16]:
# network parameters
kernel_size = (5,5)
pool_size = (2,2)
filters_layer1 = 32
filters_layer2 = 64
units_dense = 1024
# activation='relu'

# train parameters
batch_size = 128
epochs = 10
dropout = 0.5
# loss: softmax
# optimizer: adam

# Convolutional Neural Networks in ``gluon``

Now let's see how succinctly we can express a convolutional neural network using ``gluon``. You might be relieved to find out that this too requires hardly any more code than a logistic regression. 

In [17]:
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
mx.random.seed(1)

## Set the context

In [18]:
# to change CPU to GPU set mx.gpu instead of mx.cpu
ctx = mx.cpu()
ctx

cpu(0)

## Grab the MNIST dataset

In [19]:
#num_inputs = 784
#num_outputs = 10
def transform(data, label):
    return nd.transpose(data.astype(np.float32), (2,0,1))/255, label.astype(np.float32)
train_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = gluon.data.DataLoader(gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)

  label = np.fromstring(fin.read(), dtype=np.uint8).astype(np.int32)
  data = np.fromstring(fin.read(), dtype=np.uint8)


## Define a convolutional neural network

Again, a few lines here is all we need in order to change the model. Let's add a couple of convolutional layers using ``gluon.nn``.

In [20]:
# same network as keras demo
num_outputs=10
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Conv2D(channels=filters_layer1, kernel_size=kernel_size[0], activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=pool_size[0])) # , strides=2)
    net.add(gluon.nn.Conv2D(channels=filters_layer2, kernel_size=kernel_size[0], activation='relu'))
    net.add(gluon.nn.MaxPool2D(pool_size=pool_size[0]))
    # The Flatten layer collapses all axis, except the first one, into one axis.
    net.add(gluon.nn.Flatten())
    net.add(gluon.nn.Dense(units_dense, activation="relu"))
    net.add(gluon.nn.Dropout(dropout))
    net.add(gluon.nn.Dense(num_outputs)) #softmax activation is automatically derived by the loss function used

## Parameter initialization


In [21]:
ctx = mx.cpu(0)
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

## Softmax cross-entropy Loss

In [22]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

## Optimizer

In [23]:
trainer = gluon.Trainer(net.collect_params(), 'Adam')#, {'learning_rate': .1})

## Write evaluation loop to calculate accuracy

In [24]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

## Training Loop

In [25]:
import time
smoothing_constant = .01

for e in range(epochs):
    start = time.time()
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        
        # commented because they slow down the training
        ##########################
        #  Keep a moving average of the losses
        ##########################
        #curr_loss = nd.mean(loss).asscalar()
        #moving_loss = (curr_loss if ((i == 0) and (e == 0)) 
        #               else (1 - smoothing_constant) * moving_loss + smoothing_constant * curr_loss)
        
    #test_accuracy = evaluate_accuracy(test_data, net)
    #train_accuracy = evaluate_accuracy(train_data, net)
    #print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))  
    print(time.time()-start)
    print('seconds per epoch')

56.772982120513916
seconds per epoch
55.472376346588135
seconds per epoch
55.119585037231445
seconds per epoch
54.95815587043762
seconds per epoch
55.217846155166626
seconds per epoch
58.89562726020813
seconds per epoch
62.48016691207886
seconds per epoch
64.74417519569397
seconds per epoch
57.583136796951294
seconds per epoch
59.71279954910278
seconds per epoch


In [26]:
np.shape(data)

(96, 1, 28, 28)

In [27]:
test_accuracy = evaluate_accuracy(test_data, net)
train_accuracy = evaluate_accuracy(train_data, net)
print("Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))  


Train_acc 0.9981333333333333, Test_acc 0.993


In [28]:
# measure deploy speed
for i, (data, label) in enumerate(test_data):
    if i>10:
        break
    start = time.time()
    data = data.as_in_context(ctx)
    label = label.as_in_context(ctx)
    output = net(data)
    predictions = nd.argmax(output, axis=1)
    duration = time.time() - start
    print('Number of predictions per second: ')
    print(np.shape(data)[0]/duration)

Number of predictions per second: 
63814.4433614644
Number of predictions per second: 
63746.24934694847
Number of predictions per second: 
63844.79866809371
Number of predictions per second: 
127613.71808889945
Number of predictions per second: 
63791.695817490494
Number of predictions per second: 
127583.39163498099
Number of predictions per second: 
127644.05896338564
Number of predictions per second: 
128438.01722488039
Number of predictions per second: 
63761.390973871734
Number of predictions per second: 
127553.07959135187
Number of predictions per second: 
127462.22981956316


In [29]:
# save and load model (! only weights), check same prediction
# source: http://gluon.mxnet.io/chapter03_deep-neural-networks/serialization.html
filename = "testnet.params"
net.save_params(filename)
print(net(data[0:1]))

net2 = gluon.nn.Sequential()
with net2.name_scope():
    net2.add(gluon.nn.Conv2D(channels=filters_layer1, kernel_size=kernel_size[0], activation='relu'))
    net2.add(gluon.nn.MaxPool2D(pool_size=pool_size[0]))
    net2.add(gluon.nn.Conv2D(channels=filters_layer2, kernel_size=kernel_size[0], activation='relu'))
    net2.add(gluon.nn.MaxPool2D(pool_size=pool_size[0]))
    # The Flatten layer collapses all axis, except the first one, into one axis.
    net2.add(gluon.nn.Flatten())
    net2.add(gluon.nn.Dense(units_dense, activation="relu"))
    net2.add(gluon.nn.Dropout(dropout))
    net2.add(gluon.nn.Dense(num_outputs)) #softmax activation is automatically derived by the loss function used

net2.load_params(filename, ctx=ctx)
net2(data[0:1])


[[ 15.094281  -11.709779   -9.53958    -7.927335   -6.6310296  -3.9149308
   -3.0588708  -9.9343815  -0.4707301  -1.846066 ]]
<NDArray 1x10 @cpu(0)>



[[ 15.094281  -11.709779   -9.53958    -7.927335   -6.6310296  -3.9149308
   -3.0588708  -9.9343815  -0.4707301  -1.846066 ]]
<NDArray 1x10 @cpu(0)>

## Conclusion

You might notice that by using ``gluon``, we get code that runs much faster whether on CPU or GPU. That's largely because ``gluon`` can call down to highly optimized layers that have been written in C++. 