# Convolutional Neural Networks in ``gluon`` with MXNet
Copied from the MXNet GitHub repository.


In [1]:
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon


## Set the "context" (GPU or CPU)
This run is on my Mac, so the context is set to CPU.

In [2]:
# Seed MXNet's and NumPy's global random number generators so the stochastic
# steps of this notebook (e.g. parameter initialisation and dropout) are as
# repeatable as the backend allows after "Restart Kernel -> Run All".
SEED = 42
mx.random.seed(SEED)
np.random.seed(SEED)

# Device on which the parameters and every mini-batch are placed.
# Swap the two lines below to run on a GPU instead of the CPU.
# ctx = mx.gpu()
ctx = mx.cpu()

#### Get MNIST data the MXNet way

In [3]:
# Mini-batch size handed to both the training and the test data loader.
batch_size = 64
# Pixel count of one flattened 28 x 28 image; kept for reference, the cells
# shown in this notebook do not read it.
num_inputs = 28 * 28
# Number of target classes, used as the width of the network's last Dense layer.
num_outputs = 10

def transform(data, label):
    """Prepare one raw sample for the network.

    The image is cast to float32, its axes are permuted with ``(2, 0, 1)`` so
    that the last axis becomes the first one (presumably height-width-channel
    to channel-height-width -- confirm against the dataset), and the values
    are divided by 255. The label is cast to float32 as well.

    Returns the ``(image, label)`` pair.
    """
    image = data.astype(np.float32)
    image = nd.transpose(image, (2, 0, 1)) / 255
    return image, label.astype(np.float32)

# Training batches are shuffled; evaluation batches keep the dataset order.
# Both loaders apply `transform` to each sample and use the same batch size.
train_data = gluon.data.DataLoader(
    dataset=gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size=batch_size,
    shuffle=True,
)
test_data = gluon.data.DataLoader(
    dataset=gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size=batch_size,
    shuffle=False,
)

## Define a convolutional neural network

Notice that setting up the model/network looks similar to how it is done in Keras.

In [4]:
# Width of the hidden fully connected layer.
num_fc = 512

# Two convolution + max-pooling stages followed by two dense layers.
net = gluon.nn.Sequential()
with net.name_scope():
    # Stage 1: 32 filters with a kernel size of 3 and ReLU, then 2x2 pooling with stride 2.
    net.add(gluon.nn.Conv2D(32, 3, activation='relu'))
    net.add(gluon.nn.MaxPool2D(2, 2))
    # Stage 2: 64 filters with a kernel size of 5 and ReLU, then the same pooling.
    net.add(gluon.nn.Conv2D(64, 5, activation='relu'))
    net.add(gluon.nn.MaxPool2D(2, 2))
    # Flatten merges every axis except the first into a single axis.
    net.add(gluon.nn.Flatten())
    # Hidden dense layer with ReLU, followed by dropout at rate 0.5.
    net.add(gluon.nn.Dense(units=num_fc, activation="relu"))
    net.add(gluon.nn.Dropout(rate=0.5))
    # Output layer: one unit per class, no activation.
    net.add(gluon.nn.Dense(units=num_outputs))

## Parameter initialization


In [5]:
# Initialise every parameter of the network on `ctx` with the Xavier
# initializer (magnitude 2.24).
net.initialize(init=mx.init.Xavier(magnitude=2.24), ctx=ctx)

## Softmax cross-entropy Loss

In [6]:
# Loss applied to the network's raw outputs and the class-index labels.
# Both keyword arguments spell out the library defaults.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss(
    sparse_label=True,
    from_logits=False,
)

## Optimizer

In [7]:
# Adam optimizer over all network parameters; no optimizer_params are passed,
# so the optimizer's default hyper-parameters apply.
trainer = gluon.Trainer(params=net.collect_params(), optimizer='adam')

## Write evaluation loop to calculate accuracy

In [8]:
def evaluate_accuracy(data_iterator, net):
    """Return the classification accuracy of ``net`` over ``data_iterator``.

    Each ``(data, label)`` batch is moved to the global ``ctx`` and passed
    through the network; the index of the largest output along axis 1 is
    taken as the prediction. Results are accumulated in an
    ``mx.metric.Accuracy`` and the second element of its ``get()`` result is
    returned.
    """
    metric = mx.metric.Accuracy()
    for batch, targets in data_iterator:
        batch = batch.as_in_context(ctx)
        targets = targets.as_in_context(ctx)
        scores = net(batch)
        metric.update(preds=nd.argmax(scores, axis=1), labels=targets)
    return metric.get()[1]

## Training Loop

In [9]:
# Number of full passes over the training data.
epochs = 10
# Weight of the newest batch loss in the moving average below.
smoothing_constant = .01
print('started training loop...')

for e in range(epochs):
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)

        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Update the parameters, passing this batch's sample count.
        trainer.step(batch_size=data.shape[0])

        # Moving average of the mean batch loss; the very first batch of the
        # first epoch seeds it, every later batch is blended in.
        curr_loss = nd.mean(loss).asscalar()
        if e == 0 and i == 0:
            moving_loss = curr_loss
        else:
            moving_loss = ((1 - smoothing_constant) * moving_loss
                           + smoothing_constant * curr_loss)

    # Report accuracy on both splits once per epoch.
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s"
          % (e, moving_loss, train_accuracy, test_accuracy))

started training loop...
Epoch 0. Loss: 0.07305435390137117, Train_acc 0.9875166666666667, Test_acc 0.9881
Epoch 1. Loss: 0.0458467309410016, Train_acc 0.9919, Test_acc 0.9906
Epoch 2. Loss: 0.033714119708445955, Train_acc 0.9938833333333333, Test_acc 0.9927
Epoch 3. Loss: 0.030325592632488445, Train_acc 0.99575, Test_acc 0.9925
Epoch 4. Loss: 0.02110378609326964, Train_acc 0.99625, Test_acc 0.9913
Epoch 5. Loss: 0.017430693426138823, Train_acc 0.9977166666666667, Test_acc 0.9932
Epoch 6. Loss: 0.015634777734943508, Train_acc 0.9978333333333333, Test_acc 0.9922
Epoch 7. Loss: 0.012822420199208215, Train_acc 0.9979333333333333, Test_acc 0.9921
Epoch 8. Loss: 0.009639091759004723, Train_acc 0.999, Test_acc 0.9938
Epoch 9. Loss: 0.012779313164300107, Train_acc 0.9991333333333333, Test_acc 0.9945
