In [1]:
from mxnet.gluon import nn
from mxnet import autograd
from mxnet import gluon
from mxnet import nd

import utils

# Convolutional classifier: two convolution stages followed by a two-layer
# dense head that emits 10 output values per sample.
net = nn.Sequential()
with net.name_scope():
    # The layers are instantiated inside the name scope, in their final order,
    # and then registered one by one in that same order.
    layers = [
        # Stage 1: 5x5 convolution (20 channels) -> batch norm -> ReLU -> 2x2 max pool.
        nn.Conv2D(channels=20, kernel_size=5),
        # axis=1 is the channel axis under Conv2D's default NCHW layout.
        nn.BatchNorm(axis=1),
        nn.Activation(activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),

        # Stage 2: 3x3 convolution (50 channels) -> batch norm -> ReLU -> 2x2 max pool.
        nn.Conv2D(channels=50, kernel_size=3),
        nn.BatchNorm(axis=1),
        nn.Activation(activation='relu'),
        nn.MaxPool2D(pool_size=2, strides=2),
        # Collapse the feature maps to one vector per sample for the dense layers.
        nn.Flatten(),

        # Head: 128-unit hidden layer with ReLU, then a 10-unit output layer
        # with no activation (presumably one raw score per class).
        nn.Dense(128, activation='relu'),
        nn.Dense(10),
    ]
    for layer in layers:
        net.add(layer)

In [2]:
# Device selection is delegated to the project helper utils.try_gpu()
# (presumably a GPU context when one is usable -- confirm in utils).
ctx = utils.try_gpu()
# Initialize the network parameters on that device (default initializer).
net.initialize(ctx=ctx)

# Mini-batch size handed to the data loader.
batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

# Loss function applied to the network output and the labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

# SGD optimizer with learning rate 0.2 over every parameter of the network.
trainer = gluon.Trainer(params=net.collect_params(),
                        optimizer='sgd',
                        optimizer_params={'learning_rate': 0.2})

In [3]:
# Training loop: five passes over train_data. After each pass, print the
# per-batch average of the training loss and training accuracy, together with
# the accuracy measured on test_data.
for epoch in range(5):
    # Running sums of the per-batch mean loss and per-batch accuracy.
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        # Move the batch to the same device as the network parameters.
        data = data.as_in_context(ctx)
        label = label.as_in_context(ctx)
        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Trainer.step() rescales the gradient by 1 / <its argument>. Pass the
        # number of samples actually present in this batch instead of the
        # nominal batch_size, so that a smaller final batch is averaged
        # correctly rather than being under-weighted.
        trainer.step(data.shape[0])

        # asscalar() copies the value to the host as a Python float.
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)

    test_acc = utils.evaluate_accuracy(test_data, net, ctx)
    # len(train_data) is used as the number of batches seen this epoch.
    print('Epoch %d , Loss: %f, Train acc: %f, Test acc: %f' %(
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

Epoch 0 , Loss: 0.583778, Train acc: 0.783344, Test acc: 0.852344
Epoch 1 , Loss: 0.360016, Train acc: 0.867686, Test acc: 0.885840
Epoch 2 , Loss: 0.305558, Train acc: 0.888281, Test acc: 0.889160
Epoch 3 , Loss: 0.277306, Train acc: 0.897645, Test acc: 0.902832
Epoch 4 , Loss: 0.252794, Train acc: 0.907281, Test acc: 0.894141
