In [1]:
from __future__ import print_function
import mxnet as mx
import numpy as np
from mxnet import nd, autograd, gluon

In [2]:
## Load MNIST data -- sizes shared by the cells below
num_inputs, num_outputs = 784, 10   # length of a flattened input, number of output scores
batch_size = 64                     # examples per mini-batch served by the DataLoaders
num_examples = 60000                # divisor used to average the summed training loss per epoch
def transform(data, label):
    """Convert one sample to float32, dividing the data values by 255.

    Returns a ``(data, label)`` tuple in which both elements have been cast
    to ``np.float32``; only the data is rescaled.
    """
    scaled = data.astype(np.float32) / 255
    return scaled, label.astype(np.float32)
# Training batches are reshuffled; test batches keep the dataset order.
train_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=True, transform=transform),
    batch_size=batch_size,
    shuffle=True)
test_data = gluon.data.DataLoader(
    gluon.data.vision.MNIST(train=False, transform=transform),
    batch_size=batch_size,
    shuffle=False)

In [51]:
## Define the CPU/GPU contexts (devices) used by the cells below
# NOTE(review): data_ctx is not referenced by any other cell visible here.
data_ctx = mx.cpu(0)
# Device that holds the gluon.Block model (net) and its batches.
model_ctx = mx.gpu(0)
# Device that holds the Sequential model (net_seq). This uses GPU index 1, so
# presumably a second GPU must be present -- confirm on the target machine.
model_seq_ctx = mx.gpu(1)

### 1. Define the model as a custom block (gluon.Block)

In [4]:
class MLP(gluon.Block):
    """Multilayer perceptron: Dense(128, relu) -> Dense(64, relu) -> Dense(10).

    The last layer has no activation, so ``forward`` returns the raw outputs
    of ``dense2`` (10 values per input row).
    """

    def __init__(self,**kwargs):
        """Create the three dense layers; ``kwargs`` are passed to ``gluon.Block``."""
        super(MLP,self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(128,activation="relu")
            self.dense1 = gluon.nn.Dense(64,activation="relu")
            self.dense2 = gluon.nn.Dense(10)

    def forward(self,x):
        """Run ``x`` through dense0, dense1 and dense2 in order and return the result."""
        # dense0 and dense1 already apply ReLU through activation="relu", so no
        # extra nd.relu call is needed here (ReLU applied twice equals ReLU
        # applied once, so the outputs are unchanged).
        x = self.dense0(x)
        x = self.dense1(x)
        return self.dense2(x)

In [5]:
# Instantiate the network; its parameters are initialized in a later cell.
net = MLP()
# NOTE(review): collect_params is referenced without being called, so this cell
# displays the bound-method repr instead of the parameter dictionary. If the
# parameters were meant to be listed, this should presumably be
# net.collect_params() -- confirm the intent.
net.collect_params

<bound method Block.collect_params of MLP(
  (dense0): Dense(None -> 128, Activation(relu))
  (dense1): Dense(None -> 64, Activation(relu))
  (dense2): Dense(None -> 10, linear)
)>

In [34]:
# Inspect the shape of the first layer's weight matrix.
# NOTE(review): this cell was executed as In [34], i.e. after the initialization
# and forward-pass cells that appear further down. Presumably it fails on a
# fresh top-to-bottom run because the weights are not yet initialized at this
# point -- confirm, and consider moving it below the first forward pass.
net.dense0.weight.data().shape

(128, 784)

In [6]:
## Initialize the parameters of net with mx.init.Normal() on the model device
net.collect_params().initialize(init=mx.init.Normal(), ctx=model_ctx)

In [27]:
## Smoke-test the model with a single all-ones input of length 784
data = nd.ones((1,784))
# The input is copied to the model's context before the forward pass because
# the parameters were initialized on model_ctx.
out = net(data.as_in_context(model_ctx))

In [28]:
# Shape of the smoke-test output: one row per input, one column per dense2 unit.
out.shape

(1, 10)

In [8]:
## Define the softmax cross-entropy loss
# The training loops call this with the raw network output and the labels.
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [40]:
# Define the optimizer: SGD over all parameters of net, learning rate 0.01
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    {'learning_rate': 0.01})

In [69]:
## Define the evaluation metric
def evaluate_accuracy(data_iterator, net, gpu_init):
    """Return the accuracy of ``net`` over every batch in ``data_iterator``.

    data_iterator: iterable yielding ``(data, label)`` batches.
    net: callable model applied to each reshaped batch.
    gpu_init: context that each batch is copied to before the forward pass.
    """
    metric = mx.metric.Accuracy()
    for batch_data, batch_label in data_iterator:
        # Flatten each example to a 784-long row on the requested context.
        features = batch_data.as_in_context(gpu_init).reshape((-1, 784))
        targets = batch_label.as_in_context(gpu_init)
        scores = net(features)
        # The predicted class is the index of the largest output per row.
        metric.update(preds=nd.argmax(scores, axis=1), labels=targets)
    _, accuracy = metric.get()
    return accuracy

In [41]:
# Train the gluon.Block model (net) on model_ctx and report the average loss
# plus train/test accuracy after every epoch.
epochs = 10

for e in range(epochs):
    # Sum of the per-example losses seen during this epoch.
    cumulative_loss = 0
    for data, label in train_data:
        # Copy the batch to the model's context and flatten each example.
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Pass the actual size of this batch to the optimizer step.
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net,model_ctx)
    train_accuracy = evaluate_accuracy(train_data, net,model_ctx)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))

Epoch 0. Loss: 2.3015143542, Train_acc 0.112366666667, Test_acc 0.1135
Epoch 1. Loss: 2.29972713038, Train_acc 0.112366666667, Test_acc 0.1135
Epoch 2. Loss: 2.29102112134, Train_acc 0.215416666667, Test_acc 0.2157
Epoch 3. Loss: 2.03063560117, Train_acc 0.448716666667, Test_acc 0.4529
Epoch 4. Loss: 1.05836072785, Train_acc 0.76565, Test_acc 0.7726
Epoch 5. Loss: 0.652320930799, Train_acc 0.828583333333, Test_acc 0.8332
Epoch 6. Loss: 0.529780993462, Train_acc 0.85845, Test_acc 0.8609
Epoch 7. Loss: 0.463093396139, Train_acc 0.8752, Test_acc 0.8767
Epoch 8. Loss: 0.420938806287, Train_acc 0.8874, Test_acc 0.8895
Epoch 9. Loss: 0.387553001785, Train_acc 0.89545, Test_acc 0.8954


### 2. Sequential approach (gluon.nn.Sequential)

In [62]:
# Same three-layer architecture as above, built by stacking Dense layers.
net_seq = gluon.nn.Sequential()
with net_seq.name_scope():
    # (units, activation) for each layer; the output layer has no activation.
    for units, activation in ((128, "relu"), (64, "relu"), (10, None)):
        net_seq.add(gluon.nn.Dense(units, activation=activation))

In [63]:
# Display the layer summary of the Sequential model.
net_seq

Sequential(
  (0): Dense(None -> 128, Activation(relu))
  (1): Dense(None -> 64, Activation(relu))
  (2): Dense(None -> 10, linear)
)

In [64]:
## Initialize the parameters of net_seq with mx.init.Normal() on its own device
net_seq.collect_params().initialize(init=mx.init.Normal(), ctx=model_seq_ctx)

In [65]:
# Define the optimizer: SGD over all parameters of net_seq, learning rate 0.01
trainer_seq = gluon.Trainer(
    net_seq.collect_params(),
    'sgd',
    {'learning_rate': 0.01})

In [70]:
# Train the Sequential model (net_seq) on model_seq_ctx and report the average
# loss plus train/test accuracy after every epoch.
epochs = 10

for e in range(epochs):
    # Sum of the per-example losses seen during this epoch.
    cumulative_loss = 0
    for data, label in train_data:
        # Copy the batch to the model's context and flatten each example.
        data = data.as_in_context(model_seq_ctx).reshape((-1, 784))
        label = label.as_in_context(model_seq_ctx)
        # Record the forward pass so that backward() can compute gradients.
        with autograd.record():
            output = net_seq(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        # Pass the actual size of this batch to the optimizer step.
        trainer_seq.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()

    test_accuracy = evaluate_accuracy(test_data, net_seq,model_seq_ctx)
    train_accuracy = evaluate_accuracy(train_data, net_seq,model_seq_ctx)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))

Epoch 0. Loss: 2.28616122996, Train_acc 0.208333333333, Test_acc 0.2091
Epoch 1. Loss: 1.97638381081, Train_acc 0.464833333333, Test_acc 0.4672
Epoch 2. Loss: 1.15075605542, Train_acc 0.755916666667, Test_acc 0.759
Epoch 3. Loss: 0.693354644616, Train_acc 0.809016666667, Test_acc 0.8178
Epoch 4. Loss: 0.594338152409, Train_acc 0.834083333333, Test_acc 0.8376
Epoch 5. Loss: 0.541639323203, Train_acc 0.851333333333, Test_acc 0.8521
Epoch 6. Loss: 0.494264999533, Train_acc 0.866066666667, Test_acc 0.8642
Epoch 7. Loss: 0.442140702486, Train_acc 0.881433333333, Test_acc 0.8784
Epoch 8. Loss: 0.394981248665, Train_acc 0.892766666667, Test_acc 0.8912
Epoch 9. Loss: 0.358843960444, Train_acc 0.89995, Test_acc 0.8986
