In [1]:
# https://gluon.mxnet.io/chapter03_deep-neural-networks/mlp-gluon.html#Faster-modeling-with-gluon.nn.Sequential
from __future__ import print_function
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon

  from ._conv import register_converters as _register_converters


In [2]:
data_ctx = mx.cpu()
model_ctx = mx.cpu()
ctx = mx.cpu()

# data_ctx = mx.gpu(0)
# model_ctx = mx.gpu(0)
# ctx = mx.gpu(0)

In [3]:
batch_size = 64
num_inputs = 784
num_outputs = 10
num_examples = 60000
def transform(data, label):
    return data.astype(np.float32)/255, label.astype(np.float32)
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)

In [4]:
print(train_data)

<mxnet.gluon.data.dataloader.DataLoader object at 0x0000000010E69748>


In [5]:
class MLP(gluon.Block):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        with self.name_scope():
            self.dense0 = gluon.nn.Dense(64, activation="relu")
            self.dense1 = gluon.nn.Dense(64, activation="relu")
            self.dense2 = gluon.nn.Dense(10)

    def forward(self, x):
        x = self.dense0(x)
        print("Hidden Representation 1: %s" % x)
        x = self.dense1(x)
        print("Hidden Representation 2: %s" % x)
        x = self.dense2(x)
        print("Network output: %s" % x)
        return x

In [6]:
net = MLP()
net.collect_params().initialize(mx.init.Normal(sigma=.01), ctx=model_ctx)
data = nd.ones((1,784))
net(data.as_in_context(model_ctx))

Hidden Representation 1: 
[[0.         0.20633659 0.         0.19814774 0.06757911 0.
  0.04581137 0.         0.         0.         0.         0.00434881
  0.         0.         0.2749665  0.         0.0267767  0.
  0.         0.42491484 0.         0.         0.         0.
  0.26656955 0.14948608 0.         0.         0.         0.
  0.38377756 0.2233338  0.         0.         0.09558368 0.
  0.03616822 0.04499476 0.18303654 0.02092158 0.45617974 0.
  0.         0.24218619 0.0568591  0.         0.         0.
  0.646303   0.39750287 0.55378324 0.03892382 0.08774539 0.11992356
  0.         0.         0.08294234 0.         0.2156929  0.0034603
  0.2929723  0.58968043 0.6703476  0.5000106 ]]
<NDArray 1x64 @cpu(0)>
Hidden Representation 2: 
[[0.01581744 0.0259838  0.01134174 0.         0.00141658 0.
  0.         0.00834727 0.         0.0166119  0.00969328 0.01600609
  0.00766216 0.00607042 0.         0.03669691 0.02558209 0.
  0.00698622 0.         0.         0.         0.02138643 0.
  0.01


[[-5.2642531e-04 -4.8494514e-04 -9.1016911e-05 -1.0700598e-03
   9.5340383e-04  1.2931202e-03 -3.8861966e-04 -6.4619159e-04
   1.3646437e-04 -1.7153831e-03]]
<NDArray 1x10 @cpu(0)>

In [7]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)
  v.initialize(None, ctx, init, force_reinit=force_reinit)


In [8]:
num_hidden = 64
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_hidden, activation="relu"))
    net.add(gluon.nn.Dense(num_outputs))

In [9]:
net.collect_params().initialize(mx.init.Normal(sigma=.1), ctx=model_ctx)

In [10]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

In [11]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': .01})

In [12]:
def evaluate_accuracy(data_iterator, net):
    acc = mx.metric.Accuracy()
    for i, (data, label) in enumerate(data_iterator):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        acc.update(preds=predictions, labels=label)
    return acc.get()[1]

In [16]:
epochs = 1
smoothing_constant = .01

for e in range(epochs):
    cumulative_loss = 0
    for i, (data, label) in enumerate(train_data):
        data = data.as_in_context(model_ctx).reshape((-1, 784))
        label = label.as_in_context(model_ctx)
        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])
        cumulative_loss += nd.sum(loss).asscalar()
        print(np.size(data,0), np.size(data,1))
        print(np.size(label,0))

    print(type(test_data))
    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print("Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" %
          (e, cumulative_loss/num_examples, train_accuracy, test_accuracy))

64 784


IndexError: tuple index out of range

In [None]:
batch_size = 64
num_inputs = 784
num_outputs = 10
num_examples = 60000
def transform(data, label):
    return data.astype(np.float32)/255, label.astype(np.float32)
train_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform),
                                      batch_size, shuffle=True)
test_data = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform),
                                     batch_size, shuffle=False)

In [None]:
for i, (data,label) in enumerate(train_data):
    print(i)
    print(data)
    print(label)