`gluon`を使った効率的実装

In [1]:
import mxnet as mx
from mxnet import nd, autograd
from mxnet import gluon
import numpy as np

In [2]:
ctx = mx.cpu()

# The MNIST Dataset
先と一緒

In [3]:
mnist = mx.test_utils.get_mnist()

# Data Iterators
これも一緒

In [4]:
batch_size = 64
train_data = mx.io.NDArrayIter(mnist["train_data"], mnist["train_label"], batch_size, shuffle=True)
test_data = mx.io.NDArrayIter(mnist["test_data"], mnist["test_label"], batch_size, shuffle=True)

# Multiclass Logistic Regression
ここから違う

In [5]:
net = gluon.nn.Sequential()
with net.name_scope():
    net.add(gluon.nn.Dense(10))  # 出力10クラス

# Parameter initialization

In [6]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

# Softmax Cross Entropy Loss
MXNetには効率的にsoftmaxによる活性化とCross Entropyによる損失計算をやってくれるのがあるのでそっちを使う

In [7]:
loss = gluon.loss.SoftmaxCrossEntropyLoss()

# Optimizer

In [8]:
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

# Evaluation Metric
評価のコードもビルトインの`metric`を使って簡単に

In [9]:
def evaluate_accuracy(data_iterator, net):
    numerator = 0.
    denominator =0.
    data_iterator.reset()
    for i, batch in enumerate(data_iterator):
        data = batch.data[0].as_in_context(ctx).reshape((-1, 784))
        label = batch.label[0].as_in_context(ctx)
        label_one_hot = nd.one_hot(label, 10)
        output = net(data)
        predictions = nd.argmax(output, axis=1)
        numerator += nd.sum(predictions == label)
        denominator += data.shape[0]
    return (numerator / denominator).asscalar()

In [10]:
evaluate_accuracy(test_data, net) # .10近傍になるはず

0.081409238

# Execute training loop

In [11]:
epochs = 10
moving_loss = 0.

for e in range(epochs):
    train_data.reset()
    for i, batch in enumerate(train_data):
        data = batch.data[0].as_in_context(ctx).reshape((-1,784))
        label = batch.label[0].as_in_context(ctx)
        with autograd.record():
            output = net(data)
            cross_entropy = loss(output, label)
        cross_entropy.backward()
        trainer.step(data.shape[0])

        ##########################
        #  Keep a moving average of the losses
        ##########################
        if i == 0:
            moving_loss = np.mean(cross_entropy.asnumpy()[0])
        else:
            moving_loss = .99 * moving_loss + .01 * np.mean(cross_entropy.asnumpy()[0])

    test_accuracy = evaluate_accuracy(test_data, net)
    train_accuracy = evaluate_accuracy(train_data, net)
    print('Epoch {}. Loss: {}, Train_acc {}, Test_acc {}'.format(e, moving_loss, train_accuracy, test_accuracy))

Epoch 0. Loss: 0.31170054768420913, Train_acc 0.9039345979690552, Test_acc 0.9107285141944885
Epoch 1. Loss: 0.27062708281545905, Train_acc 0.9122301340103149, Test_acc 0.915207028388977
Epoch 2. Loss: 0.25293948852486975, Train_acc 0.9162946343421936, Test_acc 0.9171974658966064
Epoch 3. Loss: 0.2420894935405671, Train_acc 0.9190098643302917, Test_acc 0.9195860028266907
Epoch 4. Loss: 0.2344389898403261, Train_acc 0.9210087656974792, Test_acc 0.9208797812461853
Epoch 5. Loss: 0.2286221874448365, Train_acc 0.9223247766494751, Test_acc 0.9213773608207703
Epoch 6. Loss: 0.22398332934970872, Train_acc 0.9235241413116455, Test_acc 0.9221735596656799
Epoch 7. Loss: 0.22015783823210588, Train_acc 0.9244902729988098, Test_acc 0.9230692386627197
Epoch 8. Loss: 0.21692255642840216, Train_acc 0.9251732230186462, Test_acc 0.9227706789970398
Epoch 9. Loss: 0.21413185209825747, Train_acc 0.9257729053497314, Test_acc 0.9227706789970398


> 明らかにgluonを利用した方が早い・・・