In [2]:
from mxnet.gluon import nn
from mxnet import nd


def dropout(x, drop_probability):
    keep_probability = 1 - drop_probability
    assert 0 <= keep_probability <= 1
    if 0 == keep_probability:
        return x.zeros_like()
    mask = nd.random.uniform(0, 1.0, x.shape, ctx=x.context) < keep_probability
    scale = 1 / keep_probability
    return mask * x * scale

In [3]:
a = nd.arange(20).reshape((5, 4))
dropout(a, 0.0)


[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]
 [12. 13. 14. 15.]
 [16. 17. 18. 19.]]
<NDArray 5x4 @cpu(0)>

In [4]:
dropout(a, 0.5)


[[ 0.  0.  0.  6.]
 [ 0. 10.  0.  0.]
 [16. 18. 20.  0.]
 [24. 26.  0.  0.]
 [ 0. 34.  0.  0.]]
<NDArray 5x4 @cpu(0)>

In [5]:
import utils

batch_size = 256
train_data, test_data = utils.load_data_fashion_mnist(batch_size)

Downloading datasets\train-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-images-idx3-ubyte.gz...
Downloading datasets\train-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/train-labels-idx1-ubyte.gz...
Downloading datasets\t10k-images-idx3-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-images-idx3-ubyte.gz...
Downloading datasets\t10k-labels-idx1-ubyte.gz from https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/fashion-mnist/t10k-labels-idx1-ubyte.gz...


In [6]:
num_inputs = 28 * 28
num_outputs = 10
num_hidden1 = 256
num_hidden2 = 256
weight_scale = .01

w1 = nd.random.normal(shape=(num_inputs, num_hidden1), scale=weight_scale)
b1 = nd.zeros(num_hidden1)

w2 = nd.random.normal(shape=(num_hidden1, num_hidden2), scale=weight_scale)
b2 = nd.zeros(num_hidden2)

w3 = nd.random.normal(shape=(num_hidden2, num_outputs), scale=weight_scale)
b3 = nd.zeros(num_outputs)

params = [w1, b1, w2, b2, w3, b3]

for param in params:
    param.attach_grad()

In [7]:
drop_prob1 = 0.2
drop_prob2 = 0.5

def net(x):
    x = x.reshape((-1, num_inputs))
    h1 = nd.relu(nd.dot(x, w1) + b1)
    h1 = dropout(h1, drop_prob1)
    h2 = nd.relu(nd.dot(h1, w2) + b2)
    h2 = dropout(h2, drop_prob2)
    return nd.dot(h2, w3) + b3

In [8]:
from mxnet import autograd as ag
from mxnet import gluon

softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
learning_rate = .5

for epoch in range(5):
    train_loss = 0.
    train_acc = 0.
    for data, label in train_data:
        with ag.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        utils.SGD(params, learning_rate/batch_size)
        train_loss += nd.mean(loss).asscalar()
        train_acc += utils.accuracy(output, label)
    test_acc = utils.evaluate_accuracy(test_data, net)
    print('Epoch %d, Loss :%f, Train acc %f, Test acc %f' % (
        epoch, train_loss/len(train_data),
        train_acc/len(train_data), test_acc))

Epoch 0, Loss :1.226745, Train acc 0.527543, Test acc 0.762695
Epoch 1, Loss :0.607888, Train acc 0.775360, Test acc 0.827539
Epoch 2, Loss :0.500961, Train acc 0.818861, Test acc 0.839648
Epoch 3, Loss :0.457722, Train acc 0.833987, Test acc 0.832715
Epoch 4, Loss :0.434216, Train acc 0.843201, Test acc 0.856641
