In [24]:
import mxnet as mx
import mxnet.autograd as ag

In [25]:
from mxnet import gluon
from mxnet import ndarray as nd

def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')

mnist_train = gluon.data.vision.FashionMNIST(train=True, transform=transform)
mnist_test = gluon.data.vision.FashionMNIST(train=False, transform=transform)

batch_size = 256
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle=True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle=False)

In [26]:
num_inputs = 784
num_outputs = 10
num_hidden = 250

W1 = nd.random_normal(0, 0.01, shape=(num_inputs, num_hidden), ctx=mx.gpu())
b1 = nd.zeros((num_hidden,), ctx=mx.gpu())

W2 = nd.random_normal(0, 0.01, shape=(num_hidden, num_outputs), ctx=mx.gpu())
b2 = nd.zeros((num_outputs,), ctx=mx.gpu())

params=[W1, b2, W2, b2]

for param in params:
    param.attach_grad()

In [27]:
def softmax(score):
    """
    input:
    - score N*D的矩阵，N代表样本的个数，D代表特征的长度
    output:
    - prob N*D的矩阵，每个元素代表了概率
    """
    max_value = nd.max(score, axis=1, keepdims=True)
    score = score - max_value
    exp_score = nd.exp(score)
    prob = exp_score / nd.sum(exp_score, axis=1, keepdims=True)
    return prob

# 定义loss
def cross_entropy_loss(y, y_hat):
    return - nd.pick(nd.log(y), y_hat)


def relu(x):
    return nd.maximum(x, 0)
    

In [28]:
def net(X):
    X = X.reshape((X.shape[0], -1))
    hidden = relu(nd.dot(X, W1) + b1)
    output = nd.dot(hidden, W2) + b2
    return softmax(output)

In [29]:
def accuracy(y, y_hat):
    return nd.mean(nd.argmax(y, axis=1) == y_hat).asscalar()

def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for data, label in data_iterator:
        output = net(data.as_in_context(mx.gpu()))
        acc += accuracy(output, label.as_in_context(mx.gpu()))
    return acc / len(data_iterator)

In [30]:
def SGD(params, lr):
    for param in params:
        param -= lr * param.grad

In [31]:
epoch = 5
learing_rate = 0.5

for i in range(epoch):
    acc = 0
    for data, label in train_data:
        with ag.record():
            y = net(data.as_in_context(mx.gpu()))
            loss = cross_entropy_loss(y, label.as_in_context(mx.gpu()))
        loss.backward()
        SGD(params, learing_rate / batch_size)
        acc += accuracy(y, label.as_in_context(mx.gpu()))
    acc /= len(train_data)
    print('epoch {}: training accuracy: {}, testing accuracy: {}'.format(i, acc, evaluate_accuracy(test_data, net)))
        

epoch 0: training accuracy: 0.7083665779296388, testing accuracy: 0.82255859375
epoch 1: training accuracy: 0.8168938385679366, testing accuracy: 0.85556640625
epoch 2: training accuracy: 0.8404476949509154, testing accuracy: 0.86669921875
epoch 3: training accuracy: 0.8502992021276595, testing accuracy: 0.86923828125
epoch 4: training accuracy: 0.8609208776595745, testing accuracy: 0.87294921875


# 使用Gluon

In [32]:
import mxnet.gluon.nn as nn

In [33]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Flatten())
    net.add(nn.Dense(250, activation='relu'))
    net.add(nn.Dense(10))
net.initialize(ctx=mx.gpu())
cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.trainer.Trainer(net.collect_params(), optimizer='sgd', optimizer_params={'learning_rate': 0.5})

In [34]:
epoch = 5
learing_rate = 0.5

for i in range(epoch):
    acc = 0
    for data, label in train_data:
        with ag.record():
            y = net(data.as_in_context(mx.gpu()))
            loss = cross_entropy_loss(y, label.as_in_context(mx.gpu()))
        loss.backward()
        trainer.step(batch_size=batch_size)
        acc += accuracy(y, label.as_in_context(mx.gpu()))
    acc /= len(train_data)
    print('epoch {}: training accuracy: {}, testing accuracy: {}'.format(i, acc, evaluate_accuracy(test_data, net)))

epoch 0: training accuracy: 0.7346742021276595, testing accuracy: 0.79091796875
epoch 1: training accuracy: 0.8276374114320634, testing accuracy: 0.838671875
epoch 2: training accuracy: 0.8482878989361702, testing accuracy: 0.8650390625
epoch 3: training accuracy: 0.8592641843126175, testing accuracy: 0.87392578125
epoch 4: training accuracy: 0.8666445034615538, testing accuracy: 0.876953125


## USE Dropout

In [37]:
class Dropout(nn.Block):
    def __init__(self, drop_prob, **kwargs):
        super(Dropout, self).__init__(**kwargs)
        self.drop_prob = drop_prob
    def forward(self, X):
        hold_prob = 1 - self.drop_prob
        mask = nd.random.uniform(0, 1.0, shape=X.shape, ctx=X.context) < hold_prob
        scale = 1 / hold_prob
        if ag.train_mode:
            return X * mask * scale
        else:
            return X

In [38]:
net = nn.Sequential()
with net.name_scope():
    net.add(nn.Flatten())
    net.add(nn.Dense(256, activation='relu'))
    net.add(Dropout(0.2))
    net.add(nn.Dense(256, activation='relu'))
    net.add(Dropout(0.5))
    net.add(nn.Dense(10))
net.initialize(ctx=mx.gpu())
cross_entropy_loss = gluon.loss.SoftmaxCrossEntropyLoss()
trainer = gluon.trainer.Trainer(net.collect_params(), 
                                optimizer='sgd', 
                                optimizer_params={'learning_rate': 0.5})

epoch = 10
learing_rate = 0.5

for i in range(epoch):
    acc = 0
    for data, label in train_data:
        with ag.record():
            y = net(data.as_in_context(mx.gpu()))
            loss = cross_entropy_loss(y, label.as_in_context(mx.gpu()))
        loss.backward()
        trainer.step(batch_size=batch_size)
        acc += accuracy(y, label.as_in_context(mx.gpu()))
    acc /= len(train_data)
    print('epoch {}: training accuracy: {}, testing accuracy: {}'.format(i, acc, evaluate_accuracy(test_data, net)))

epoch 0: training accuracy: 0.6846077127659574, testing accuracy: 0.8044921875
epoch 1: training accuracy: 0.8101285460147452, testing accuracy: 0.83671875
epoch 2: training accuracy: 0.8332058954746165, testing accuracy: 0.84599609375
epoch 3: training accuracy: 0.845811170212766, testing accuracy: 0.8427734375
epoch 4: training accuracy: 0.8551307624958931, testing accuracy: 0.862890625
epoch 5: training accuracy: 0.8596354167512122, testing accuracy: 0.858203125
epoch 6: training accuracy: 0.864223182708659, testing accuracy: 0.8681640625
epoch 7: training accuracy: 0.870151817291341, testing accuracy: 0.86328125
epoch 8: training accuracy: 0.8709940159574469, testing accuracy: 0.87685546875
epoch 9: training accuracy: 0.8760749114320633, testing accuracy: 0.8693359375
