### 获取数据
使用gluon的data.vision模块自动下载数据

In [1]:
from mxnet import gluon
from mxnet import ndarray as nd

def transform(data, label):
    return data.astype('float32')/255, label.astype('float32')
mnist_train = gluon.data.vision.FashionMNIST(train = True, transform = transform)
mnist_test = gluon.data.vision.FashionMNIST(train = False, transform = transform)

In [96]:
import matplotlib.pyplot as plt
def show_images(images):
    n = images.shape[0]
    _, figs = plt.subplots(1, n, figsize = (15, 15))
    for i in range(n):
        figs[i].imshow(images[i].reshape((28, 28)).asnumpy())
        figs[i].axes.get_xaxis().set_visible(False)
        figs[i].axes.get_yaxis().set_visible(False)
    plt.show()
data,label = mnist_train[:9]


### 数据读取
可以使用yield形成遍历器，也可以使用gluon.data的DataLoader函数，每次也是返回一个批量

In [2]:
batch_size = 256
train_data = gluon.data.DataLoader(mnist_train, batch_size, shuffle = True)
test_data = gluon.data.DataLoader(mnist_test, batch_size, shuffle = False)

### 初始化模型参数
这里使用的数据是28 * 28大小的图片，因而输入向量的长度是784，又由于输出为10的向量，因而权重为一个 784 * 10的矩阵

In [3]:
num_inputs = 784
num_outputs = 10

w = nd.random_normal(shape = (num_inputs, num_outputs))
b = nd.random_normal(shape = num_outputs)

params = [w, b]

为参数附上梯度

In [4]:
for param in params:
    param.attach_grad()

### 定义模型
输出为可能的分类，使用softmax将任意输入归一化

In [5]:
from mxnet import nd
def softmax(x):
    exp = nd.exp(x)
    #对行进行求和并且保留axis为1
    #返回（rows， 1）的矩阵
    partition = exp.sum(axis = 1, keepdims = True)
    return exp / partition

以上，我们将每个元素变为非负数，并且和为1

定义模型

In [6]:
def net(x):
    return softmax(nd.dot(x.reshape((-1, num_inputs)), w) + b)

In [7]:
def SGD(params, learning_rate):
    for param in params:
        param[:] = param - learning_rate * param.grad

### 交叉熵损失函数
需要定义一个预测为概率的损失函数,ylog(yhat),又由于y向量中只有其中之一为1，因此把该项找出来进行log

In [8]:
def cross_entropy(yhat, y):
    return -nd.log(nd.pick(yhat,y))

### 计算精度
把输出最高概率选为预测的类

In [9]:
def accuracy(output, label):
    return nd.mean(output.argmax(axis = 1) == label).asscalar()

评估一个模型在数据集上的精度

In [10]:
def evaluate_accuracy(data_iterator, net):
    acc = 0.
    for data,label in data_iterator:
        output = net(data)
        acc += accuracy(output, label)
    return acc/len(data_iterator)

In [None]:
import sys
sys.path.append('..')

from mxnet import autograd
epochs = 5
learning_rate = 0.1
for e in range(epochs):
    train_loss = 0
    test_loss = 0
    train_acc = 0
    test_acc = 0
    for data,label in train_data:
        with autograd.record():
            output = net(data)
            loss = cross_entropy(output, label)
        loss.backward()
        SGD(params, 0.001)
        train_loss += nd.mean(loss).asscalar()
        train_acc += accuracy(output, label)
    test_acc = evaluate_accuracy(test_data, net)
    print("Epoch: %d, Loss: %f, Train acc: %f, Test acc: %f" % (e, train_loss/len(train_data),train_acc/len(train_data),test_acc/len(test_data)))

Epoch: 0, Loss: 0.773692, Train acc: 0.796022, Test acc: 0.019973
Epoch: 1, Loss: 0.754029, Train acc: 0.800105, Test acc: 0.019839
Epoch: 2, Loss: 0.750002, Train acc: 0.799601, Test acc: 0.020193
Epoch: 3, Loss: 0.722233, Train acc: 0.803524, Test acc: 0.020156


### 预测 

### Question
为什么使用softmax和交叉熵会有数值不稳定，该如何改善?