In [1]:
import d2lzh as d2l
from mxnet.gluon import data as gdata
import sys
import time
import mxnet as mx
# Build the Fashion-MNIST training and test splits from Gluon's vision datasets.
mnist_train = gdata.vision.FashionMNIST(train=True)
mnist_test = gdata.vision.FashionMNIST(train=False)
# Cell output: the number of examples in each split.
len(mnist_train), len(mnist_test)

(60000, 10000)

In [2]:
def get_fashion_mnist_labels(labels):
    """Translate numeric Fashion-MNIST class indices into their text names.

    Each entry of ``labels`` is converted with ``int()`` and used as an index
    into the ten class names. A list of strings is returned in input order.
    """
    class_names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
    names = []
    for label in labels:
        names.append(class_names[int(label)])
    return names

In [3]:
def show_fashion_mnist(images, labels):
    """Draw a single row of Fashion-MNIST images, each titled with its label.

    Args:
        images: Sequence of images; each must support
            ``reshape((28, 28)).asnumpy()``.
        labels: Sequence of titles, paired positionally with ``images``.

    Images and labels are paired with ``zip``, so surplus entries in the
    longer sequence are ignored.
    """
    d2l.use_svg_display()
    # squeeze=False makes subplots always return a 2-D array of axes, so a
    # single image works as well (by default a 1x1 grid yields a bare Axes
    # object, which cannot be iterated). The leading _ is the figure, which
    # is intentionally unused.
    _, figs = d2l.plt.subplots(1, len(images), figsize=(12, 12), squeeze=False)
    for f, img, lbl in zip(figs[0], images, labels):
        f.imshow(img.reshape((28, 28)).asnumpy())
        f.set_title(lbl)
        # Hide both axes; ticks carry no meaning for image thumbnails.
        f.axes.get_xaxis().set_visible(False)
        f.axes.get_yaxis().set_visible(False)

In [4]:
# Take the first nine training examples; the slice unpacks into the images (X)
# and their labels (y).
X, y = mnist_train[0:9]

In [5]:
# Display the numeric labels of the nine selected examples.
y

array([2, 9, 6, 0, 3, 4, 4, 5, 4])

In [6]:
# Convert the numeric labels into their text names.
get_fashion_mnist_labels(y)

['pullover',
 'ankle boot',
 'shirt',
 't-shirt',
 'dress',
 'coat',
 'coat',
 'sandal',
 'coat']

In [7]:
batch_size = 256
# The ToTensor instance converts image data from uint8 to 32-bit floats and
# divides by 255 so that every pixel value lies between 0 and 1.
transformer = gdata.vision.transforms.ToTensor()
# On Windows use 0 workers (no extra processes to speed up data loading);
# on every other platform use 4.
num_workers = 0 if sys.platform.startswith('win') else 4
train_iter = gdata.DataLoader(
    mnist_train.transform_first(transformer),
    batch_size,
    shuffle=True,
    num_workers=num_workers,
)
test_iter = gdata.DataLoader(
    mnist_test.transform_first(transformer),
    batch_size,
    shuffle=False,
    num_workers=num_workers,
)

In [8]:
# Show the platform identifier that the num_workers choice is based on.
sys.platform

'win32'

In [9]:

import d2lzh as d2l
from mxnet import autograd, nd

In [10]:
batch_size = 256
# Rebinds train_iter and test_iter to the iterators returned by the d2l
# helper, replacing any iterators built by hand earlier in the notebook.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [11]:
# Each 28x28 image is flattened to 784 input features; there are 10 classes.
num_inputs = 784
num_outputs = 10
# Weights are drawn from a normal distribution with scale 0.01 and the bias
# starts at zero; both are allocated on the GPU context.
W = nd.random.normal(scale=0.01, shape=(num_inputs, num_outputs), ctx=mx.gpu())
b = nd.zeros(num_outputs, ctx=mx.gpu())

In [12]:
# Attach gradients to the model parameters.
W.attach_grad()
b.attach_grad()

In [13]:
# Demo: sum a 2x3 matrix along axis 0 (down the columns) and along axis 1
# (across the rows); keepdims=True keeps the reduced axis, so the results
# have shapes 1x3 and 2x1.
X = nd.array([[1, 2, 3], [4, 5, 6]])
X.sum(axis=0, keepdims=True), X.sum(axis=1, keepdims=True)

(
 [[5. 7. 9.]]
 <NDArray 1x3 @cpu(0)>, 
 [[ 6.]
  [15.]]
 <NDArray 2x1 @cpu(0)>)

In [14]:
def softmax(X):
    """Apply softmax to each row of a 2-D array.

    Args:
        X: Array of scores with one example per row.

    Returns:
        Array of the same shape in which every row is non-negative and sums
        to 1.

    The row-wise maximum is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``exp()`` from overflowing to
    inf (which would make the division produce nan) when scores are large.
    """
    X_shifted = X - X.max(axis=1, keepdims=True)
    X_exp = X_shifted.exp()
    partition = X_exp.sum(axis=1, keepdims=True)
    return X_exp / partition  # broadcasting divides each row by its own sum

In [15]:
# Sanity check on a random 2x5 matrix: show the softmax output together with
# its row sums, which should each equal 1.
X = nd.random.normal(shape=(2, 5))
X_prob = softmax(X)
X_prob, X_prob.sum(axis=1)

(
 [[0.41841468 0.21212995 0.17643595 0.15238647 0.04063296]
  [0.2188216  0.02668872 0.0043691  0.07917226 0.6709483 ]]
 <NDArray 2x5 @cpu(0)>, 
 [1. 1.]
 <NDArray 2 @cpu(0)>)

In [16]:
def net(X):
    """Softmax-regression forward pass.

    Flattens each example in ``X`` to ``num_inputs`` features, applies the
    affine map defined by the notebook-level ``W`` and ``b``, and returns the
    row-wise softmax of the result.
    """
    flat = X.reshape((-1, num_inputs))
    logits = nd.dot(flat, W) + b
    return softmax(logits)

In [17]:
def cross_entropy(y_hat, y):
    """Per-example cross-entropy loss.

    ``nd.pick`` selects from ``y_hat`` using the indices in ``y``; the
    negative logarithm of the picked values is returned.
    """
    picked = nd.pick(y_hat, y)
    return -picked.log()

In [18]:
def accuracy(y_hat, y):
    """Fraction of rows in ``y_hat`` whose arg-max equals the label in ``y``.

    Labels are cast to float32 before the comparison; the mean of the
    element-wise matches is returned via ``asscalar()``.
    """
    predicted = y_hat.argmax(axis=1)
    hits = predicted == y.astype('float32')
    return hits.mean().asscalar()

In [19]:
def evaluate_accuracy(data_iter, net, ctx=None):
    """Compute the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches of features and labels.
        net: Callable mapping a feature batch to per-class scores, one row
            per example.
        ctx: Device context on which the batches are placed before calling
            ``net``. Defaults to ``mx.gpu()``, which matches the device that
            was previously hard-coded here.

    Returns:
        Number of correct predictions divided by the number of examples seen.
    """
    if ctx is None:
        ctx = mx.gpu()
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        # Place features and labels on the target device; as_in_context only
        # copies when the array is not already there.
        X = X.as_in_context(ctx)
        # Labels are cast to float32 before being compared with the
        # arg-max predictions.
        y = y.as_in_context(ctx).astype('float32')
        acc_sum += (net(X).argmax(axis=1) == y).sum().asscalar()
        n += y.size
    return acc_sum / n

In [20]:
# Test-set accuracy of the model before any training has been run.
evaluate_accuracy(test_iter, net)

0.1435

In [21]:
# Hyperparameters for the from-scratch training run: epoch count and learning rate.
num_epochs, lr = 5, 0.1

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, trainer=None):
    """Train ``net`` and print loss and accuracy after every epoch.

    Args:
        net: Callable mapping a feature batch to per-class scores.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable passed to ``evaluate_accuracy`` after each epoch.
        loss: Callable ``loss(y_hat, y)`` returning per-example losses.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Batch size handed to the parameter update step.
        params: Parameters updated by ``d2l.sgd``; required when ``trainer``
            is None.
        lr: Learning rate for ``d2l.sgd``; required when ``trainer`` is None.
        trainer: Optional object whose ``step(batch_size)`` performs the
            update. When given, ``params`` and ``lr`` are not used.

    Raises:
        ValueError: If ``trainer`` is None and ``params`` or ``lr`` is missing.
    """
    # Fail fast with a clear message instead of erroring inside d2l.sgd after
    # a forward/backward pass has already been computed.
    if trainer is None and (params is None or lr is None):
        raise ValueError('params and lr are required when no trainer is given')
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y_cpu in train_iter:
            # Copy the batch to the GPU before running the model on it.
            y = y_cpu.copyto(mx.gpu())
            with autograd.record():
                y_hat = net(X.copyto(mx.gpu()))
                l = loss(y_hat, y).sum()
            l.backward()
            if trainer is None:
                d2l.sgd(params, lr, batch_size)
            else:
                # Used by the concise implementation of softmax regression.
                trainer.step(batch_size)
            # Labels are cast to float32 before being compared with the
            # arg-max predictions.
            y = y.astype('float32')
            train_l_sum += l.asscalar()
            train_acc_sum += (y_hat.argmax(axis=1) == y).sum().asscalar()
            n += y.size
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))


In [22]:
# Train the from-scratch model with the hand-written cross-entropy loss.
# Passing [W, b] and lr without a trainer selects the d2l.sgd update path.
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

epoch 1, loss 0.7883, train acc 0.747, test acc 0.804
epoch 2, loss 0.5727, train acc 0.811, test acc 0.827
epoch 3, loss 0.5293, train acc 0.823, test acc 0.827
epoch 4, loss 0.5047, train acc 0.830, test acc 0.833
epoch 5, loss 0.4899, train acc 0.835, test acc 0.834


In [6]:
# Concise implementation of softmax regression

import d2lzh as d2l
from mxnet import gluon, init
from mxnet.gluon import loss as gloss, nn
import mxnet as mx

In [7]:
# Ask d2l for a device context and display it.
# NOTE(review): ctx is not passed to any call in the visible cells that
# follow — confirm whether it is still needed.
ctx = d2l.try_gpu()
ctx

gpu(0)

In [None]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
# The model is a single dense layer with 10 outputs, one per class.
net = nn.Sequential()
net.add(nn.Dense(10))
# Initialize the weights from a normal distribution with sigma 0.01.
net.initialize(init.Normal(sigma=0.01))
loss = gloss.SoftmaxCrossEntropyLoss()
# SGD with learning rate 0.1 over all parameters collected from the network.
trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.1})

num_epochs = 20
# params and lr are passed as None; presumably the update then goes through
# the trainer instead — verify against d2l.train_ch3.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)


epoch 1, loss 0.7870, train acc 0.750, test acc 0.805
epoch 2, loss 0.5729, train acc 0.811, test acc 0.824
epoch 3, loss 0.5287, train acc 0.823, test acc 0.829
epoch 4, loss 0.5055, train acc 0.829, test acc 0.832
epoch 5, loss 0.4887, train acc 0.834, test acc 0.841
epoch 6, loss 0.4781, train acc 0.839, test acc 0.844
epoch 7, loss 0.4702, train acc 0.839, test acc 0.845
epoch 8, loss 0.4620, train acc 0.842, test acc 0.846
epoch 9, loss 0.4570, train acc 0.844, test acc 0.847
epoch 10, loss 0.4516, train acc 0.845, test acc 0.849
epoch 11, loss 0.4475, train acc 0.846, test acc 0.849
epoch 12, loss 0.4436, train acc 0.848, test acc 0.850
epoch 13, loss 0.4396, train acc 0.849, test acc 0.849
epoch 14, loss 0.4370, train acc 0.850, test acc 0.850
epoch 15, loss 0.4339, train acc 0.851, test acc 0.852
epoch 16, loss 0.4319, train acc 0.852, test acc 0.852
epoch 17, loss 0.4301, train acc 0.853, test acc 0.854
epoch 18, loss 0.4283, train acc 0.852, test acc 0.850
epoch 19, loss 0.42