In [1]:
import torch
import numpy as np
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

# 获取和读取数据

In [2]:
# Number of examples per mini-batch; the same value is passed to the training calls further down.
batch_size = 256
# Ask the d2l helper for the Fashion-MNIST data iterators; the result is unpacked as a (train, test) pair.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# 定义模型参数

我们在3.6节（softmax回归的从零开始实现）里已经介绍了，Fashion-MNIST数据集中图像形状为 $28 \times 28$，类别数为 $10$。本节中我们依然使用长度为 $28 \times 28 = 784$ 的向量表示每一张图像。因此，输入个数为 $784$，输出个数为 $10$。实验中，我们设超参数隐藏单元个数为 $256$。

In [3]:
# Layer sizes: 784 inputs (one flattened 28x28 image), 10 output classes, 256 hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Hidden layer: weights sampled from N(0, 0.01^2) with NumPy, bias initialised to zero.
W1 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_inputs, num_hiddens)),
    dtype=torch.float32,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float32)

# Output layer: same initialisation scheme as the hidden layer.
W2 = torch.tensor(
    np.random.normal(loc=0, scale=0.01, size=(num_hiddens, num_outputs)),
    dtype=torch.float32,
)
b2 = torch.zeros(num_outputs, dtype=torch.float32)

# Collect the parameters and switch on gradient tracking in place (requires_grad=True).
params = [W1, b1, W2, b2]
for param in params:
    param.requires_grad_(True)

# 定义激活函数

In [4]:
def relu(X):
    """Element-wise ReLU: return max(X, 0) for every entry of ``X``.

    The zero operand is created with the same dtype and device as ``X``.
    A hard-coded ``torch.tensor(0.0)`` is always a float32 CPU tensor, which
    silently promotes integer inputs to float and ignores where ``X`` lives.
    For floating-point inputs the result (and its gradient) is unchanged.

    Args:
        X: input tensor of any shape.

    Returns:
        A tensor with the same shape and dtype as ``X`` whose negative
        entries are replaced by zero.
    """
    # Zero-dimensional operand: broadcasts against X without affecting its dtype.
    zero = torch.zeros((), dtype=X.dtype, device=X.device)
    return torch.max(X, zero)

# 定义模型

In [5]:
def net(X):
    """Forward pass of the one-hidden-layer perceptron.

    The input is viewed as rows of ``num_inputs`` features, passed through
    the hidden affine layer (``W1``, ``b1``) followed by ``relu``, and then
    through the output affine layer (``W2``, ``b2``). No softmax is applied;
    the returned values are the raw output-layer scores.
    """
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ W1 + b1)
    return hidden @ W2 + b2

# 定义损失函数

In [6]:
# Loss object handed to the training calls: PyTorch's CrossEntropyLoss with default settings.
# `net` returns un-normalised scores and applies no softmax of its own.
loss = torch.nn.CrossEntropyLoss()

# 训练模型

In [7]:
# First training run: 5 epochs through d2l.train_ch3 using the hand-built parameter list.
# NOTE(review): lr = 100.0 is far larger than a typical SGD step size; presumably
# train_ch3's update divides by batch_size while CrossEntropyLoss already averages
# over the batch — confirm against d2lzh_pytorch before changing it.
num_epochs, lr = 5, 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0030, train acc 0.715, test acc 0.760
epoch 2, loss 0.0019, train acc 0.823, test acc 0.818
epoch 3, loss 0.0017, train acc 0.844, test acc 0.816
epoch 4, loss 0.0015, train acc 0.854, test acc 0.828
epoch 5, loss 0.0015, train acc 0.863, test acc 0.815


In [8]:
# Second training run: 5 more epochs with a much smaller learning rate (0.5).
# `params` is not re-initialised between the two calls, so this presumably continues
# from the weights left by the previous run — verify that train_ch3 updates them in place.
num_epochs, lr = 5, 0.5
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0014, train acc 0.872, test acc 0.864
epoch 2, loss 0.0013, train acc 0.881, test acc 0.865
epoch 3, loss 0.0013, train acc 0.881, test acc 0.867
epoch 4, loss 0.0013, train acc 0.882, test acc 0.867
epoch 5, loss 0.0013, train acc 0.882, test acc 0.867
