In [1]:
import torch
import numpy as np
import sys
sys.path.append('..')
import d2lzh_pytorch as d2l

In [2]:
# Obtain and read the data; this notebook continues to use the Fashion-MNIST dataset.

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

In [4]:
# Layer sizes; the number of hidden units (256) is a hyperparameter.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Weights are drawn with NumPy from a normal distribution (mean 0, std 0.01) and
# converted to float32 tensors; biases start at zero. Every tensor is created
# with gradient tracking switched on so autograd can compute its gradient.
w1 = torch.tensor(
    np.random.normal(0, 0.01, (num_inputs, num_hiddens)),
    dtype=torch.float,
    requires_grad=True,
)
b1 = torch.zeros(num_hiddens, dtype=torch.float, requires_grad=True)
w2 = torch.tensor(
    np.random.normal(0, 0.01, (num_hiddens, num_outputs)),
    dtype=torch.float,
    requires_grad=True,
)
b2 = torch.zeros(num_outputs, dtype=torch.float, requires_grad=True)

# Trainable parameters, in layer order (hidden layer first, then output layer).
params = [w1, b1, w2, b2]

In [5]:
# Define the ReLU activation function.

def relu(X):
    """Apply ReLU element-wise.

    Args:
        X: Input tensor.

    Returns:
        The element-wise maximum of ``X`` and a scalar zero tensor.
    """
    zero = torch.tensor(0.0)
    return torch.max(X, zero)

In [6]:
# Define the model.
# Matrix products are used here (the `@` operator, equivalent to torch.matmul).
# `*` must not be used: it multiplies element-wise, which is not the matrix
# product that the layer equations call for.

def net(X):
    """Forward pass of the two-layer perceptron.

    Args:
        X: Batch of inputs; it is reshaped so each example becomes a vector
            of length ``num_inputs``.

    Returns:
        The output-layer scores ``relu(X @ w1 + b1) @ w2 + b2``; no softmax
        is applied here.
    """
    # Flatten every example into a row vector of length num_inputs.
    flat = X.view(-1, num_inputs)
    hidden = relu(flat @ w1 + b1)
    return hidden @ w2 + b2

In [7]:
# Define the cross-entropy loss function.

loss = torch.nn.CrossEntropyLoss()

In [8]:
# Train the model.
# NOTE(review): lr = 100.0 is unusually large for SGD. Presumably the update
# that d2l.train_ch3 applies when given `params`/`lr` divides the step by
# batch_size while CrossEntropyLoss already averages over the batch, so the
# effective step is far smaller — confirm against d2lzh_pytorch.

num_epochs = 5
lr = 100.0
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)

epoch 1, loss 0.0030, train acc 0.714, test acc 0.756
epoch 2, loss 0.0019, train acc 0.824, test acc 0.817
epoch 3, loss 0.0017, train acc 0.844, test acc 0.843
epoch 4, loss 0.0015, train acc 0.855, test acc 0.800
epoch 5, loss 0.0015, train acc 0.863, test acc 0.831


In [9]:
# 多层感知机的简洁实现

In [10]:
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..") 
import d2lzh_pytorch as d2l

In [12]:
# Layer sizes; the number of hidden units (256) is a hyperparameter.
num_inputs, num_outputs, num_hiddens = 784, 10, 256

# Concise model: flatten -> linear -> ReLU -> linear.
# Note: this rebinds `net`, replacing the hand-written net() function defined
# earlier in the notebook with an nn.Sequential module.
net = nn.Sequential(
    d2l.FlattenLayer(),
    nn.Linear(num_inputs, num_hiddens),
    nn.ReLU(),
    nn.Linear(num_hiddens, num_outputs),
)

# Initialise every parameter (weights and biases alike) from a normal
# distribution with mean 0 and std 0.01.
# The loop variable is named `param`, not `params`, so it does not overwrite
# the global `params` list built for the from-scratch model; clobbering it
# would break re-running the from-scratch training cell.
for param in net.parameters():
    init.normal_(param, mean=0, std=0.01)

In [13]:
# Hyperparameters for the concise training run.
batch_size, num_epochs = 256, 5

# Data iterators, loss function and optimizer.
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

# `params` and `lr` are passed as None because the optimizer object is
# supplied instead.
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, optimizer)

epoch 1, loss 0.0031, train acc 0.704, test acc 0.748
epoch 2, loss 0.0019, train acc 0.820, test acc 0.823
epoch 3, loss 0.0016, train acc 0.846, test acc 0.824
epoch 4, loss 0.0015, train acc 0.857, test acc 0.840
epoch 5, loss 0.0014, train acc 0.863, test acc 0.801
