In [2]:
import torch 
import torchvision
from torch.utils import data
from torchvision import transforms
from torch import nn
from d2l import torch as d2l

batch_size = 256

trans = transforms.ToTensor()
# 创建Dataset对象
mnist_train = torchvision.datasets.FashionMNIST(root="../data/",train=True,transform=trans,download=True)
mnist_test = torchvision.datasets.FashionMNIST(root="../data/",train=False,transform=trans,download=True) 

train_iter = data.DataLoader(mnist_train, batch_size, shuffle=True, num_workers=4) # 加载数据并分批
test_iter = data.DataLoader(mnist_test, batch_size, shuffle=True, num_workers=4)

In [3]:
mnist_train[0][0].shape

torch.Size([1, 28, 28])

In [4]:
# Softmax回归的输出是一个全连接层
# PyTorch不会隐式地调整输入的形状
# 因此，我们定义了展平层(flatten)在线性层前调整网络输入的形状
# Flaten():将任意维度的tensor转换为2d维度的tensor
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10)) # 1*28*28

def init_weight(m): # m为当前的layer
    if type(m) == nn.Linear:
        nn.init.normal_(m.weight, std = 0.01) # 标准差

net.apply(init_weight) # 对网络的每一层实施初始化
print(net[1].weight) # 注意Flatten层是没有参数的，只有Linear层才有参数

Parameter containing:
tensor([[-9.9251e-03,  3.9568e-03,  3.0540e-03,  ...,  3.5584e-03,
          7.1917e-03, -3.2820e-03],
        [-8.4160e-03,  9.1574e-03, -5.0447e-07,  ..., -8.3311e-03,
          5.6575e-04, -9.9625e-05],
        [ 1.1982e-02, -1.6683e-03,  3.1474e-03,  ..., -1.1207e-02,
         -7.9860e-04, -1.6032e-02],
        ...,
        [ 5.0406e-03,  9.9460e-03,  6.5048e-03,  ...,  1.3254e-04,
          1.7051e-02,  2.1368e-03],
        [-1.0665e-02, -7.0885e-03,  1.3447e-02,  ..., -3.0942e-03,
          3.2712e-04,  1.4247e-02],
        [ 1.1700e-02,  4.8642e-03, -5.4011e-04,  ..., -1.0139e-02,
         -6.0516e-03,  1.4365e-02]], requires_grad=True)


In [None]:
# 在交叉熵损失函数中传递未归一化的预测，并同时计算softmax及其对数
loss = nn.CrossEntropyLoss()
# * nn.CrossEntropyLoss 在计算损失时的一个关键点。
# *当你给它传入一个 batch（批量） 的预测值 net(X) 和真实标签 y 时，
# *它会计算这个批量中每个样本的损失，然后默认会取这些损失的平均值。
# 设置优化算法：小批量随机梯度下降
trainer = torch.optim.SGD(net.parameters(), lr=0.1) # 传入net的所有参数，设置学习率


In [None]:
num_epochs = 10
num_correct = 0
train_loss = 0
num_samples = 0

for epoch in range(num_epochs):
    for X, y in train_iter:  # 从DataLoader里面一次一次把所有数据拿出来
        l = loss(net(X),y) # net(X) 为计算出来的线性回归的预测值
        trainer.zero_grad() # 梯度清零
        l.backward() 
        trainer.step()  # SGD优化器优化模型
    for X, y in train_iter:
        out = net(X)
        batch_loss = loss(out, y)
        train_loss += batch_loss.item() * X.shape[0]
        num_samples += X.shape[0]
        # 计算预测类别
        pred = out.argmax(dim=1)
        num_correct += (pred == y).sum().item()
    avg_train_loss = train_loss / num_samples
    accuracy = num_correct / num_samples
    print(f'epoch:{epoch+1},cost:{l:f}, accuracy:{accuracy:.4f}')
# 0.8268

epoch:1,cost:0.552619, accuracy:0.8510
epoch:2,cost:0.358660, accuracy:0.8511
epoch:3,cost:0.445071, accuracy:0.8493
epoch:4,cost:0.268670, accuracy:0.8494
epoch:5,cost:0.405321, accuracy:0.8497
epoch:6,cost:0.324140, accuracy:0.8493
epoch:7,cost:0.592218, accuracy:0.8489
epoch:8,cost:0.557565, accuracy:0.8472
epoch:9,cost:0.411822, accuracy:0.8481
epoch:10,cost:0.353706, accuracy:0.8486


In [7]:


# ========== 新增：用测试集评估模型性能 ==========
test_loss = 0
num_samples = 0
num_correct = 0
with torch.no_grad():
    for X, y in test_iter:
        out = net(X)
        batch_loss = loss(out, y)
        test_loss += batch_loss.item() * X.shape[0]
        num_samples += X.shape[0]
        # 计算预测类别
        pred = out.argmax(dim=1)
        num_correct += (pred == y).sum().item()
    avg_test_loss = test_loss / num_samples
    accuracy = num_correct / num_samples
print(f"测试集平均loss: {avg_test_loss:.6f}")
print(f"测试集准确率: {accuracy:.4f}")


测试集平均loss: 0.480852
测试集准确率: 0.8326
