## 准备数据

In [18]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

def mnist_dataset(batch_size=64):
    """Build the MNIST training and test data loaders.

    The dataset is downloaded into ``./data`` if it is not already there.

    :param batch_size: number of samples per batch for both loaders
    :return: ``(train_loader, test_loader)``; the training loader shuffles,
        the test loader does not
    """
    # Preprocessing shared by both splits: convert the image to a tensor
    # (values scaled to [0, 1]), then standardize with the MNIST mean / std.
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    def make_loader(is_train):
        # One split -> one loader; shuffling is enabled for the training split only.
        split = datasets.MNIST(root='./data', train=is_train, download=True, transform=preprocess)
        return DataLoader(split, batch_size=batch_size, shuffle=is_train)

    # Tuple elements are evaluated left to right: training split first, then test.
    return make_loader(True), make_loader(False)

## 建立模型

In [42]:


class myModel(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    With the default sizes it maps a flattened 28x28 input to 10 class logits.

    :param input_dim: number of features per sample after flattening
    :param hidden_dim: width of the hidden layer
    :param num_classes: number of output logits
    """

    def __init__(self, input_dim=28 * 28, hidden_dim=100, num_classes=10):
        super().__init__()
        # Attribute names are unchanged so existing state_dict keys keep loading.
        self.mul_h1 = nn.Linear(input_dim, hidden_dim)
        self.mul_h2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return unnormalized logits of shape ``(x.size(0), num_classes)``.

        :param x: tensor whose first axis is the batch axis; all remaining
            axes are flattened into ``input_dim`` features
        """
        # reshape (not view) so non-contiguous inputs, e.g. a transposed batch,
        # are flattened instead of raising a RuntimeError.
        flat = x.reshape(x.size(0), -1)

        # nn.Linear already carries its own bias term, so none is added by hand.
        hidden = F.relu(self.mul_h1(flat))
        return self.mul_h2(hidden)
        
model = myModel()
# Place the model on the GPU when one is available so that its parameters are
# on the same device as the batches fed to it during training and evaluation;
# otherwise the forward pass fails with a device-mismatch RuntimeError.
# The move is done before the optimizer is created so the optimizer is built
# from the parameters in their final location.
if torch.cuda.is_available():
    model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=1e-3)

## 计算 loss

In [None]:
def compute_loss(logits, labels):
    """
    Cross-entropy loss, averaged over the batch.

    :param logits: unnormalized model outputs, one row per sample
    :param labels: ground-truth class indices
    :return: scalar loss tensor
    """
    # Functional form of nn.CrossEntropyLoss() with its default settings.
    return F.cross_entropy(logits, labels)

def compute_accuracy(logits, labels):
    """
    Fraction of samples whose highest-scoring class equals the label.

    :param logits: unnormalized model outputs, one row per sample
    :param labels: ground-truth class indices
    :return: scalar tensor holding the accuracy
    """
    predicted = logits.argmax(dim=1)
    hits = predicted.eq(labels).float()
    return hits.mean()

def train_one_step(model, optimizer, x, y):
    """
    Run a single optimization step on one batch.

    :param model: model instance to update
    :param optimizer: optimizer bound to the model's parameters
    :param x: input batch
    :param y: ground-truth labels for the batch
    :return: ``(loss, accuracy)`` as Python floats, both computed from the
        forward pass made before the parameter update
    """
    model.train()

    # Forward pass and loss for this batch.
    outputs = model(x)
    batch_loss = compute_loss(outputs, y)

    # Drop gradients left over from the previous step, then backpropagate
    # and apply the update.
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    batch_accuracy = compute_accuracy(outputs, y)
    return batch_loss.item(), batch_accuracy.item()

def test(model, x, y):
    """
    Evaluate the model on one batch without updating it.

    :param model: model instance
    :param x: input batch
    :param y: ground-truth labels for the batch
    :return: ``(loss, accuracy)`` for this batch as Python floats
    """
    # Evaluation mode; note the model is left in this mode on return.
    model.eval()

    # No gradients are needed for scoring.
    with torch.no_grad():
        outputs = model(x)
        return compute_loss(outputs, y).item(), compute_accuracy(outputs, y).item()

## 实际训练

In [None]:
NUM_EPOCHS = 50  # number of full passes over the training set

train_loader, test_loader = mnist_dataset()

# Send every batch to whichever device the model's parameters live on, so the
# inputs and the weights always agree (works on CPU-only and GPU machines).
device = next(model.parameters()).device

# Training loop
for epoch in range(NUM_EPOCHS):
    loss_sum = 0.0
    accuracy_sum = 0.0
    num_samples = 0
    for batch_x, batch_y in train_loader:
        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
        loss, accuracy = train_one_step(model, optimizer, batch_x, batch_y)

        # train_one_step returns per-batch means; weight them by the batch size
        # so a smaller final batch (dataset size not a multiple of the batch
        # size) does not count as much as a full one.
        batch_size = batch_y.size(0)
        loss_sum += loss * batch_size
        accuracy_sum += accuracy * batch_size
        num_samples += batch_size

    epoch_loss = loss_sum / num_samples
    epoch_accuracy = accuracy_sum / num_samples
    print(f'Epoch {epoch}: loss {epoch_loss:.4f}; accuracy {epoch_accuracy:.4f}')


Epoch 0: loss 0.2748; accuracy 0.9215
Epoch 1: loss 0.1237; accuracy 0.9628
Epoch 2: loss 0.0858; accuracy 0.9737
Epoch 3: loss 0.0662; accuracy 0.9790
Epoch 4: loss 0.0530; accuracy 0.9833
Epoch 5: loss 0.0437; accuracy 0.9855
Epoch 6: loss 0.0363; accuracy 0.9883
Epoch 7: loss 0.0295; accuracy 0.9901
Epoch 8: loss 0.0267; accuracy 0.9911
Epoch 9: loss 0.0210; accuracy 0.9930
Epoch 10: loss 0.0203; accuracy 0.9929
Epoch 11: loss 0.0170; accuracy 0.9944
Epoch 12: loss 0.0161; accuracy 0.9943
Epoch 13: loss 0.0155; accuracy 0.9946
Epoch 14: loss 0.0148; accuracy 0.9949
Epoch 15: loss 0.0117; accuracy 0.9959
Epoch 16: loss 0.0118; accuracy 0.9959
Epoch 17: loss 0.0111; accuracy 0.9961
Epoch 18: loss 0.0099; accuracy 0.9966
Epoch 19: loss 0.0121; accuracy 0.9957
Epoch 20: loss 0.0085; accuracy 0.9970
Epoch 21: loss 0.0089; accuracy 0.9968
Epoch 22: loss 0.0074; accuracy 0.9974
Epoch 23: loss 0.0088; accuracy 0.9969
Epoch 24: loss 0.0109; accuracy 0.9966
Epoch 25: loss 0.0095; accuracy 0.9

TypeError: test() missing 1 required positional argument: 'y'

In [46]:
# Evaluate the model on the test set
model.eval()

# Send every batch to whichever device the model's parameters live on, so the
# inputs and the weights always agree (works on CPU-only and GPU machines).
device = next(model.parameters()).device

loss_sum = 0.0
accuracy_sum = 0.0
num_samples = 0
# batch_x / batch_y keep their names: the following cell inspects the last batch.
for batch_x, batch_y in test_loader:
    batch_x, batch_y = batch_x.to(device), batch_y.to(device)
    loss, accuracy = test(model, batch_x, batch_y)

    # test() returns per-batch means; weight them by the batch size so the
    # result is the true per-sample average even when the final batch is
    # smaller than the rest.
    batch_size = batch_y.size(0)
    loss_sum += loss * batch_size
    accuracy_sum += accuracy * batch_size
    num_samples += batch_size

eval_loss = loss_sum / num_samples
eval_accuracy = accuracy_sum / num_samples
print(f'Test: loss {eval_loss:.4f}; accuracy {eval_accuracy:.4f}')


Test: loss 0.1678; accuracy 0.9796


In [47]:
# Spot-check a single example: batch_x / batch_y are the loop variables left
# over from the last batch loop that ran. Print the label, then the logits.
# NOTE(review): batch_x[sample_idx] drops the batch axis; forward() then treats
# the sample's leading axis as the batch axis (presumably the size-1 channel
# axis, consistent with the (1, 10) output shown) -- confirm this is intended.
sample_idx = 7
print(batch_y[sample_idx])
print(model(batch_x[sample_idx]))

tensor(8)
tensor([[-14.4693, -26.4977,  -7.8636,   5.2124, -53.7433,  -1.4998, -41.4715,
         -22.1236,  27.8530, -33.1808]], grad_fn=<AddmmBackward0>)
