In [46]:
import torch
from torch import nn
import torchvision.transforms as transforms
import torchvision
import torch.utils.data as data

In [47]:
# LeNet architecture: two conv/sigmoid/avg-pool stages followed by a
# three-layer fully connected classifier producing 10 outputs.
net = nn.Sequential(
    # Stage 1: 1 -> 6 channels; padding=2 keeps the 28x28 spatial size.
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Stage 2: 6 -> 16 channels, no padding.
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Classifier head on the flattened 16 x 5 x 5 feature map.
    nn.Flatten(),
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)

# Sanity check: push one random 1x28x28 input through the network layer by
# layer and print each layer's output shape to confirm it is as expected.
X = torch.rand(1, 1, 28, 28, dtype=torch.float32)
for stage in net.children():
    X = stage(X)
    print(type(stage).__name__, 'output shape: \t', X.shape)

Conv2d output shape: 	 torch.Size([1, 6, 28, 28])
Sigmoid output shape: 	 torch.Size([1, 6, 28, 28])
AvgPool2d output shape: 	 torch.Size([1, 6, 14, 14])
Conv2d output shape: 	 torch.Size([1, 16, 10, 10])
Sigmoid output shape: 	 torch.Size([1, 16, 10, 10])
AvgPool2d output shape: 	 torch.Size([1, 16, 5, 5])
Flatten output shape: 	 torch.Size([1, 400])
Linear output shape: 	 torch.Size([1, 120])
Sigmoid output shape: 	 torch.Size([1, 120])
Linear output shape: 	 torch.Size([1, 84])
Sigmoid output shape: 	 torch.Size([1, 84])
Linear output shape: 	 torch.Size([1, 10])


In [48]:
# Training configuration shared by the cells below.
batch_size = 256  # mini-batch size passed to the DataLoaders
num_epochs = 10  # number of training epochs
lr = 0.005  # learning rate
# Prefer GPU 0, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at `net.to(device)`.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
# Convert images to tensors, then normalize the single channel with
# mean 0.1307 and std 0.3081. Both are written as one-element tuples:
# `(0.3081)` without the trailing comma is just a float, not a sequence.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])
criterion = nn.CrossEntropyLoss()

In [49]:
# Use one root directory for both splits. The original used '../data' for the
# training set and './data' for the test set, which spreads the dataset over
# two different directories.
data_root = '../data'

# Training split: reshuffled every epoch.
train_dataset = torchvision.datasets.MNIST(root=data_root, train=True, transform=transform, download=True)
train_loader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Test split: fixed order, used only for evaluation.
test_dataset = torchvision.datasets.MNIST(root=data_root, train=False, transform=transform, download=True)
test_loader = data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [50]:
# Training: run `num_epochs` passes over the training set and report the
# test-set accuracy after every epoch.
net = net.to(device)
for epoch in range(num_epochs):  # adjust num_epochs as needed
    net.train()
    running_loss = 0.0
    # Unpack each batch directly. The batch must NOT be bound to the name
    # `data`: that name is the `torch.utils.data` module alias imported at the
    # top of the notebook, and overwriting it breaks `data.DataLoader(...)`
    # when the data-loading cell is re-run.
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()  # clear gradients from the previous step

        outputs = net(inputs)  # forward pass
        loss = criterion(outputs, labels)  # compute the loss
        loss.backward()  # backward pass, compute gradients
        optimizer.step()  # update the weights

        running_loss += loss.item()
    # Mean of the per-batch losses over the epoch.
    print(f"Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}")

    # Evaluation on the test set.
    correct = 0
    total = 0
    net.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = net(inputs)  # forward pass
            # Index of the highest-scoring class for each sample. The legacy
            # `.data` access is unnecessary inside `torch.no_grad()`.
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f"Accuracy on the test set: {accuracy}%")
     

Epoch 1, Loss: 1.0389812161313727
Accuracy on the test set: 93.22%
Epoch 2, Loss: 0.1583482135483559
Accuracy on the test set: 96.91%
Epoch 3, Loss: 0.09259242403538938
Accuracy on the test set: 97.71%
Epoch 4, Loss: 0.06958884199724552
Accuracy on the test set: 97.82%
Epoch 5, Loss: 0.05995816456431404
Accuracy on the test set: 97.99%
Epoch 6, Loss: 0.046714162394562934
Accuracy on the test set: 98.53%
Epoch 7, Loss: 0.04314050231564869
Accuracy on the test set: 98.61%
Epoch 8, Loss: 0.035418831352937094
Accuracy on the test set: 98.6%
Epoch 9, Loss: 0.032683991656658494
Accuracy on the test set: 98.77%
Epoch 10, Loss: 0.027302942105985068
Accuracy on the test set: 98.52%
