![](../image/lenet.svg)
![](../image/lenet-vert.svg)

卷积:
- 2个卷积层(都是5x5), 3个全连接层

- 平均汇聚层, 而非最大汇聚层

其他:

- 使用sigmoid激活函数, 而非ReLU

In [1]:
import torch
# torchvision.datasets.FashionMNIST
import torchvision
# 修改数据集格式
from torchvision import transforms
# data.DataLoader
from torch.utils import data
# nn块
from torch import nn

In [2]:
# ----------- Hyperparameters -----------
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)
# Mini-batch size, SGD learning rate and number of training epochs.
batch_size, lr, num_epochs = 128, 0.1, 20
# Cross-entropy criterion passed to the training and evaluation helpers.
loss = nn.CrossEntropyLoss()

cuda


In [3]:
# Every sample is passed through ToTensor before it is returned.
trans = transforms.ToTensor()
# Training split of Fashion-MNIST, downloaded into ../data when missing.
mnist_train_totensor = torchvision.datasets.FashionMNIST(
    "../data", train=True, transform=trans, download=True)
# Held-out test split, same storage location and transform.
mnist_test_totensor = torchvision.datasets.FashionMNIST(
    "../data", train=False, transform=trans, download=True)
# Each image is already 1x28x28, so no resizing is needed.
mnist_train_totensor[0][0].shape

torch.Size([1, 28, 28])

In [4]:
# Training batches are reshuffled on every pass over the data.
# Evaluation does not need a random order, so the test loader keeps the
# dataset order: batches are reproducible and no shuffling work is wasted.
# num_workers=4: batches are loaded by 4 worker processes.
train_iter = data.DataLoader(
    mnist_train_totensor, batch_size, shuffle=True, num_workers=4)
test_iter = data.DataLoader(
    mnist_test_totensor, batch_size, shuffle=False, num_workers=4)

In [7]:
# LeNet-style network with batch normalisation in front of every sigmoid.
net = nn.Sequential(
    # Convolution stage 1: 5x5 conv (padding 2), then 2x2 average pooling.
    nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=2),
    nn.BatchNorm2d(num_features=6),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Convolution stage 2: 5x5 conv without padding, then 2x2 average pooling.
    nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
    nn.BatchNorm2d(num_features=16),
    nn.Sigmoid(),
    nn.AvgPool2d(kernel_size=2, stride=2),
    # Collapse the 16 feature maps of size 5x5 into one vector per sample.
    nn.Flatten(),
    # Fully connected head: 400 -> 120 -> 84 -> 10 class scores.
    nn.Linear(in_features=16 * 5 * 5, out_features=120),
    nn.BatchNorm1d(num_features=120),
    nn.Sigmoid(),
    nn.Linear(in_features=120, out_features=84),
    nn.BatchNorm1d(num_features=84),
    nn.Sigmoid(),
    nn.Linear(in_features=84, out_features=10),
)
# Move all parameters and buffers to the selected device.
net = net.to(device)
net

Sequential(
  (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (1): BatchNorm2d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): Sigmoid()
  (3): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (4): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (5): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): Sigmoid()
  (7): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (8): Flatten(start_dim=1, end_dim=-1)
  (9): Linear(in_features=400, out_features=120, bias=True)
  (10): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (11): Sigmoid()
  (12): Linear(in_features=120, out_features=84, bias=True)
  (13): BatchNorm1d(84, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (14): Sigmoid()
  (15): Linear(in_features=84, out_features=10, bias=True)
)

In [11]:
# Shape sanity check: push a dummy batch through the network layer by layer
# and print the output shape after every layer.
# The pass runs in eval mode and without autograd, so the random input does
# not overwrite the BatchNorm running statistics and no graph is built.
# The previous train/eval mode is restored afterwards.
was_training = net.training
net.eval()
with torch.no_grad():
    X = torch.rand(size=(2, 1, 28, 28), dtype=torch.float32).to(device)
    for layer in net:
        X = layer(X)
        print(f'output shape: {layer.__class__.__name__: <15}{X.shape}')
net.train(was_training)

output shape: Conv2d         torch.Size([2, 6, 28, 28])
output shape: BatchNorm2d    torch.Size([2, 6, 28, 28])
output shape: Sigmoid        torch.Size([2, 6, 28, 28])
output shape: AvgPool2d      torch.Size([2, 6, 14, 14])
output shape: Conv2d         torch.Size([2, 16, 10, 10])
output shape: BatchNorm2d    torch.Size([2, 16, 10, 10])
output shape: Sigmoid        torch.Size([2, 16, 10, 10])
output shape: AvgPool2d      torch.Size([2, 16, 5, 5])
output shape: Flatten        torch.Size([2, 400])
output shape: Linear         torch.Size([2, 120])
output shape: BatchNorm1d    torch.Size([2, 120])
output shape: Sigmoid        torch.Size([2, 120])
output shape: Linear         torch.Size([2, 84])
output shape: BatchNorm1d    torch.Size([2, 84])
output shape: Sigmoid        torch.Size([2, 84])
output shape: Linear         torch.Size([2, 10])


In [8]:
def init_weights(m):
    """Apply Xavier-uniform initialisation to the weight of Linear/Conv2d modules.

    Written to be passed to ``net.apply``. Modules of any other type are left
    untouched, and biases are not modified.

    Args:
        m: The module to (possibly) re-initialise in place.
    """
    # isinstance, unlike an exact type comparison, also covers subclasses
    # of nn.Linear / nn.Conv2d.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)


# Run init_weights over the network's modules, then create the cross-entropy
# criterion and a plain SGD optimizer over all of the network's parameters.
net.apply(init_weights)
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=lr)

In [9]:
def train_loop(train_iter, net, loss, optimizer):
    """Train ``net`` for one pass over ``train_iter`` and return the mean batch loss.

    Args:
        train_iter: Sized iterable yielding ``(X, y)`` batches.
        net: Model to optimise; it is switched to training mode.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.

    Returns:
        The mean of the per-batch loss values, or ``nan`` when ``train_iter``
        contains no batches.
    """
    # Number of batches in one pass.
    num_batchs = len(train_iter)
    if num_batchs == 0:
        # Nothing to train on: the mean loss is undefined.
        return float("nan")

    # Batches are moved to wherever the model's parameters live, so the
    # function does not depend on a module-level ``device`` variable.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # An earlier evaluation may have left the net in eval mode; layers such
    # as BatchNorm must be in training mode here.
    net.train()

    # Running sum of the per-batch losses.
    total_train_loss = 0.0
    for batch, (X, y) in enumerate(train_iter):
        if target_device is not None:
            X, y = X.to(target_device), y.to(target_device)
        # Forward pass for this batch.
        y_hat = net(X)

        train_loss = loss(y_hat, y)
        total_train_loss += train_loss.item()

        # Backpropagation and parameter update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

        # Progress indicator, overwritten in place.
        print(f"\r[{batch+1:>8d}/{num_batchs:>8d}]  ", end='')

    return total_train_loss / num_batchs

In [10]:
# --------- Training
# One train_loop call per epoch; print the loss value it returns, with a
# 1-based epoch counter.
for epoch in range(num_epochs):
    total_train_loss = train_loop(
        train_iter=train_iter, net=net, loss=loss, optimizer=optimizer)
    print(f'epoch {epoch + 1}, total_train_loss {total_train_loss:f}')

[     469/     469]  epoch 1, total_train_loss 0.746685
[     469/     469]  epoch 2, total_train_loss 0.480179
[     469/     469]  epoch 3, total_train_loss 0.417636
[     469/     469]  epoch 4, total_train_loss 0.380995
[     469/     469]  epoch 5, total_train_loss 0.355484
[     469/     469]  epoch 6, total_train_loss 0.335892
[     469/     469]  epoch 7, total_train_loss 0.318740
[     469/     469]  epoch 8, total_train_loss 0.307732
[     469/     469]  epoch 9, total_train_loss 0.294946
[     469/     469]  epoch 10, total_train_loss 0.284497
[     469/     469]  epoch 11, total_train_loss 0.276756
[     469/     469]  epoch 12, total_train_loss 0.269966
[     469/     469]  epoch 13, total_train_loss 0.261793
[     469/     469]  epoch 14, total_train_loss 0.255295
[     469/     469]  epoch 15, total_train_loss 0.249387
[     469/     469]  epoch 16, total_train_loss 0.242427
[     469/     469]  epoch 17, total_train_loss 0.238807
[     469/     469]  epoch 18, total_tra

In [12]:
# ---------- Evaluation
def test_net(test_iter, net, loss):
    """Evaluate ``net`` on ``test_iter`` and print accuracy and average loss.

    Accuracy is the number of correct predictions divided by the number of
    evaluated samples, so a smaller final batch is not over-weighted. The
    reported loss is the mean of the per-batch loss values.

    The net is switched to eval mode and is left in eval mode on return.

    Args:
        test_iter: Sized iterable yielding ``(X, y)`` batches.
        net: Model to evaluate.
        loss: Callable ``loss(y_hat, y)`` returning a scalar tensor.

    Returns:
        None. The metrics are printed.
    """
    # Number of batches in the evaluation set.
    num_batchs = len(test_iter)
    # Sum of batch losses, number of correct predictions, number of samples.
    total_test_loss, total_correct, num_samples = 0.0, 0, 0

    # Batches are moved to wherever the model's parameters live, so the
    # function does not depend on a module-level ``device`` variable.
    first_param = next(net.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    # Evaluation mode (BatchNorm uses its running statistics).
    net.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch, (X, y) in enumerate(test_iter):
            if target_device is not None:
                X, y = X.to(target_device), y.to(target_device)
            y_hat = net(X)

            total_test_loss += loss(y_hat, y).item()
            # A prediction is correct when the arg-max class equals the label.
            total_correct += (y_hat.argmax(1) == y).sum().item()
            num_samples += len(X)

            # Progress indicator, overwritten in place.
            print(f"\r[{batch+1:>8d}/{num_batchs:>8d}]  ", end='')

    # With no data the metrics are undefined; report nan instead of dividing by zero.
    avg_loss = total_test_loss / num_batchs if num_batchs else float("nan")
    accuracy = total_correct / num_samples if num_samples else float("nan")
    print(
        f"\nTest: Accuracy: {accuracy:.1%}, Avg loss: {avg_loss:f}")


# The name matches pytest's default ``test_*`` pattern; opt out of collection
# so importing this helper into a test module does not turn it into a test.
test_net.__test__ = False
    
# Evaluate the network on the test loader using the shared criterion.
test_net(test_iter=test_iter, net=net, loss=loss)

[       1/      79]  [       2/      79]  [       3/      79]  [       4/      79]  [       5/      79]  [       6/      79]  [       7/      79]  [       8/      79]  [       9/      79]  [      10/      79]  [      11/      79]  [      12/      79]  [      13/      79]  [      14/      79]  [      15/      79]  [      16/      79]  [      17/      79]  [      18/      79]  [      19/      79]  [      20/      79]  [      21/      79]  [      22/      79]  [      23/      79]  [      24/      79]  [      25/      79]  [      26/      79]  [      27/      79]  [      28/      79]  [      29/      79]  [      30/      79]  [      31/      79]  [      32/      79]  [      33/      79]  [      34/      79]  [      35/      79]  [      36/      79]  [      37/      79]  [      38/      79]  [      39/      79]  [      40/      79]  [      41/      79]  [      42/      79]  [      43/      79]  [      44/      79]  [      45/      79]  [      46