# 5.5 卷积神经网络(LeNet)

In [7]:
import os
import time
import torch
from torch import nn, optim
import torchvision
import sys
sys.path.append("F:\\GitHub_work\\Dive-into-DL-PyTorch\\code") # 为了导入上层目录的d2lzh_pytorch
import d2lzh_pytorch as d2l
import matplotlib.pyplot as plt

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.__version__)
print(device)

1.3.1
cuda


## 5.5.1 LeNet模型 

In [9]:
class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 6, 5), # in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2), # kernel_size, stride
            nn.Conv2d(6, 16, 5),# in_channels, out_channels, kernel_size
            nn.Sigmoid(),
            nn.MaxPool2d(2, 2) # kernel_size, stride
        )
        self.fc = nn.Sequential(
            nn.Linear(16*4*4, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))#shape[0]是256，即为256行
        return output

In [24]:
net = LeNet()
print(net)
params = list(net.parameters())
print(len(params))
print(params[0].size())

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)
10
torch.Size([6, 1, 5, 5])


## 5.5.2 获取数据和训练模型

In [11]:
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)

In [12]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用。该函数将被逐步改进：它的完整实现将在“图像增广”一节中描述
def evaluate_accuracy(data_iter, net):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval() # 评估模式, 这会关闭dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train() # 改回训练模式
            else: # 自定义的模型, 3.13节之后不会用到, 不考虑GPU
                if('is_training' in net.__code__.co_varnames): # 如果有is_training这个参数
                    # 将is_training设置成False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item() 
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item() 
            n += y.shape[0]
    return acc_sum / n

In [13]:
# 本函数已保存在d2lzh_pytorch包中方便以后使用
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    batch_count = 0
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, start = 0.0, 0.0, 0, time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))

In [15]:
lr, num_epochs = 0.001, 20
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)

training oncuda
epoch 1, loss 0.4606, train acc 0.821, test acc 0.811, time 9.9 sec
epoch 2, loss 0.2260, train acc 0.826, test acc 0.817, time 10.7 sec
epoch 3, loss 0.1466, train acc 0.832, test acc 0.825, time 10.2 sec
epoch 4, loss 0.1074, train acc 0.836, test acc 0.827, time 9.5 sec
epoch 5, loss 0.0837, train acc 0.841, test acc 0.831, time 10.5 sec
epoch 6, loss 0.0684, train acc 0.846, test acc 0.836, time 10.5 sec
epoch 7, loss 0.0572, train acc 0.851, test acc 0.842, time 10.6 sec
epoch 8, loss 0.0490, train acc 0.855, test acc 0.846, time 10.9 sec
epoch 9, loss 0.0427, train acc 0.859, test acc 0.843, time 10.4 sec
epoch 10, loss 0.0377, train acc 0.862, test acc 0.849, time 10.2 sec
epoch 11, loss 0.0337, train acc 0.864, test acc 0.851, time 10.1 sec
epoch 12, loss 0.0302, train acc 0.867, test acc 0.853, time 10.0 sec
epoch 13, loss 0.0274, train acc 0.869, test acc 0.859, time 10.1 sec
epoch 14, loss 0.0251, train acc 0.870, test acc 0.856, time 9.9 sec
epoch 15, loss 0