### 卷积神经网络(LeNet)

LeNet的网络结构如下:
![image.png](attachment:image.png)

注意：这里我们采用的是Fashion-MNIST数据集，输入图片的形状是[1,28,28]（依次为通道数、高和宽）。

LeNet分为卷积层和全连接层两部分。

其中，卷积层部分的基本单元由5×5的卷积层、激活层（这里使用Sigmoid）和2×2的最大池化层依次组成。

当卷积层的输出传入全连接层时，全连接层会将小批量中每个样本变平。也就是说，全连接层的输入形状将变成二维，其中第一维是样本的数量，第二维是每个样本变平后的向量表示，且向量长度为通道、高和宽的乘积。

全连接层部分含3个全连接层，它们的输出个数分别为120、84和10，其中10为输出的类别个数。

#### 实践

下面我们以Fashion-MNIST为例。

In [1]:
import torch
from torch import nn
import utils

In [2]:
#读取数据
# Number of samples per mini-batch.
batch_size = 256
# NOTE(review): `utils` is a project-local module; it presumably returns the
# Fashion-MNIST training and test iterators in that order -- confirm there.
train_iter, test_iter = utils.load_data_fashion_mnist(batch_size=batch_size)


In [3]:
#定义模型
class LeNet(nn.Module):
    """LeNet-style convolutional network producing 10 output scores.

    The network has two parts:

    * ``conv``: two (5x5 convolution -> sigmoid -> 2x2 max-pool) stages.
    * ``fc``: three linear layers with 120, 84 and 10 outputs, with a
      sigmoid after the first two.

    The first linear layer expects ``16 * 4 * 4`` features, which matches a
    1x28x28 input: 28 -> 24 (conv) -> 12 (pool) -> 8 (conv) -> 4 (pool),
    with 16 channels after the second convolution.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            nn.MaxPool2d(kernel_size=2),
        )
        # Fully connected classifier head.
        self.fc = nn.Sequential(
            nn.Linear(in_features=16 * 4 * 4, out_features=120),
            nn.Sigmoid(),
            nn.Linear(in_features=120, out_features=84),
            nn.Sigmoid(),
            # No activation after the output layer.
            nn.Linear(in_features=84, out_features=10),
        )

    def forward(self, img):
        """Return the output scores for a batch of images.

        Args:
            img: Input batch; its first dimension is treated as the batch
                size when flattening the convolutional features.

        Returns:
            The output of the fully connected part, one row per sample.
        """
        num_samples = img.shape[0]
        feature_maps = self.conv(img)
        # Flatten each sample's feature maps into a single vector so they
        # can be fed to the linear layers.
        flattened = feature_maps.view(num_samples, -1)
        return self.fc(flattened)

# Build the model and print its layer structure.
net = LeNet()
print(net)

LeNet(
  (conv): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
    (1): Sigmoid()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Sequential(
    (0): Linear(in_features=256, out_features=120, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=120, out_features=84, bias=True)
    (3): Sigmoid()
    (4): Linear(in_features=84, out_features=10, bias=True)
  )
)


In [4]:
#定义评估函数
def evaluate_accuracy(data_iter, net, device=None):
    """Return the classification accuracy of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` batches, where ``y`` holds
            the class indices compared against ``argmax(dim=1)`` of the
            network output.
        net: The model to evaluate. If it is an ``nn.Module`` it is switched
            to evaluation mode for the duration of the call and its previous
            training/eval mode is restored afterwards. Any other callable is
            invoked as-is.
        device: Device the batches are moved to. When ``None`` and ``net``
            is an ``nn.Module`` with parameters, the device of its first
            parameter is used; otherwise batches are left where they are.

    Returns:
        The fraction of correctly classified samples as a float.

    Raises:
        ZeroDivisionError: If ``data_iter`` yields no samples.
    """
    is_module = isinstance(net, nn.Module)
    if device is None and is_module:
        # Infer the device from the model; tolerate parameterless modules.
        first_param = next(net.parameters(), None)
        if first_param is not None:
            device = first_param.device

    # Remember the current mode so evaluation does not silently flip a model
    # that was deliberately put in eval mode back into training mode.
    was_training = is_module and net.training
    if is_module:
        net.eval()  # evaluation mode, e.g. disables dropout

    acc_sum, n = 0.0, 0
    try:
        with torch.no_grad():
            for X, y in data_iter:
                if device is not None:
                    X, y = X.to(device), y.to(device)
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
                n += y.shape[0]
    finally:
        if was_training:
            net.train()
    return acc_sum / n

In [7]:
#定义训练函数
import time
def train_ch5(net, train_iter, test_iter, optimizer, device, num_epochs):
    """Train ``net`` with cross-entropy loss and report metrics per epoch.

    After every epoch a line is printed with the mean batch loss, the
    training accuracy, the test accuracy (via ``evaluate_accuracy``) and the
    elapsed time for that epoch.

    Args:
        net: Model to train; it is moved to ``device``.
        train_iter: Iterable yielding ``(X, y)`` training batches.
        test_iter: Iterable yielding ``(X, y)`` batches used for the test
            accuracy after each epoch.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called once
            per batch.
        device: Device the model and batches are moved to.
        num_epochs: Number of passes over ``train_iter``.

    Raises:
        ZeroDivisionError: If ``train_iter`` yields no batches.
    """
    # Run on the GPU or the CPU, depending on `device`.
    net = net.to(device)
    print("training on", device)
    # Loss function.
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        # Set training mode explicitly instead of relying on whatever mode
        # the caller (or a previous evaluation) left the model in.
        net.train()
        train_l_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        start = time.time()
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
            # .item() copies the scalar to the host as a Python number.
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net, device)
        print('epoch %d,loss %.4f,train acc %.3f,test acc %.3f,time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n,
                 test_acc, time.time() - start))

In [8]:
#进行训练
# Hyperparameters: learning rate and number of training epochs.
lr = 0.001
num_epochs = 5
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, optimizer, device, num_epochs)

training on cuda
epoch 1,loss 1.8984,train acc 0.291,test acc 0.568,time 8.6 sec
epoch 2,loss 0.9670,train acc 0.636,test acc 0.679,time 7.9 sec
epoch 3,loss 0.8023,train acc 0.702,test acc 0.713,time 6.9 sec
epoch 4,loss 0.7178,train acc 0.731,test acc 0.734,time 7.8 sec
epoch 5,loss 0.6599,train acc 0.749,test acc 0.751,time 8.0 sec
