In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import torchsummary
from torch.utils.data import DataLoader
import math

# 定义模型类

In [3]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1, padding=True)
        self.pool = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(32, 64, 3, 1, padding=True)
        self.dropout = nn.Dropout2d(0.5)
        self.fc1 = nn.Linear(3136, 128)
        self.fc2 = nn.Linear(128, 10)
        
    # 重写forward方法
    def forward(self, x):
        x = self.conv1(x)  # 卷积
        x = self.pool(x)  # 池化
        x = F.relu(x)  # relu函数激活
        x = self.conv2(x)  # 卷积
        x = self.pool(x)  # 池化
        x = F.relu(x)  # relu函数激活
        x = self.dropout(x)  # dropout层
        x = torch.flatten(x, 1)  # 展平
        x = self.fc1(x)  # 全连接
        x = F.relu(x)  # relu函数激活
        x = self.dropout(x)  # dropout层
        x = self.fc2(x)  # relu函数激活
        output = F.log_softmax(x, dim=1)  # 计算log_softmax值
        return output

In [4]:
model = Net()

In [5]:
# 查看模型结构
torchsummary.summary(model.to('cuda'), (1,28,28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
         MaxPool2d-2           [-1, 32, 14, 14]               0
            Conv2d-3           [-1, 64, 14, 14]          18,496
         MaxPool2d-4             [-1, 64, 7, 7]               0
         Dropout2d-5             [-1, 64, 7, 7]               0
            Linear-6                  [-1, 128]         401,536
         Dropout2d-7                  [-1, 128]               0
            Linear-8                   [-1, 10]           1,290
Total params: 421,642
Trainable params: 421,642
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.38
Params size (MB): 1.61
Estimated Total Size (MB): 2.00
----------------------------------------------------------------


# 定义训练函数

In [6]:
# model：模型    device：模型训练场所     optimizer：优化器    epoch：模型训练轮次
def train(model, device, train_loader, optimizer, epoch):
    model.train()  # 声明训练函数，参数的梯度要更新
    total = 0  # 记录已经训练的数据个数
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        
        total += len(data)
        progress = math.ceil(batch_idx / len(train_loader) * 50)
        print("\rTrain epoch %d: %d/%d, [%-51s] %d%%" %
              (epoch, total, len(train_loader.dataset),
               '-' * progress + '>', progress * 2), end='')

In [7]:
# python标准化输出 %-51s, 51表示至少输出51个字符，-表示左对齐，显示后面的内容，不够的位置用空格补全
progress = 10
print("\r[%-51s]" % ('-' * progress + '>'), end='')

[---------->                                        ]

# 定义测试函数

In [10]:
def test(model, device, test_loader):
    model.eval()  # 声明验证函数，禁止所有梯度进行更新
    test_loss = 0
    correct = 0
    # 强制后面的计算不生成计算图，加快测试效率
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # 对每个batch的loss进行求和
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()
    test_loss /= len(test_loader.dataset)
    
    print('\nTest: average loss: {:.4f}, accuracy: {}/{} ({:.0f}%)'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))       

# 定义主函数

In [11]:
def main():
    epochs = 2
    batch_size = 64
    torch.manual_seed(2021)
    
    # 查看GPU是否可用，如果可用就用GPU否则用CPU
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 
    
    # 训练集的定义
    train_loader = DataLoader(
                            datasets.MNIST('Efficient-Neural-Network-Bilibili-master/data/MNIST', 
                                           train=True,
                                           download=False,
                                           transform=transforms.Compose([
                                                                       transforms.ToTensor(),
                                                                       transforms.Normalize((0.1307,),(0.3081,))
                                           ])),
                            batch_size=1000,
                            shuffle=True
                            )
    # 测试集的定义
    test_loader = DataLoader(
                            datasets.MNIST('Efficient-Neural-Network-Bilibili-master/data/MNIST', 
                                           train=False,
                                           download=False,
                                           transform=transforms.Compose([
                                                                       transforms.ToTensor(),
                                                                       transforms.Normalize((0.1307,),(0.3081,))
                                           ])),
                            batch_size=1000,
                            shuffle=True
                            )
    # 模型定义并加载至GPU
    model = Net().to(device)
    # 随机梯度下降
    optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9)
    
    for epoch in range(1, epochs+1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

In [13]:
main()

Train epoch 1: 60000/60000, [-------------------------------------------------->] 100%
Test: average loss: 0.1979, accuracy: 9381/10000 (94%)
Train epoch 2: 60000/60000, [-------------------------------------------------->] 100%
Test: average loss: 0.1088, accuracy: 9652/10000 (97%)
