In [15]:
import torch
# print(torch.__version__)
from calendar import EPOCH
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler # 学习调整率 
from torchvision import transforms # 视觉库中提供了一些数据变换的接口
from torchvision import datasets
from random import shuffle


In [16]:
# 设置超参数
BATCH_SIZE = 64
EPOCHS = 3
learning_rate = 0.001 # 初始学习率
# torch判断是否使用cuda
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# DEVICE = torch.device("cpu")
# momentum
# log_interval
# random_seed = 1
# torch.manual_seed(random_seed)

In [17]:
# 加载数据集
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize(mean=(0.5,), std=(0.5,)) # 数据规范化到正态分布
                    ])),
    batch_size = BATCH_SIZE, shuffle=True 
)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.5,), (0.5,)) # 数据规范化到正态分布
                        # (0.1307,), (0.3081,) 0.1307和0.3081是MNIST数据集的全局平均值和标准偏差
                    ])),
    batch_size = BATCH_SIZE, shuffle=True 
)

In [18]:
# 建造模型

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # 提取特征值
        self.features = nn.Sequential(
            
            # 卷积层 ((28-3+2*1)/1)+1=28 28*28*1-->>28*28*32
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1),
            
            # 批量归一化 归一化与上一层out channels大小相等
            nn.BatchNorm2d(num_features=32),
            
            # 激活函数
            nn.ReLU(inplace=True), # true:直接进行运算
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1), # (28-3+2*1)/1)+1=28
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True), 

            # 最大池化层 图像大小变为1/4 28*28-->> 7*7
            nn.MaxPool2d(kernel_size=2, stride=2), 
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        # 分类层
        self.classifier = nn.Sequential(

            # Dropout层
            nn.Dropout(p=0.5), # 每个权重有0.5可能为0
            # 64通道*(7*7图像)输入到512个神经元中
            nn.Linear(64 * 7 * 7, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(512, 10),
        )

    # 向前传递函数
    def forward(self, x):

        x = self.features(x)
        # 输出结果必须展平成1维向量
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

        return x    

In [19]:
# 训练前 初始化模型

ConvModel = ConvNet().to(DEVICE)
# 定义交叉损失函数
criterion = nn.CrossEntropyLoss().to(DEVICE)
# 定义优化器
optimizer = optim.Adam(ConvModel.parameters(), lr=learning_rate)
# 定义学习率调度器， 学习率衰减周期：step_size, 衰减的乘法因子gamma
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=6, gamma=0.1)

In [20]:
# 训练

def train(num_epochs, _model, _device, _train_loader, _optimizer, _lr_scheduler):
    # 训练模式
    _model.train()
    # _lr_scheduler.step()

    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(_train_loader):
            #print(images.shape)
            samples = images.to(_device)
            #resamples = samples.reshape(-1, 1, 28, 28)
            #print(resamples.shape)
            labels = labels.to(_device)
            # 输入批量n*28*28*单通道的四维张量， -1表示输入批量数
            # output = _model(images)
            output = _model(samples.reshape(-1, 1, 28, 28))
            loss = criterion(output, labels)
            # 优化器内部参数梯度必须变为0
            _optimizer.zero_grad()
            # 损失值后向传播
            loss.backward()
            # 更新模型参数 ?? 顺序？
            _optimizer.step()
            # 学习率调度器更新 根据Loss进行学习率更新
            _lr_scheduler.step(loss)

            if (i + 1) % 100 == 0:
                print("Epoch: {}/{}, step: {}, loss: {:.4f}".format(epoch + 1, num_epochs, i + 1, loss.item()))

In [21]:
# 预测

def test(_test_loader, _model, _device):
    _model.eval()
    loss =0
    correct = 0

    # 如果不需要 backward更新梯度，那么就要禁用梯度计算，减少内存和计算资源浪费
    with torch.no_grad():
        for  data, target in test_loader:
            data, target = data.to(_device), target.to(_device)
            # print(data.shape)
            output = _model(data.reshape(-1, 1, 28, 28))
            # output = _model(data)
            loss += criterion(output, target).item()
            # 找到概率最大的下标，为输出值
            pred = output.data.max(1, keepdim=True)[1]
            correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    loss /= len(_test_loader.dataset)

    print('\nAverage loss: {:.4f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        loss, correct, len(_test_loader.dataset),
        100. * correct / len(_test_loader.dataset)
    ))

In [22]:
# 运行
for epoch in range(1, EPOCHS + 1):
    train(epoch, ConvModel, DEVICE, train_loader, optimizer, exp_lr_scheduler)
    test(test_loader, ConvModel, DEVICE)
    # test(train_loader, ConvModel, DEVICE)

Epoch: 1/1, step: 100, loss: 0.2382
Epoch: 1/1, step: 200, loss: 0.1896
Epoch: 1/1, step: 300, loss: 0.2093
Epoch: 1/1, step: 400, loss: 0.2125
Epoch: 1/1, step: 500, loss: 0.1364
Epoch: 1/1, step: 600, loss: 0.0759
Epoch: 1/1, step: 700, loss: 0.0681
Epoch: 1/1, step: 800, loss: 0.1668
Epoch: 1/1, step: 900, loss: 0.0859

Average loss: 0.0005, Accuracy: 9907/10000 (99.070%)

Epoch: 1/2, step: 100, loss: 0.0470
Epoch: 1/2, step: 200, loss: 0.0248
Epoch: 1/2, step: 300, loss: 0.1366
Epoch: 1/2, step: 400, loss: 0.0699
Epoch: 1/2, step: 500, loss: 0.0174
Epoch: 1/2, step: 600, loss: 0.0416
Epoch: 1/2, step: 700, loss: 0.0136
Epoch: 1/2, step: 800, loss: 0.0298
Epoch: 1/2, step: 900, loss: 0.0338
Epoch: 2/2, step: 100, loss: 0.0092
Epoch: 2/2, step: 200, loss: 0.1915
Epoch: 2/2, step: 300, loss: 0.0576
Epoch: 2/2, step: 400, loss: 0.0999
Epoch: 2/2, step: 500, loss: 0.0608
Epoch: 2/2, step: 600, loss: 0.0641
Epoch: 2/2, step: 700, loss: 0.0528
Epoch: 2/2, step: 800, loss: 0.1008
Epoch: 2/