In [1]:
# implemented by zmy

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F # 使用了其中的log_softmax函数和nll_loss损失函数
from torchvision import datasets, transforms

In [3]:
# 定义一个字典存放一些常量参数
args = {
    "batch_size": 512, # 训练数据和测试数据的时候每个mini-batch包的大小为512
    "epoch": 10, # 训练和测试的迭代次数
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu") # 计算设备
}

In [4]:
# 训练集
train_loader = torch.utils.data.DataLoader(
            datasets.MNIST("./data/", train=True, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(), # 将图像转为tensor格式
                              transforms.Normalize((0.1307,), (0.3081,)) # 0.1307和0.3081是mnist数据集的均值和标准差，可以将灰度转换到0~1之间
                          ])),
        batch_size=args["batch_size"],
        shuffle=True, # 打乱顺序
        drop_last=True #舍弃最后一个大小不足512的batch
)

In [5]:
# 测试集
test_loader = torch.utils.data.DataLoader(
            datasets.MNIST("./data/", train=False, download=True,
                          transform=transforms.Compose([
                              transforms.ToTensor(),
                              transforms.Normalize((0.1307,), (0.3081,))
                          ])),
        batch_size=args["batch_size"],
        shuffle=False
)

In [6]:
# 构建卷积神经网络
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__();
        self.net = nn.Sequential(
            nn.Conv2d(1, 10, kernel_size=5),  # 卷积层: (in_channel, out_channel, kernel_size)
            nn.ReLU(inplace=True),  # 激活层 ReLU
            nn.MaxPool2d(2, 2),  # 最大池化
            nn.Conv2d(10, 20, kernel_size=3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),
            nn.Flatten(),  # 拉平成向量
            nn.Linear(500, 256),  # 全连接层
            nn.ReLU(inplace=True),
            nn.Linear(256, 10)
        ) # 原来的图像经过loader和net之后输出一个1x10的向量
        
    def forward(self, x):
        return F.log_softmax(self.net(x), dim=1)

In [7]:
# 实例化模型和优化器
model = Net().to(args["device"]) # 转到GPU上进行计算
optimizer = torch.optim.Adam(model.parameters())

In [8]:
# 定义训练函数
def train(model, device, optimizer, train_loader, epoch):
    model.train() # 将模块设置为训练模式,作用未知
    # batch_idx:mini_batch的编号， enumerate除了返回train_loader的x,y数据之外，还返回一个从0开始自动迭代的编号
    for batch_idx, (x_data, y_data) in  enumerate(train_loader):
        x_data, y_data = x_data.to(device), y_data.to(device) # 把数据移到GPU上
        # ①前馈 ②计算损失 ③反向传播，更新梯度（每次更新梯度前要清零）
        y_hat = model(x_data)
        loss = F.nll_loss(y_hat, y_data)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # 一共需要60000/512=117个周期，所以我们每29个周期观察一次
        if(batch_idx + 1) % 29 == 0:
            print("TrainEpoch:{} Progress:({}/{} ({:.4f}%)\tloss:{:.6f}".format(
                epoch, 
                args["batch_size"] * batch_idx, # 已经训练的数据个数
                len(train_loader.dataset), # 需要训练的数据个数
                100 * args["batch_size"] * batch_idx / len(train_loader.dataset), # 进度
                loss.item() 
            ))
            

In [9]:
# 定义测试函数
def test(model, device, test_loader):
    model.eval() # 将模块设置为评估模式,作用未知
    loss = 0
    correct = 0
    for x_data, y_data in test_loader:
        x_data, y_data = x_data.to(device), y_data.to(device) # 把数据移到GPU上
        with torch.no_grad():
            y_hat = model(x_data)
            loss += F.nll_loss(y_hat, y_data)
            y_hat = y_hat.max(1, keepdim=True)[1]
            # y_hat的形状是512*10的tensor，第一个参数1表示按行取最大值（0是按列），[1]是取最大值的下标（[0]是最大值），也就是识别结果
            correct += y_hat.eq(y_data.view_as(y_hat)).sum().item()
            # 和y_data进行比对，计算出有几个一样的
        
    loss /= args["batch_size"]       
    print("Test:Average Loss:{:.4f}, Accuracy:{}/{} ({}%)\n".format(
            loss,
            correct,
            len(test_loader.dataset),
            100 * correct / len(test_loader.dataset)
    ))    


In [10]:
for epoch in range(1, args["epoch"] + 1):
    train(model, args["device"], optimizer, train_loader, epoch)
    test(model, args["device"], test_loader)

TrainEpoch:1 Progress:(14336/60000 (23.8933%)	loss:0.384776
TrainEpoch:1 Progress:(29184/60000 (48.6400%)	loss:0.295397
TrainEpoch:1 Progress:(44032/60000 (73.3867%)	loss:0.200777
TrainEpoch:1 Progress:(58880/60000 (98.1333%)	loss:0.138194
Test:Average Loss:0.0048, Accuracy:9642/10000 (96.42%)

TrainEpoch:2 Progress:(14336/60000 (23.8933%)	loss:0.123801
TrainEpoch:2 Progress:(29184/60000 (48.6400%)	loss:0.102141
TrainEpoch:2 Progress:(44032/60000 (73.3867%)	loss:0.086510
TrainEpoch:2 Progress:(58880/60000 (98.1333%)	loss:0.086306
Test:Average Loss:0.0028, Accuracy:9770/10000 (97.7%)

TrainEpoch:3 Progress:(14336/60000 (23.8933%)	loss:0.074409
TrainEpoch:3 Progress:(29184/60000 (48.6400%)	loss:0.066436
TrainEpoch:3 Progress:(44032/60000 (73.3867%)	loss:0.043368
TrainEpoch:3 Progress:(58880/60000 (98.1333%)	loss:0.088545
Test:Average Loss:0.0020, Accuracy:9825/10000 (98.25%)

TrainEpoch:4 Progress:(14336/60000 (23.8933%)	loss:0.041106
TrainEpoch:4 Progress:(29184/60000 (48.6400%)	loss:0.