# **完整的训练流程**

In [None]:
import torch
import torch.nn as nn
from torch.nn import Conv2d, MaxPool2d, Linear, Flatten
import torchvision
from torchvision import datasets, transforms
from model import *
from torch.utils.tensorboard import SummaryWriter

# 1. Prepare the datasets
# Both CIFAR-10 splits are stored under ./dataset (download=True fetches them
# when they are not already there) and every image is converted to a tensor.
train_data = datasets.CIFAR10(
    "./dataset", train=True, transform=transforms.ToTensor(), download=True
)
test_data = datasets.CIFAR10(
    "./dataset", train=False, transform=transforms.ToTensor(), download=True
)

# 2. Report how many samples each split holds
# test_data_size is reused later to turn a correct-prediction count into a ratio.
train_data_size, test_data_size = len(train_data), len(test_data)
print(f"train_Dataset size: {train_data_size}")
print(f"test_Dataset size: {test_data_size}")

# 3. Wrap the datasets in DataLoaders
# Mini-batches of 64 samples; only the training split is shuffled, the test
# split is always visited in the same order.
train_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified
train_Dataset size: 50000
test_Dataset size: 10000


In [None]:
import os

# 4. Instantiate the model
myModule = MyModule()
# print(myModule)

# 5. Loss function
loss_fn = nn.CrossEntropyLoss()

# 6. Optimizer
learning_rate = 1e-2  # 1e-2 == 1 * 10^-2 == 0.01
optimizer = torch.optim.SGD(myModule.parameters(), lr=learning_rate)

# 7. Train the model
# Bookkeeping for the run.
total_train_step = 0  # number of optimizer steps taken so far
total_test_step = 0  # number of completed evaluation passes
epoch = 10  # number of passes over the training set

# torch.save does not create missing parent directories; make sure the
# checkpoint directory exists up front so a whole epoch of training is not
# lost to a failed save.
os.makedirs("model", exist_ok=True)

# Using the writer as a context manager guarantees it is flushed and closed
# even when training is interrupted by an exception.
with SummaryWriter("logs") as write:
    for i in range(epoch):
        # Switch to training mode (only matters for layers that behave
        # differently between training and evaluation, e.g. Dropout/BatchNorm).
        myModule.train()

        # 7.1 Training pass
        print(f"----------第 {i+1} 轮训练开始----------")
        for imgs, targets in train_loader:
            outputs = myModule(imgs)
            loss = loss_fn(outputs, targets)

            # Clear stale gradients, back-propagate, then update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total_train_step += 1
            # Report/log only every 100 steps to keep the output readable.
            if total_train_step % 100 == 0:
                print(f"训练次数: {total_train_step}, Loss: {loss.item():.3f}")
                write.add_scalar("train_loss", loss.item(), total_train_step)

        # Switch to evaluation mode before measuring on the test split.
        myModule.eval()

        # 7.2 Evaluation pass
        total_test_loss = 0  # sum of the per-batch losses over the test split
        total_accuracy = 0  # number of correctly classified test samples
        with torch.no_grad():  # no gradients are needed for evaluation
            for imgs, targets in test_loader:
                outputs = myModule(imgs)
                loss = loss_fn(outputs, targets)
                total_test_loss += loss.item()
                # .item() keeps the running count a plain Python int instead
                # of accumulating 0-dim tensors.
                total_accuracy += (outputs.argmax(1) == targets).sum().item()

        total_test_step += 1
        test_accuracy = total_accuracy / test_data_size
        print(
            f"测试次数: {total_test_step}, 测试集的Loss: {total_test_loss:.3f}, 测试集的准确率: {test_accuracy:.2%}"
        )
        write.add_scalar("test_loss", total_test_loss, total_test_step)
        write.add_scalar("test_accuracy", test_accuracy, total_test_step)

        # 7.3 Save a checkpoint after every epoch
        # NOTE(review): this pickles the whole module object, so loading the
        # file later requires the MyModule class to be importable.
        torch.save(myModule, f"model/myModule_{i+1}.pth")

----------第 1 轮训练开始----------
训练次数: 100, Loss: 2.301
训练次数: 200, Loss: 2.291
训练次数: 300, Loss: 2.292
训练次数: 400, Loss: 2.238
训练次数: 500, Loss: 2.154
训练次数: 600, Loss: 1.991
训练次数: 700, Loss: 1.944
测试次数: 1, 测试集的Loss: 339.847, 测试集的准确率: 21.02%
----------第 2 轮训练开始----------
训练次数: 800, Loss: 1.992
训练次数: 900, Loss: 1.837
训练次数: 1000, Loss: 1.931
训练次数: 1100, Loss: 1.874
训练次数: 1200, Loss: 1.843
训练次数: 1300, Loss: 1.656
训练次数: 1400, Loss: 1.680
训练次数: 1500, Loss: 1.873
测试次数: 2, 测试集的Loss: 289.403, 测试集的准确率: 34.88%
----------第 3 轮训练开始----------
训练次数: 1600, Loss: 1.694
训练次数: 1700, Loss: 1.704
训练次数: 1800, Loss: 1.653
训练次数: 1900, Loss: 1.462
训练次数: 2000, Loss: 1.706
训练次数: 2100, Loss: 1.854
训练次数: 2200, Loss: 1.843
训练次数: 2300, Loss: 1.449
测试次数: 3, 测试集的Loss: 262.477, 测试集的准确率: 40.51%
----------第 4 轮训练开始----------
训练次数: 2400, Loss: 1.565
训练次数: 2500, Loss: 1.691
训练次数: 2600, Loss: 1.272
训练次数: 2700, Loss: 1.565
训练次数: 2800, Loss: 1.397
训练次数: 2900, Loss: 1.437
训练次数: 3000, Loss: 1.333
训练次数: 3100, Loss: 1.379
测试次数: 4, 测试集的