使用神经网络训练和验证数据集：

1. 数据准备
   - 创建/加载数据集
     - 使用适当的方法创建或加载数据集（例如，使用`torchvision.datasets`或自定义数据集）。
     - 将数据集划分为训练集、验证集和测试集。
   - 创建数据加载器（`DataLoader`）以便进行批量训练。
2. 创建模型
   - 定义一个神经网络模型，通常继承`nn.Module`。
   - 在构造函数中定义网络层（如全连接层、卷积层等）。
   - 在`forward`方法中定义前向传播。
3. 定义损失函数和优化器
   - 选择适合任务的损失函数（如分类任务使用交叉熵损失`nn.CrossEntropyLoss`，回归任务使用均方误差`nn.MSELoss`）。
   - 选择一个优化器（如`optim.SGD`或`optim.Adam`）并设置学习率。
4. 训练模型
   - 循环多个epoch（训练周期）。
   - 在每个epoch中，遍历训练数据加载器，每次获取一个批次的数据。
   - 将数据输入模型得到输出，计算损失。
   - 清零梯度，反向传播，优化器更新参数。
   - 可选：记录训练损失和准确率。
5. 验证模型
   - 在每个epoch结束后，使用验证集评估模型。
   - 将模型设置为评估模式（`model.eval()`），关闭梯度计算（`torch.no_grad()`）。
   - 遍历验证数据加载器，计算验证损失和准确率。
   - 根据验证结果调整超参数或进行早停（early stopping）等操作。
6. 保存模型
   - 使用`torch.save`保存模型的状态字典（`state_dict`）或整个模型。
   - 通常保存验证性能最好的模型。
7. 模型预测
   - 加载保存的模型。
   - 对新数据（单一样本或批次）进行预测，注意数据预处理和转换为张量。
   - 将模型输出转换为预测标签（如分类问题使用softmax和argmax）。

1.使用全连接网络训练和验证MNIST数据集

In [18]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image


# 数据准备:加载数据集（MNIST），返回训练/验证数据集加载器
def build_dataLoader(*, train=False, test=False):

    trainLoader, testLoader = None, None

    if train:

        trainDataset = datasets.MNIST(
            root="./图片资料",
            train=True,
            transform=transforms.ToTensor(),
            download=True,
        )

        trainLoader = DataLoader(
            trainDataset,
            batch_size=len(trainDataset) // 20,
            shuffle=True,
        )

    if test:

        testDataset = datasets.MNIST(
            root="./图片资料",
            train=False,
            transform=transforms.ToTensor(),
            download=True,
        )

        testLoader = DataLoader(
            testDataset,
            batch_size=len(testDataset) // 10,
            shuffle=True,
        )

    if trainLoader is not None and testLoader is not None:
        return trainLoader, testLoader
    elif trainLoader is not None and testLoader is None:
        return trainLoader
    elif testLoader is not None and trainLoader is None:
        return testLoader


# 构建模型结构
class Net(nn.Module):
    def __init__(self, input_size, output_size):
        super(Net, self).__init__()

        self.fc1 = nn.Linear(input_size, 256)
        self.BN1 = nn.BatchNorm1d(256)
        self.relu1 = nn.ReLU()

        self.fc2 = nn.Linear(256, 128)
        self.BN2 = nn.BatchNorm1d(128)
        self.relu2 = nn.ReLU()

        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = x.view(-1, 784)
        
        x = self.fc1(x)
        x = self.BN1(x)
        x = self.relu1(x)

        x = self.fc2(x)
        x = self.BN2(x)
        x = self.relu2(x)

        x = self.fc3(x)
        return x


# 训练模型
def train(model, train_loader, lr, epochs):
    model.train()

    # 定义损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr, betas=(0.9, 0.999), eps=1e-8)

    for epoch in range(epochs):
        crrect = 0
        for x, y in train_loader:
            y_pred = model(x)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            crrect += (torch.max(y_pred, dim=1)[1].eq(y)).sum().item()

        accuracy = crrect / len(train_loader.dataset)
        if epoch % 10 == 0:
            print(f"Epoch: {epoch}, Loss: {loss.item()}, acc: {accuracy}")


# 验证模型
def eval(model, test_loader):
    model.eval()

    with torch.no_grad():
        criterion = nn.CrossEntropyLoss()
        crrect = 0
        for x, y in test_loader:
            y_pred = model(x)
            loss = criterion(y_pred, y)
            crrect += (torch.max(y_pred, dim=1)[1] == y).sum().item()

        accuracy = crrect / len(test_loader.dataset)
        print(f"loss: {loss.item()} acc: {accuracy}")


# 保存模型
def save_model(model, path):
    torch.save(model.state_dict(), path)


# 加载模型
def load_model(path):
    model = Net(1 * 28 * 28, 10)
    model.load_state_dict(torch.load(path))
    return model


# 预测
def predict(model, test_path):
    transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((28, 28))])

    img = Image.open(test_path).convert('L')
    img = transform(img).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        y_pred = model(img)
        pred = torch.argmax(y_pred, dim=1)

        print(f"预测分类：{pred.item()}")


train_loader, test_loader = build_dataLoader(train=True, test=True)
model = Net(784, 10)
train(model, train_loader, 0.01, 100)
eval(model, test_loader)
save_model(model, "./model/mnist_model.pth")

# model = load_model("./model/mnist_model.pth")
# predict(model, r"图片资料\3.png")


Epoch: 0, Loss: 0.18691670894622803, acc: 0.85915
Epoch: 10, Loss: 0.006430135574191809, acc: 0.9986666666666667
Epoch: 20, Loss: 0.011783843860030174, acc: 0.9969333333333333
Epoch: 30, Loss: 9.147380478680134e-05, acc: 1.0
Epoch: 40, Loss: 4.7696405090391636e-05, acc: 1.0
Epoch: 50, Loss: 3.165101952617988e-05, acc: 1.0
Epoch: 60, Loss: 3.136577652185224e-05, acc: 1.0
Epoch: 70, Loss: 1.6336618500645272e-05, acc: 1.0
Epoch: 80, Loss: 1.6351716112694703e-05, acc: 1.0
Epoch: 90, Loss: 9.618052899895702e-06, acc: 1.0
loss: 0.12755970656871796 acc: 0.9816


2.使用全连接网络训练和验证CIFAR10数据集

In [58]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image

# 数据准备，返回训练集数据加载器和测试集数据加载器
def build_dataLoader():
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))      # 归一化
    ])
    
    trainDataset = datasets.CIFAR10(
        root="./图片资料",
        train=True,
        transform=transform,
        download=True
    )

    testDataset = datasets.CIFAR10(
        root="./图片资料",
        train=False,
        transform=transform,
        download=True
    )

    trainLoader = DataLoader(
        dataset=trainDataset,
        batch_size=len(trainDataset)//20,
        shuffle=True
    )

    testLoader = DataLoader(
        dataset=testDataset,
        batch_size=len(testDataset)//10,
        shuffle=True
    )

    return trainLoader, testLoader



# 构建网络结构
class CIFAR10Net(nn.Module):
    def __init__(self):
        super(CIFAR10Net, self).__init__()

        self.fc1 = nn.Linear(32*32*3, 1024)
        # self.BN1 = nn.BatchNorm1d(1024)
        self.tanh1 = nn.Tanh()
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.5)

        self.fc2 = nn.Linear(1024, 512)
        # self.BN2 = nn.BatchNorm1d(512)
        self.tanh2 = nn.Tanh()
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.5)

        self.fc3 = nn.Linear(512, 256)
        # self.BN3 = nn.BatchNorm1d(256)
        self.tanh3 = nn.Tanh()
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.5)

        self.fc4 = nn.Linear(256, 10)

    def forward(self, x):
        x = x.view(-1, 32*32*3)

        x = self.fc1(x)
        # x = self.BN1(x)
        x = self.tanh1(x)
        x = self.relu1(x)
        x = self.dropout1(x)

        x = self.fc2(x)
        # x = self.BN2(x)
        x = self.tanh2(x)
        x = self.relu2(x)
        # x = self.dropout2(x)

        x = self.fc3(x)
        # x = self.BN3(x)
        x = self.tanh3(x)
        x = self.relu3(x)
        x = self.dropout3(x)

        x = self.fc4(x)
        return x
    

# 训练
def train(model, train_loader, test_loader, lr, epochs):
    model.train()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    acc_list = []
    loss_list = []
    patience = 0

    for epoch in range(epochs):
        crrect_num = 0
        for data in train_loader:
            x, y = data
            y_pred = model(x)
            loss = criterion(y_pred, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            crrect_num += (y_pred.argmax(dim=1) == y).sum().item()

        acc_list.append(crrect_num / len(train_loader.dataset))
        loss_list.append(loss.item())

        print(f"Epoch: {epoch+1}, Loss: {loss.item():0.4f} Acc: {crrect_num / len(train_loader.dataset):0.4f}")

        if epoch > 20 and acc_list[-1] - acc_list[-2] < 0.01 and loss_list[-2] - loss_list[-1] < 0.001:
            patience += 1
            if patience > 5:
                print('模型训练连续5次没有进步，结束训练')
                break
        else:
            patience = 0
            


# 测试
def eval(model, test_loader):
    model.eval()

    with torch.no_grad():
        criterion = nn.CrossEntropyLoss()
        crrect_num = 0
        
        for data in test_loader:
            x, y = data
            y_pred = model(x)
            loss = criterion(y_pred, y)
            crrect_num += (y_pred.argmax(dim=1) == y).sum().item()
        print(f"测试集验证结果：\n Loss: {loss.item()} Acc: {crrect_num / len(test_loader.dataset)}")


# 保存模型
def save_model(model, path):
    torch.save(model.state_dict(), path)


# 加载模型
def load_model(path):
    model = CIFAR10Net()
    model.load_state_dict(torch.load(path))


# 预测
def predict(model, path):
    
    model.eval()
    with torch.no_grad():
        
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Resize((32, 32)),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))      # 归一化
        ])

        img = Image.open(path)
        img = transform(img)
        img = img.view(-1, 3 * 32 * 32)

        output = model(img)
        _, pred = torch.max(output, dim=1)
        print(pred.item())

trainLoader, testLoader = build_dataLoader()
model = CIFAR10Net()
train(model, trainLoader, testLoader, 0.01, 50)
eval(model, testLoader)
save_model(model, 'model/cifar10_model.pth')




Epoch: 1, Loss: 1.9758 Acc: 0.1983
Epoch: 2, Loss: 1.9311 Acc: 0.2901
Epoch: 3, Loss: 1.8783 Acc: 0.3156
Epoch: 4, Loss: 1.9075 Acc: 0.3257
Epoch: 5, Loss: 1.8573 Acc: 0.3298
Epoch: 6, Loss: 1.8757 Acc: 0.3330
Epoch: 7, Loss: 1.8739 Acc: 0.3341
Epoch: 8, Loss: 1.8415 Acc: 0.3342
Epoch: 9, Loss: 1.8559 Acc: 0.3359
Epoch: 10, Loss: 1.7869 Acc: 0.3388
Epoch: 11, Loss: 1.8374 Acc: 0.3352
Epoch: 12, Loss: 1.8598 Acc: 0.3353
Epoch: 13, Loss: 1.8270 Acc: 0.3382
Epoch: 14, Loss: 1.8251 Acc: 0.3401
Epoch: 15, Loss: 1.8432 Acc: 0.3398
Epoch: 16, Loss: 1.8054 Acc: 0.3382
Epoch: 17, Loss: 1.8283 Acc: 0.3374
Epoch: 18, Loss: 1.8404 Acc: 0.3409
Epoch: 19, Loss: 1.8199 Acc: 0.3412
Epoch: 20, Loss: 1.8521 Acc: 0.3398
Epoch: 21, Loss: 1.8676 Acc: 0.3422
Epoch: 22, Loss: 1.8592 Acc: 0.3434
Epoch: 23, Loss: 1.8541 Acc: 0.3450
Epoch: 24, Loss: 1.8064 Acc: 0.3424
Epoch: 25, Loss: 1.8233 Acc: 0.3443
Epoch: 26, Loss: 1.8611 Acc: 0.3458
Epoch: 27, Loss: 1.8341 Acc: 0.3462
Epoch: 28, Loss: 1.8146 Acc: 0.3447
E