In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import math
import optuna
# 加载和预处理数据
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=100, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=100, shuffle=False)

In [2]:
class CustomReLU(nn.Module):
    def __init__(self, sigma):
        super().__init__()
        self.sigma = nn.Parameter(torch.tensor(sigma))  # 1个可学习参数 sigma

    def forward(self, input):
        return torch.max(input, self.sigma * input)

class Net(nn.Module):
    def __init__(self, sigma):
        super(Net, self).__init__()
        self.flatten = nn.Flatten()  # 28*28二维图像展平成一维向量，size=784
        self.fc1 = nn.Linear(28 * 28, 30)  # 784维映射到128维
        self.custom_relu = CustomReLU(sigma)  # 自定义ReLU层
        self.dropout = nn.Dropout(0.5)  # 舍去50%的神经元
        self.fc2 = nn.Linear(30, 10)  # 128维映射到10维

    def forward(self, x):
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.custom_relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [3]:
def objective(trial):
    sigma = trial.suggest_float("sigma", 0.1, 7.0)  # 使用 Optuna 建议 sigma 值
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(sigma).to(device)
    criterion = nn.CrossEntropyLoss()  # 交叉熵损失函数
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()  # 将模型设置为训练模式，以便在前向传播时启用 Dropout 和 BatchNorm
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    # 在测试集上验证模型
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # 在验证过程中禁用梯度计算，以提高计算速度和节省内存。
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total

    return accuracy

In [4]:
# 运行优化
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=20)

# 获取最佳 sigma 值
best_sigma = study.best_params["sigma"]
print("Best sigma:", best_sigma)

[I 2024-07-21 17:08:25,015] A new study created in memory with name: no-name-4a8a87bc-08d5-41c0-92c5-74af07def818
[I 2024-07-21 17:09:47,922] Trial 0 finished with value: 93.95 and parameters: {'sigma': 2.795975126610662}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:11:11,577] Trial 1 finished with value: 93.95 and parameters: {'sigma': 3.057043900767174}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:12:41,000] Trial 2 finished with value: 93.93 and parameters: {'sigma': 5.640697357002357}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:14:08,564] Trial 3 finished with value: 93.78 and parameters: {'sigma': 3.3601340757352074}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:15:35,984] Trial 4 finished with value: 93.54 and parameters: {'sigma': 5.000104940219762}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:17:00,673] Trial 5 finished with value: 93.46 and parameters: {'sigma': 6.5925269667022555}. Best is trial 0 with value: 93.95.
[I 2024-07-21 17:18:

Best sigma: 2.4545961693131755


In [5]:
# 实例化模型、定义损失函数和优化器
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Net(best_sigma).to(device)
criterion = nn.CrossEntropyLoss()  # 交叉熵损失函数
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 50


for epoch in range(num_epochs):
    model.train()  # 将模型设置为训练模式，以便在前向传播时启用 Dropout 和 BatchNorm
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # 在测试集上验证模型
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # 在验证过程中禁用梯度计算，以提高计算速度和节省内存。
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

print('Finished Training')

Epoch [1/50], Loss: 0.5626
Accuracy of the network on the 10000 test images: 90.58%
Epoch [2/50], Loss: 0.4628
Accuracy of the network on the 10000 test images: 92.00%
Epoch [3/50], Loss: 0.4398
Accuracy of the network on the 10000 test images: 92.77%
Epoch [4/50], Loss: 0.4335
Accuracy of the network on the 10000 test images: 92.70%
Epoch [5/50], Loss: 0.4270
Accuracy of the network on the 10000 test images: 93.03%
Epoch [6/50], Loss: 0.4204
Accuracy of the network on the 10000 test images: 93.19%
Epoch [7/50], Loss: 0.4148
Accuracy of the network on the 10000 test images: 92.83%
Epoch [8/50], Loss: 0.4149
Accuracy of the network on the 10000 test images: 93.43%
Epoch [9/50], Loss: 0.4094
Accuracy of the network on the 10000 test images: 93.74%
Epoch [10/50], Loss: 0.4045
Accuracy of the network on the 10000 test images: 94.05%
Epoch [11/50], Loss: 0.3984
Accuracy of the network on the 10000 test images: 93.42%
Epoch [12/50], Loss: 0.4002
Accuracy of the network on the 10000 test imag