In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import optuna

# Load MNIST and wrap both splits in batched loaders.
transform = transforms.Compose([transforms.ToTensor()])

# Build the training split first, then the test split, with identical settings.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=100, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

# Custom activation function
class CustomActivation(nn.Module):
    """Identity for non-negative inputs, ``x * exp(x / sigma)`` for negative inputs.

    Args:
        sigma: Value dividing ``x`` inside the exponential on the negative
            side. It is stored as a plain attribute, not as a parameter.
    """

    def __init__(self, sigma=1.0):
        super(CustomActivation, self).__init__()
        self.sigma = sigma

    def forward(self, x):
        """Apply the activation element-wise to ``x``."""
        # torch.where evaluates both branches for every element. Without the
        # clamp, a large positive x makes exp(x / sigma) overflow to inf, and
        # the masked-out branch then back-propagates 0 * inf = NaN.
        # Clamping to <= 0 leaves negative inputs untouched and keeps the
        # unused branch (and its gradient) finite for positive inputs.
        negative_part = torch.clamp(x, max=0)
        return torch.where(x >= 0, x, negative_part * torch.exp(negative_part / self.sigma))

# Neural network model
class Net(nn.Module):
    """Classifier with one hidden layer.

    Pipeline: flatten -> linear (28*28 to 30) -> custom activation ->
    dropout (p=0.5) -> linear (30 to 10).

    Args:
        sigma: Forwarded unchanged to ``CustomActivation``.
    """

    def __init__(self, sigma=1.0):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=30)
        self.custom_activation = CustomActivation(sigma)
        self.dropout = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(in_features=30, out_features=10)

    def forward(self, x):
        """Return the output of the final linear layer (no softmax applied)."""
        hidden = self.custom_activation(self.fc1(self.flatten(x)))
        return self.fc2(self.dropout(hidden))

In [2]:

# Objective function
def objective(trial):
    """Optuna objective: train a ``Net`` with a sampled ``sigma`` and score it.

    Args:
        trial: Optuna trial used to sample ``sigma`` from [0.1, 10.0].

    Returns:
        Accuracy in percent on ``test_loader`` after the last training epoch.
    """
    sigma = trial.suggest_float("sigma", 0.1, 10.0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(sigma=sigma).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10  # adjust the number of training epochs as needed
    model.train()
    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Only the final accuracy is returned, so evaluate once after training
    # instead of running a full test pass after every epoch.
    # NOTE(review): this scores on test_loader, so sigma is selected on the
    # same data the final model is later reported on; a held-out validation
    # split would avoid that leakage.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

# Run the search: 20 trials, maximizing the value returned by `objective`.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=20)

# Pull the winning sigma out of the best trial.
best_sigma = study.best_trial.params["sigma"]
print("Best sigma:", best_sigma)

In [3]:

# Train the final model with the best sigma found by the search.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = Net(sigma=best_sigma)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 50
for epoch in range(num_epochs):
    # Training pass over the whole training loader.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Evaluate on the test set after every epoch, with gradients disabled.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.data.max(1)
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

print('Finished Training')


In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import numpy as np
import optuna

# Load MNIST and wrap both splits in batched loaders.
transform = transforms.Compose([transforms.ToTensor()])

# Build the training split first, then the test split, with identical settings.
train_dataset, test_dataset = (
    torchvision.datasets.MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Only the training loader shuffles; the test loader keeps a fixed order.
train_loader, test_loader = (
    torch.utils.data.DataLoader(split, batch_size=100, shuffle=reshuffle)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

import torch
import torch.nn as nn

# Custom activation function
class CustomActivation(nn.Module):
    """Identity for non-negative inputs, ``x * exp(x / sigma)`` for negative inputs.

    The module holds no state; ``sigma`` is supplied on every call.
    """

    def __init__(self):
        super(CustomActivation, self).__init__()

    def forward(self, x, sigma):
        """Apply the activation element-wise.

        Args:
            x: Input tensor.
            sigma: Value dividing ``x`` inside the exponential on the
                negative side (a number or a tensor).
        """
        # torch.where evaluates both branches for every element. Without the
        # clamp, a large positive x makes exp(x / sigma) overflow to inf, and
        # the masked-out branch then back-propagates 0 * inf = NaN into both
        # x and sigma. Clamping to <= 0 leaves negative inputs untouched and
        # keeps the unused branch (and its gradients) finite.
        negative_part = torch.clamp(x, max=0)
        return torch.where(x >= 0, x, negative_part * torch.exp(negative_part / sigma))

# Neural network model
class Net(nn.Module):
    """Classifier with one hidden layer and a learnable activation ``sigma``.

    Pipeline: flatten -> linear (28*28 to 30) -> custom activation ->
    dropout (p=0.5) -> linear (30 to 10).

    Args:
        sigma: Initial value of the learnable ``sigma`` parameter.
    """

    def __init__(self, sigma=1.0):
        super(Net, self).__init__()
        # float() lets integer inputs such as Net(sigma=1) work: torch.tensor(1)
        # is an int64 tensor, and nn.Parameter cannot require grad on it.
        # NOTE(review): sigma is unconstrained; if optimisation drives it to
        # zero or below, x / sigma in the activation misbehaves. Consider a
        # positivity constraint if that shows up in training.
        self.sigma = nn.Parameter(torch.tensor(float(sigma)))  # sigma is a learnable parameter
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 30)
        self.custom_activation = CustomActivation()  # stateless; sigma is passed per call
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(30, 10)

    def forward(self, x):
        """Return the output of the final linear layer (no softmax applied)."""
        x = self.flatten(x)
        x = self.fc1(x)
        # Pass the learnable sigma into the activation on every call.
        x = self.custom_activation(x, self.sigma)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [2]:
# Objective function
def objective(trial):
    """Optuna objective: train a ``Net`` seeded with a sampled ``sigma`` and score it.

    ``Net`` in this notebook registers ``sigma`` as a learnable parameter, so
    the sampled value is the starting point of training rather than a fixed
    hyperparameter.

    Args:
        trial: Optuna trial used to sample ``sigma`` from [0.1, 10.0].

    Returns:
        Accuracy in percent on ``test_loader`` after the last training epoch.
    """
    sigma = trial.suggest_float("sigma", 0.1, 10.0)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = Net(sigma=sigma).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    num_epochs = 10  # adjust the number of training epochs as needed
    model.train()
    for _ in range(num_epochs):
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Only the final accuracy is returned, so evaluate once after training
    # instead of running a full test pass after every epoch.
    # NOTE(review): this scores on test_loader, so sigma is selected on the
    # same data the final model is later reported on; a held-out validation
    # split would avoid that leakage.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return 100 * correct / total

# Run the search: 20 trials, maximizing the value returned by `objective`.
study = optuna.create_study(direction="maximize")
study.optimize(func=objective, n_trials=20)

# Pull the winning sigma out of the best trial.
best_sigma = study.best_trial.params["sigma"]
print("Best sigma:", best_sigma)

In [3]:
# Train the final model, starting from the best sigma found by the search.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
model = Net(sigma=best_sigma)
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 50
for epoch in range(num_epochs):
    # Training pass over the whole training loader.
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

    # Evaluate on the test set after every epoch, with gradients disabled.
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            _, predicted = outputs.data.max(1)
            total += labels.shape[0]
            correct += predicted.eq(labels).sum().item()

    print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f}%')

print('Finished Training')