In [None]:
import random
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import pandas as pd
#import imageio
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline
import math

import torchvision
import torchvision.transforms as transforms
import optuna  #载入optuna优化包

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Training-time preprocessing: pad by 4 pixels, random horizontal flip,
# random 32x32 crop, then conversion to a tensor.
_train_steps = [
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor(),
]
transform = transforms.Compose(_train_steps)

# CIFAR-10 datasets. Only the training split requests a download; the test
# split is read from the same root directory and is only converted to tensors.
_data_root = '../../data/'
train_dataset = torchvision.datasets.CIFAR10(
    root=_data_root, train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root=_data_root, train=False, transform=transforms.ToTensor())

# Mini-batch loaders: the training data is reshuffled, the test data keeps
# its original order.
_batch_size = 100
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=_batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=_batch_size, shuffle=False)


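## Optuna hyper-parameter search ######
### NOTE: this is a time-consuming process.
# Building blocks: a custom activation and a basic convolutional layer.
# Custom noisy ReLU activation with a learnable noise scale: max(0, x) + sigma * randn (noise is built in)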
class ReLU(nn.Module):
    """Rectifier followed by additive Gaussian noise with a learnable scale.

    Each forward call returns ``max(x, 0) + sigma * eps`` where ``eps`` is a
    fresh standard-normal sample with the same shape as ``x``. The noise is
    added on every call; the module does not check its train/eval mode.

    Args:
        seqFlag: Index used to build the Optuna parameter name
            ``sigma_<seqFlag>``.
        trial: Object exposing ``suggest_float(name, low, high)``. The initial
            value of ``sigma`` is requested from it in the range [0.1, 5.0].
    """

    def __init__(self, seqFlag, trial):
        super().__init__()
        initial_sigma = trial.suggest_float(f'sigma_{seqFlag}', 0.1, 5.0)
        # Registered as a parameter, so the optimiser updates it with the rest.
        self.sigma = nn.Parameter(torch.tensor(initial_sigma))

    def forward(self, x):
        """Return the rectified input plus ``sigma``-scaled Gaussian noise."""
        rectified = torch.maximum(x, torch.zeros_like(x))
        perturbation = self.sigma * torch.randn_like(x)
        return rectified + perturbation



def conv3x3(in_channels, out_channels, stride=1):
    """Build a bias-free 3x3 ``nn.Conv2d`` with a padding of 1.

    Args:
        in_channels: Number of input feature maps.
        out_channels: Number of output feature maps.
        stride: Convolution stride (defaults to 1).

    Returns:
        The configured ``nn.Conv2d`` layer.
    """
    layer = nn.Conv2d(
        in_channels=in_channels,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=False,
    )
    return layer


# Define the ResNet model
class ResNet(nn.Module):
    """Small ResNet (stem + three stages of two residual blocks) with noisy ReLUs.

    Every activation is a ``ReLU`` module from this file, numbered 1..13, so
    each one registers its own ``sigma_<n>`` hyper-parameter with ``trial``.
    The stages use 16, 32 and 64 channels; stages 2 and 3 halve the spatial
    resolution with a stride-2 convolution and project the shortcut with a
    1x1 stride-2 convolution. The 8x8 average pool followed by
    ``nn.Linear(64, ...)`` is sized for 32x32 inputs such as CIFAR-10.

    Fixes relative to the previous revision:
      * the second block of stages 2 and 3 now uses its own first conv/BN
        (``*_extra_conv1`` / ``*_extra_bn1``) instead of applying
        ``*_extra_conv2`` / ``*_extra_bn2`` twice; those first convs are now
        declared with matching channel counts and stride 1;
      * the stray activation before the residual addition in the first block
        of stage 2 is removed, so ``gelub22`` is applied once, after the add.

    Args:
        trial: Object forwarded to every ``ReLU`` layer (it must provide
            ``suggest_float``).
        num_classes: Size of the final linear layer's output.
    """

    def __init__(self, trial, num_classes=10):
        super(ResNet, self).__init__()
        # Stem: 3 -> 16 channels.
        self.conv1 = conv3x3(3, 16)
        self.bn1 = nn.BatchNorm2d(16)
        self.custom1 = ReLU(1, trial)  # custom activation layer

        # Layer 1: two residual blocks at 16 channels.
        self.layer1_conv1 = conv3x3(16, 16)
        self.layer1_bn1 = nn.BatchNorm2d(16)
        self.layer1_conv2 = conv3x3(16, 16)
        self.layer1_bn2 = nn.BatchNorm2d(16)
        self.layer1_extra_conv1 = conv3x3(16, 16)
        self.layer1_extra_bn1 = nn.BatchNorm2d(16)
        self.layer1_extra_conv2 = conv3x3(16, 16)
        self.layer1_extra_bn2 = nn.BatchNorm2d(16)
        self.gelub11 = ReLU(2, trial)
        self.gelub12 = ReLU(3, trial)
        self.gelub13 = ReLU(4, trial)
        self.gelub14 = ReLU(5, trial)

        # Layer 2: first block goes 16 -> 32 channels with stride 2.
        self.layer2_conv1 = conv3x3(16, 32, stride=2)
        self.layer2_bn1 = nn.BatchNorm2d(32)
        self.layer2_conv2 = conv3x3(32, 32)
        self.layer2_bn2 = nn.BatchNorm2d(32)
        # The second block already receives 32 channels at the reduced
        # resolution, so its first conv is 32 -> 32 with stride 1.
        self.layer2_extra_conv1 = conv3x3(32, 32)
        self.layer2_extra_bn1 = nn.BatchNorm2d(32)
        self.layer2_extra_conv2 = conv3x3(32, 32)
        self.layer2_extra_bn2 = nn.BatchNorm2d(32)
        self.gelub21 = ReLU(6, trial)
        self.gelub22 = ReLU(7, trial)
        self.gelub23 = ReLU(8, trial)
        self.gelub24 = ReLU(9, trial)
        # Projects the shortcut so it matches the main path's channels/stride.
        self.layer2_downsample = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(32)
        )

        # Layer 3: first block goes 32 -> 64 channels with stride 2.
        self.layer3_conv1 = conv3x3(32, 64, stride=2)
        self.layer3_bn1 = nn.BatchNorm2d(64)
        self.layer3_conv2 = conv3x3(64, 64)
        self.layer3_bn2 = nn.BatchNorm2d(64)
        # Same reasoning as in layer 2: 64 -> 64 with stride 1.
        self.layer3_extra_conv1 = conv3x3(64, 64)
        self.layer3_extra_bn1 = nn.BatchNorm2d(64)
        self.layer3_extra_conv2 = conv3x3(64, 64)
        self.layer3_extra_bn2 = nn.BatchNorm2d(64)
        self.gelub31 = ReLU(10, trial)
        self.gelub32 = ReLU(11, trial)
        self.gelub33 = ReLU(12, trial)
        self.gelub34 = ReLU(13, trial)

        self.layer3_downsample = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(64)
        )

        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    @staticmethod
    def _residual_block(x, conv_a, bn_a, act_a, conv_b, bn_b, act_b,
                        downsample=None):
        """Apply conv-BN-act, conv-BN, add the shortcut, then the final act.

        ``downsample`` (when given) transforms ``x`` before it is added, so
        the shortcut matches the main path's shape.
        """
        shortcut = x if downsample is None else downsample(x)
        out = act_a(bn_a(conv_a(x)))
        out = bn_b(conv_b(out))
        out += shortcut
        return act_b(out)

    def forward(self, x):
        """Return the ``num_classes`` logits for a batch of images ``x``."""
        # Stem
        out = self.custom1(self.bn1(self.conv1(x)))

        # Layer 1
        out = self._residual_block(
            out,
            self.layer1_conv1, self.layer1_bn1, self.gelub11,
            self.layer1_conv2, self.layer1_bn2, self.gelub12)
        out = self._residual_block(
            out,
            self.layer1_extra_conv1, self.layer1_extra_bn1, self.gelub13,
            self.layer1_extra_conv2, self.layer1_extra_bn2, self.gelub14)

        # Layer 2
        out = self._residual_block(
            out,
            self.layer2_conv1, self.layer2_bn1, self.gelub21,
            self.layer2_conv2, self.layer2_bn2, self.gelub22,
            downsample=self.layer2_downsample)
        out = self._residual_block(
            out,
            self.layer2_extra_conv1, self.layer2_extra_bn1, self.gelub23,
            self.layer2_extra_conv2, self.layer2_extra_bn2, self.gelub24)

        # Layer 3
        out = self._residual_block(
            out,
            self.layer3_conv1, self.layer3_bn1, self.gelub31,
            self.layer3_conv2, self.layer3_bn2, self.gelub32,
            downsample=self.layer3_downsample)
        out = self._residual_block(
            out,
            self.layer3_extra_conv1, self.layer3_extra_bn1, self.gelub33,
            self.layer3_extra_conv2, self.layer3_extra_bn2, self.gelub34)

        # Classifier head: pool, flatten to (batch, features), project.
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def update_lr(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``.

    Args:
        optimizer: Object whose ``param_groups`` is an iterable of dicts.
        lr: New learning-rate value stored under each group's ``'lr'`` key.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)


# Training hyper-parameters shared by every trial.
num_epochs = 80       # number of passes over the training set per trial
learning_rate = 0.01  # initial learning rate handed to the optimizer


# Define the objective function
def _train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader``.

    Returns:
        ``(summed_loss, n_correct)`` where ``summed_loss`` is the batch loss
        weighted by batch size and ``n_correct`` counts correct predictions.
    """
    # Validation switches the model to eval mode; switch back before training
    # so BatchNorm normalises with batch statistics again.
    model.train()
    summed_loss, n_correct = 0.0, 0
    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        summed_loss += loss.item() * images.size(0)
        n_correct += (outputs.argmax(dim=1) == labels).sum().item()
    return summed_loss, n_correct


def _evaluate(model, loader, criterion):
    """Score ``model`` on ``loader`` without tracking gradients.

    Returns:
        ``(summed_loss, n_correct)`` with the same meaning as in
        ``_train_one_epoch``.
    """
    model.eval()
    summed_loss, n_correct = 0.0, 0
    with torch.no_grad():  # no backward pass is needed here
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            summed_loss += loss.item() * images.size(0)
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
    return summed_loss, n_correct


def objective(trial):
    """Train a ``ResNet`` for ``num_epochs`` epochs and score it for Optuna.

    The model's ``sigma_*`` hyper-parameters are drawn from ``trial`` while
    the network is constructed. After every epoch the validation accuracy is
    reported to the trial so that the study's pruner can stop it early.

    Fixes relative to the previous revision:
      * the model is put back into train mode at the start of every epoch
        (it used to stay in eval mode after the first validation pass);
      * a single Adam optimizer is kept for the whole run instead of being
        rebuilt (and its state discarded) every epoch;
      * ``trial.report`` is called before ``trial.should_prune`` so pruning
        has data to act on;
      * validation runs under ``torch.no_grad()``.

    Args:
        trial: The Optuna trial supplying hyper-parameters and pruning.

    Returns:
        Validation accuracy in percent after the last completed epoch
        (0.0 if ``num_epochs`` is 0).

    Raises:
        optuna.exceptions.TrialPruned: When the trial reports that it should
            be pruned.
    """
    # Model instantiation
    model = ResNet(trial)
    model.to(device)
    curr_lr = learning_rate

    # Loss function and optimizer (one Adam instance for the whole trial)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=curr_lr)

    n_train = len(train_loader.sampler)
    n_valid = len(test_loader.sampler)
    valid_acc = 0.0  # defined even if the epoch loop never runs

    for epoch in range(num_epochs):
        train_loss, train_correct = _train_one_epoch(
            model, train_loader, criterion, optimizer)

        # Decay learning rate: divide by 3 every 20 epochs.
        if (epoch + 1) % 20 == 0:
            curr_lr /= 3
            update_lr(optimizer, curr_lr)

        valid_loss, valid_correct = _evaluate(model, test_loader, criterion)

        train_loss = train_loss / n_train
        train_acc = train_correct / n_train * 100
        valid_loss = valid_loss / n_valid
        valid_acc = valid_correct / n_valid * 100

        print("Epoch:{}/{} \t Train Loss:{:.4f} Valid Loss:{:.4f} \t Train Acc:{:.2f} %  Valid Acc:{:.2f} %".format(
            epoch + 1, num_epochs,
            train_loss,
            valid_loss,
            train_acc,
            valid_acc))

        # Prune unpromising trials; the pruner needs the reported value.
        trial.report(valid_acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return valid_acc


# Create a study that maximises the value returned by the objective.
study = optuna.create_study(direction='maximize')

# Run the optimisation process.
study.optimize(objective, n_trials=1)

# Print the best objective value and the parameters that produced it.
best_trial = study.best_trial
print('Best trial:')
print('  Value: ', best_trial.value)
print('  Params: ')
for param_name, param_value in best_trial.params.items():
    print('    {}: {}'.format(param_name, param_value))

Files already downloaded and verified


[I 2024-10-10 16:31:20,713] A new study created in memory with name: no-name-fcf57575-4f27-476d-9779-62564777625f


Epoch:1/100 	 Train Loss:1.8438 Valid Loss:1.8759 	 Train Acc:30.18 %  Valid Acc:33.65 %
Epoch:2/100 	 Train Loss:1.5985 Valid Loss:1.3544 	 Train Acc:40.88 %  Valid Acc:50.49 %
Epoch:3/100 	 Train Loss:1.3589 Valid Loss:1.2935 	 Train Acc:50.47 %  Valid Acc:52.73 %
Epoch:4/100 	 Train Loss:1.2130 Valid Loss:1.1173 	 Train Acc:56.13 %  Valid Acc:60.17 %
Epoch:5/100 	 Train Loss:1.1069 Valid Loss:1.1277 	 Train Acc:60.34 %  Valid Acc:60.33 %
Epoch:6/100 	 Train Loss:1.0260 Valid Loss:0.9766 	 Train Acc:63.59 %  Valid Acc:66.45 %
Epoch:7/100 	 Train Loss:0.9577 Valid Loss:0.9873 	 Train Acc:66.25 %  Valid Acc:66.36 %
Epoch:8/100 	 Train Loss:0.8947 Valid Loss:0.9244 	 Train Acc:68.41 %  Valid Acc:67.29 %
Epoch:9/100 	 Train Loss:0.8474 Valid Loss:0.9144 	 Train Acc:70.20 %  Valid Acc:68.39 %
Epoch:10/100 	 Train Loss:0.8044 Valid Loss:0.9072 	 Train Acc:72.00 %  Valid Acc:70.00 %
Epoch:11/100 	 Train Loss:0.7603 Valid Loss:0.7689 	 Train Acc:73.44 %  Valid Acc:73.77 %
Epoch:12/100 	 Trai