In [3]:
import random
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import pandas as pd
#import imageio
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline
import math

import torchvision
import torchvision.transforms as transforms
import optuna  #载入optuna优化包

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)


##optuna######
###耗费时间的过程。。。。。。。。。。。。。。。
# Define a basic convolutional layer
# 定义 ExLU 激活函数，sigma 是可学习参数
class ExLU(nn.Module):
    def __init__(self, seqFlag, trial):
        super(ExLU, self).__init__()
        # 通过 Optuna 选择 sigma 参数，范围为 2.0 到 6.0
        self.sigma = nn.Parameter(torch.tensor(trial.suggest_float(f'sigma_{seqFlag}', 2.0, 6.0), dtype=torch.float32))

    def forward(self, x):
        return torch.where(x >= 0, x, x * torch.exp(x / self.sigma))


def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)


# 定义ResNet模型
class ResNet(nn.Module):
    def __init__(self, trial, num_classes=10):
        super(ResNet, self).__init__()
        self.conv1 = conv3x3(3, 16)
        self.bn1 = nn.BatchNorm2d(16)
        self.custom1 = ExLU(1, trial)  # 自定义激活函数层

        # Layer 1
        self.layer1_conv1 = conv3x3(16, 16)
        self.layer1_bn1 = nn.BatchNorm2d(16)
        self.layer1_conv2 = conv3x3(16, 16)
        self.layer1_bn2 = nn.BatchNorm2d(16)
        self.layer1_extra_conv1 = conv3x3(16, 16)
        self.layer1_extra_bn1 = nn.BatchNorm2d(16)
        self.layer1_extra_conv2 = conv3x3(16, 16)
        self.layer1_extra_bn2 = nn.BatchNorm2d(16)
        self.gelub11 = ExLU(2, trial)
        self.gelub12 = ExLU(3, trial)
        self.gelub13 = ExLU(4, trial)
        self.gelub14 = ExLU(5, trial)

        # Layer 2
        self.layer2_conv1 = conv3x3(16, 32, stride=2)
        self.layer2_bn1 = nn.BatchNorm2d(32)
        self.layer2_conv2 = conv3x3(32, 32)
        self.layer2_bn2 = nn.BatchNorm2d(32)
        self.layer2_extra_conv1 = conv3x3(16, 32, stride=2)  # 调整residual的通道数
        self.layer2_extra_bn1 = nn.BatchNorm2d(32)
        self.layer2_extra_conv2 = conv3x3(32, 32)
        self.layer2_extra_bn2 = nn.BatchNorm2d(32)
        self.gelub21 = ExLU(6, trial)
        self.gelub22 = ExLU(7, trial)
        self.gelub23 = ExLU(8, trial)
        self.gelub24 = ExLU(9, trial)
        self.layer2_downsample = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(32)
        )

        # Layer 3
        self.layer3_conv1 = conv3x3(32, 64, stride=2)
        self.layer3_bn1 = nn.BatchNorm2d(64)
        self.layer3_conv2 = conv3x3(64, 64)
        self.layer3_bn2 = nn.BatchNorm2d(64)
        self.layer3_extra_conv1 = conv3x3(32, 64, stride=2)  # 调整residual的通道数
        self.layer3_extra_bn1 = nn.BatchNorm2d(64)
        self.layer3_extra_conv2 = conv3x3(64, 64)
        self.layer3_extra_bn2 = nn.BatchNorm2d(64)
        self.gelub31 = ExLU(10, trial)
        self.gelub32 = ExLU(11, trial)
        self.gelub33 = ExLU(12, trial)
        self.gelub34 = ExLU(13, trial)

        self.layer3_downsample = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(64)
        )

        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        # Layer 0
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.custom1(out)

        # Layer 1
        residual = out
        out = self.layer1_conv1(out)
        out = self.layer1_bn1(out)
        out = self.gelub11(out)
        out = self.layer1_conv2(out)
        out = self.layer1_bn2(out)
        out += residual
        out = self.gelub12(out)

        residual = out
        out = self.layer1_extra_conv1(out)
        out = self.layer1_extra_bn1(out)
        out = self.gelub13(out)
        out = self.layer1_extra_conv2(out)
        out = self.layer1_extra_bn2(out)
        out += residual
        out = self.gelub14(out)

        # Layer 2
        residual = out
        out = self.layer2_conv1(out)
        out = self.layer2_bn1(out)
        out = self.gelub21(out)
        out = self.layer2_conv2(out)
        out = self.layer2_bn2(out)
        out = self.gelub22(out)
        residual = self.layer2_downsample(residual)
        out += residual
        out = self.gelub22(out)

        residual = out
        out = self.layer2_extra_conv2(out)
        out = self.layer2_extra_bn2(out)
        out = self.gelub23(out)
        out = self.layer2_extra_conv2(out)
        out = self.layer2_extra_bn2(out)
        out += residual
        out = self.gelub24(out)

        # Layer 3
        residual = out
        out = self.layer3_conv1(out)
        out = self.layer3_bn1(out)
        out = self.gelub31(out)
        out = self.layer3_conv2(out)
        out = self.layer3_bn2(out)
        residual = self.layer3_downsample(residual)
        out += residual
        out = self.gelub32(out)

        residual = out
        out = self.layer3_extra_conv2(out)
        out = self.layer3_extra_bn2(out)
        out = self.gelub33(out)
        out = self.layer3_extra_conv2(out)
        out = self.layer3_extra_bn2(out)
        out += residual
        out = self.gelub34(out)

        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


learning_rate = 0.001
num_epochs = 100


# 定义目标函数
def objective(trial):
    # 模型实例化
    model = ResNet(trial)
    model.to(device)
    curr_lr = learning_rate
    # 损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=curr_lr)  # 保持 Adam 优化器

    #history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}

    for epoch in range(num_epochs):
        optimizer = torch.optim.Adam(model.parameters(), lr=curr_lr)  # 保持 Adam 优化器
        train_loss, train_correct = 0.0, 0
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
            scores, predictions = torch.max(outputs.data, 1)
            train_correct += (predictions == labels).sum().item()

        # Decay learning rate
        if (epoch + 1) % 20 == 0:
            curr_lr /= 3
            update_lr(optimizer, curr_lr)

        valid_loss, valid_correct = 0.0, 0
        model.eval()
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            valid_loss += loss.item() * images.size(0)
            scores, predictions = torch.max(outputs.data, 1)
            valid_correct += (predictions == labels).sum().item()
        ##不好的trial剪枝    
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        valid_loss = valid_loss / len(test_loader.sampler)
        valid_acc = valid_correct / len(test_loader.sampler) * 100

        print("Epoch:{}/{} \t Train Loss:{:.4f} Valid Loss:{:.4f} \t Train Acc:{:.2f} %  Valid Acc:{:.2f} %".format(
            epoch + 1, num_epochs,
            train_loss,
            valid_loss,
            train_acc,
            valid_acc))
        # history['train_loss'].append(train_loss)
        # history['valid_loss'].append(valid_loss)
        # history['train_acc'].append(train_acc)
        # history['valid_acc'].append(valid_acc)

    return valid_acc


study = optuna.create_study(direction='maximize')

# 运行优化过程
study.optimize(objective, n_trials=20)

# 打印最佳参数和目标值
print('Best trial:')
print('  Value: ', study.best_trial.value)
print('  Params: ')
for key, value in study.best_trial.params.items():
    print('    {}: {}'.format(key, value))

Files already downloaded and verified


[I 2024-09-28 12:44:40,995] A new study created in memory with name: no-name-ae612396-5282-4c48-b5a4-9e8ab03ff34c


Epoch:1/100 	 Train Loss:1.6233 Valid Loss:1.5446 	 Train Acc:39.21 %  Valid Acc:44.24 %
Epoch:2/100 	 Train Loss:1.3166 Valid Loss:1.1979 	 Train Acc:52.14 %  Valid Acc:56.19 %
Epoch:3/100 	 Train Loss:1.0967 Valid Loss:1.0336 	 Train Acc:60.48 %  Valid Acc:63.10 %
Epoch:4/100 	 Train Loss:0.9702 Valid Loss:0.9455 	 Train Acc:65.30 %  Valid Acc:67.17 %
Epoch:5/100 	 Train Loss:0.8643 Valid Loss:0.8286 	 Train Acc:69.36 %  Valid Acc:70.67 %
Epoch:6/100 	 Train Loss:0.7836 Valid Loss:0.8301 	 Train Acc:72.43 %  Valid Acc:72.14 %
Epoch:7/100 	 Train Loss:0.7234 Valid Loss:0.7238 	 Train Acc:74.60 %  Valid Acc:75.41 %
Epoch:8/100 	 Train Loss:0.6686 Valid Loss:0.6882 	 Train Acc:76.65 %  Valid Acc:77.03 %
Epoch:9/100 	 Train Loss:0.6258 Valid Loss:0.6380 	 Train Acc:78.04 %  Valid Acc:78.82 %
Epoch:10/100 	 Train Loss:0.5880 Valid Loss:0.6220 	 Train Acc:79.57 %  Valid Acc:78.87 %
Epoch:11/100 	 Train Loss:0.5654 Valid Loss:0.5881 	 Train Acc:80.25 %  Valid Acc:80.33 %
Epoch:12/100 	 Trai

[I 2024-09-28 14:00:18,962] Trial 0 finished with value: 87.46000000000001 and parameters: {'sigma_1': 4.117417817946505, 'sigma_2': 5.449013271348036, 'sigma_3': 2.779244364432301, 'sigma_4': 4.530349575211693, 'sigma_5': 5.967907012091073, 'sigma_6': 4.889869277518576, 'sigma_7': 5.773191060313531, 'sigma_8': 3.1520967075023574, 'sigma_9': 5.906979297861794, 'sigma_10': 2.588424490048343, 'sigma_11': 5.008784183170302, 'sigma_12': 4.927360753952874, 'sigma_13': 4.507570565685883}. Best is trial 0 with value: 87.46000000000001.


Epoch:100/100 	 Train Loss:0.1398 Valid Loss:0.4682 	 Train Acc:95.07 %  Valid Acc:87.46 %
Epoch:1/100 	 Train Loss:1.5555 Valid Loss:1.6297 	 Train Acc:42.54 %  Valid Acc:43.64 %
Epoch:2/100 	 Train Loss:1.2029 Valid Loss:1.1941 	 Train Acc:56.68 %  Valid Acc:58.49 %
Epoch:3/100 	 Train Loss:1.0201 Valid Loss:0.9900 	 Train Acc:63.65 %  Valid Acc:65.04 %
Epoch:4/100 	 Train Loss:0.9026 Valid Loss:0.8388 	 Train Acc:67.97 %  Valid Acc:70.24 %
Epoch:5/100 	 Train Loss:0.8066 Valid Loss:0.7790 	 Train Acc:71.49 %  Valid Acc:72.79 %
Epoch:6/100 	 Train Loss:0.7353 Valid Loss:0.7249 	 Train Acc:74.26 %  Valid Acc:74.81 %
Epoch:7/100 	 Train Loss:0.6851 Valid Loss:0.6999 	 Train Acc:75.83 %  Valid Acc:75.68 %
Epoch:8/100 	 Train Loss:0.6469 Valid Loss:0.6991 	 Train Acc:77.45 %  Valid Acc:76.06 %
Epoch:9/100 	 Train Loss:0.6116 Valid Loss:0.6528 	 Train Acc:78.58 %  Valid Acc:77.67 %
Epoch:10/100 	 Train Loss:0.5859 Valid Loss:0.6265 	 Train Acc:79.60 %  Valid Acc:78.64 %
Epoch:11/100 	 Tra

[I 2024-09-28 15:15:54,785] Trial 1 finished with value: 86.88 and parameters: {'sigma_1': 3.956572633440479, 'sigma_2': 5.818852403655713, 'sigma_3': 5.04592344670842, 'sigma_4': 4.049405329095023, 'sigma_5': 5.236927212380223, 'sigma_6': 3.01469295361493, 'sigma_7': 2.6576120661687086, 'sigma_8': 2.6925896709777346, 'sigma_9': 4.942355931055474, 'sigma_10': 4.882624463432087, 'sigma_11': 3.525408800152578, 'sigma_12': 2.6858769675508247, 'sigma_13': 2.2575777656768525}. Best is trial 0 with value: 87.46000000000001.


Epoch:100/100 	 Train Loss:0.1640 Valid Loss:0.4924 	 Train Acc:94.26 %  Valid Acc:86.88 %
Epoch:1/100 	 Train Loss:1.5626 Valid Loss:1.3686 	 Train Acc:41.90 %  Valid Acc:50.31 %
Epoch:2/100 	 Train Loss:1.2242 Valid Loss:1.0577 	 Train Acc:55.99 %  Valid Acc:61.52 %
Epoch:3/100 	 Train Loss:1.0354 Valid Loss:1.0527 	 Train Acc:63.01 %  Valid Acc:63.01 %
Epoch:4/100 	 Train Loss:0.9129 Valid Loss:0.8740 	 Train Acc:67.46 %  Valid Acc:69.33 %
Epoch:5/100 	 Train Loss:0.8255 Valid Loss:0.7780 	 Train Acc:70.90 %  Valid Acc:73.27 %
Epoch:6/100 	 Train Loss:0.7427 Valid Loss:0.7389 	 Train Acc:73.99 %  Valid Acc:73.75 %
Epoch:7/100 	 Train Loss:0.6924 Valid Loss:0.6743 	 Train Acc:76.01 %  Valid Acc:76.57 %
Epoch:8/100 	 Train Loss:0.6500 Valid Loss:0.7170 	 Train Acc:77.44 %  Valid Acc:75.00 %
Epoch:9/100 	 Train Loss:0.6116 Valid Loss:0.6169 	 Train Acc:78.64 %  Valid Acc:79.05 %
Epoch:10/100 	 Train Loss:0.5874 Valid Loss:0.5925 	 Train Acc:79.49 %  Valid Acc:80.46 %
Epoch:11/100 	 Tra

[I 2024-09-28 16:32:18,921] Trial 2 finished with value: 87.03999999999999 and parameters: {'sigma_1': 2.065123292420359, 'sigma_2': 5.436066467821196, 'sigma_3': 5.832981273924858, 'sigma_4': 5.0891351563618175, 'sigma_5': 4.431298899980196, 'sigma_6': 3.9281539651115422, 'sigma_7': 3.718654151492435, 'sigma_8': 2.3594585391743967, 'sigma_9': 4.897794578624979, 'sigma_10': 2.7051246301991325, 'sigma_11': 4.321320712669025, 'sigma_12': 4.2951581233732945, 'sigma_13': 2.1831304681855697}. Best is trial 0 with value: 87.46000000000001.


Epoch:100/100 	 Train Loss:0.1555 Valid Loss:0.4740 	 Train Acc:94.42 %  Valid Acc:87.04 %
Epoch:1/100 	 Train Loss:1.5577 Valid Loss:1.3343 	 Train Acc:42.25 %  Valid Acc:49.94 %
Epoch:2/100 	 Train Loss:1.1960 Valid Loss:1.1084 	 Train Acc:56.95 %  Valid Acc:60.42 %
Epoch:3/100 	 Train Loss:1.0290 Valid Loss:0.9598 	 Train Acc:63.23 %  Valid Acc:65.27 %
Epoch:4/100 	 Train Loss:0.9165 Valid Loss:0.8907 	 Train Acc:67.65 %  Valid Acc:68.53 %
Epoch:5/100 	 Train Loss:0.8237 Valid Loss:0.7887 	 Train Acc:70.81 %  Valid Acc:72.54 %
Epoch:6/100 	 Train Loss:0.7474 Valid Loss:0.8151 	 Train Acc:73.71 %  Valid Acc:72.18 %
Epoch:7/100 	 Train Loss:0.6844 Valid Loss:0.7196 	 Train Acc:76.05 %  Valid Acc:75.21 %
Epoch:8/100 	 Train Loss:0.6337 Valid Loss:0.6521 	 Train Acc:77.78 %  Valid Acc:77.43 %
Epoch:9/100 	 Train Loss:0.5930 Valid Loss:0.6025 	 Train Acc:79.43 %  Valid Acc:79.27 %
Epoch:10/100 	 Train Loss:0.5629 Valid Loss:0.5912 	 Train Acc:80.45 %  Valid Acc:80.53 %
Epoch:11/100 	 Tra

[I 2024-09-28 17:48:43,681] Trial 3 finished with value: 87.35000000000001 and parameters: {'sigma_1': 3.4109643054180365, 'sigma_2': 4.239093399661501, 'sigma_3': 5.784842941156161, 'sigma_4': 5.902469413494353, 'sigma_5': 2.0138665258839143, 'sigma_6': 3.7031548542330937, 'sigma_7': 5.2317653521705, 'sigma_8': 5.678645452804165, 'sigma_9': 5.613058672041527, 'sigma_10': 4.493496657846066, 'sigma_11': 3.0249119890635985, 'sigma_12': 3.1902679353125065, 'sigma_13': 2.0458018826374316}. Best is trial 0 with value: 87.46000000000001.


Epoch:100/100 	 Train Loss:0.1364 Valid Loss:0.4903 	 Train Acc:95.18 %  Valid Acc:87.35 %
Epoch:1/100 	 Train Loss:1.5831 Valid Loss:1.3492 	 Train Acc:41.19 %  Valid Acc:50.52 %
Epoch:2/100 	 Train Loss:1.2233 Valid Loss:1.0718 	 Train Acc:55.99 %  Valid Acc:61.33 %
Epoch:3/100 	 Train Loss:1.0385 Valid Loss:1.0137 	 Train Acc:62.87 %  Valid Acc:64.91 %
Epoch:4/100 	 Train Loss:0.9287 Valid Loss:0.8931 	 Train Acc:67.17 %  Valid Acc:68.81 %
Epoch:5/100 	 Train Loss:0.8367 Valid Loss:0.8360 	 Train Acc:70.49 %  Valid Acc:71.33 %
Epoch:6/100 	 Train Loss:0.7648 Valid Loss:0.7465 	 Train Acc:73.01 %  Valid Acc:74.20 %
Epoch:7/100 	 Train Loss:0.7069 Valid Loss:0.6991 	 Train Acc:75.21 %  Valid Acc:75.78 %
Epoch:8/100 	 Train Loss:0.6602 Valid Loss:0.6965 	 Train Acc:76.87 %  Valid Acc:76.60 %
Epoch:9/100 	 Train Loss:0.6218 Valid Loss:0.6117 	 Train Acc:78.30 %  Valid Acc:79.01 %
Epoch:10/100 	 Train Loss:0.5935 Valid Loss:0.6118 	 Train Acc:79.23 %  Valid Acc:79.31 %
Epoch:11/100 	 Tra

[I 2024-09-28 19:04:39,602] Trial 4 finished with value: 87.03999999999999 and parameters: {'sigma_1': 5.617047761870667, 'sigma_2': 4.72737567067329, 'sigma_3': 5.434397658303169, 'sigma_4': 4.897073209438389, 'sigma_5': 4.616163693671034, 'sigma_6': 4.517794610438461, 'sigma_7': 5.024062165296797, 'sigma_8': 5.269806195885671, 'sigma_9': 2.1902030364153915, 'sigma_10': 3.047303028114354, 'sigma_11': 3.8207894034426646, 'sigma_12': 2.9856909545043755, 'sigma_13': 4.576797231258607}. Best is trial 0 with value: 87.46000000000001.


Epoch:100/100 	 Train Loss:0.1525 Valid Loss:0.4894 	 Train Acc:94.58 %  Valid Acc:87.04 %
Epoch:1/100 	 Train Loss:1.5265 Valid Loss:1.3479 	 Train Acc:43.42 %  Valid Acc:52.49 %
Epoch:2/100 	 Train Loss:1.1566 Valid Loss:1.0261 	 Train Acc:58.17 %  Valid Acc:63.45 %
Epoch:3/100 	 Train Loss:0.9904 Valid Loss:0.9522 	 Train Acc:64.62 %  Valid Acc:66.10 %
Epoch:4/100 	 Train Loss:0.8799 Valid Loss:0.8711 	 Train Acc:68.85 %  Valid Acc:69.69 %
Epoch:5/100 	 Train Loss:0.7811 Valid Loss:0.7214 	 Train Acc:72.46 %  Valid Acc:75.25 %
Epoch:6/100 	 Train Loss:0.7099 Valid Loss:0.6939 	 Train Acc:75.19 %  Valid Acc:76.40 %
Epoch:7/100 	 Train Loss:0.6521 Valid Loss:0.6704 	 Train Acc:77.31 %  Valid Acc:77.24 %
Epoch:8/100 	 Train Loss:0.6092 Valid Loss:0.6083 	 Train Acc:78.78 %  Valid Acc:79.24 %
Epoch:9/100 	 Train Loss:0.5733 Valid Loss:0.5978 	 Train Acc:80.09 %  Valid Acc:79.95 %
Epoch:10/100 	 Train Loss:0.5422 Valid Loss:0.5606 	 Train Acc:81.06 %  Valid Acc:81.05 %
Epoch:11/100 	 Tra

[I 2024-09-28 20:20:17,971] Trial 5 finished with value: 87.79 and parameters: {'sigma_1': 3.9858475313312236, 'sigma_2': 5.2531078598099485, 'sigma_3': 5.089494814479712, 'sigma_4': 2.8074438391635086, 'sigma_5': 2.2022404256941384, 'sigma_6': 2.8515413976412893, 'sigma_7': 4.4484960003692535, 'sigma_8': 5.070710170952079, 'sigma_9': 2.894160101529802, 'sigma_10': 4.369134671365424, 'sigma_11': 2.698460011357618, 'sigma_12': 2.9889064668682894, 'sigma_13': 2.7814107653406643}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1269 Valid Loss:0.4734 	 Train Acc:95.53 %  Valid Acc:87.79 %
Epoch:1/100 	 Train Loss:1.5597 Valid Loss:1.3497 	 Train Acc:41.89 %  Valid Acc:51.46 %
Epoch:2/100 	 Train Loss:1.2025 Valid Loss:1.1050 	 Train Acc:56.62 %  Valid Acc:60.76 %
Epoch:3/100 	 Train Loss:1.0198 Valid Loss:0.9920 	 Train Acc:63.46 %  Valid Acc:65.64 %
Epoch:4/100 	 Train Loss:0.8963 Valid Loss:0.8531 	 Train Acc:68.32 %  Valid Acc:70.25 %
Epoch:5/100 	 Train Loss:0.8095 Valid Loss:0.7482 	 Train Acc:71.50 %  Valid Acc:74.15 %
Epoch:6/100 	 Train Loss:0.7370 Valid Loss:0.7177 	 Train Acc:74.01 %  Valid Acc:75.26 %
Epoch:7/100 	 Train Loss:0.6840 Valid Loss:0.7411 	 Train Acc:76.02 %  Valid Acc:74.67 %
Epoch:8/100 	 Train Loss:0.6383 Valid Loss:0.6735 	 Train Acc:77.91 %  Valid Acc:77.33 %
Epoch:9/100 	 Train Loss:0.5999 Valid Loss:0.6248 	 Train Acc:78.98 %  Valid Acc:79.14 %
Epoch:10/100 	 Train Loss:0.5675 Valid Loss:0.5985 	 Train Acc:80.22 %  Valid Acc:79.84 %
Epoch:11/100 	 Tra

[I 2024-09-28 21:34:55,343] Trial 6 finished with value: 86.78 and parameters: {'sigma_1': 4.533565865852614, 'sigma_2': 4.260157925484506, 'sigma_3': 3.7927238512511727, 'sigma_4': 2.685319377929929, 'sigma_5': 3.824760242197154, 'sigma_6': 2.0214298447107137, 'sigma_7': 4.847335570645397, 'sigma_8': 2.409746244493736, 'sigma_9': 2.023339715218943, 'sigma_10': 5.355701916033709, 'sigma_11': 3.940179781721062, 'sigma_12': 3.203030710540088, 'sigma_13': 2.192463145434842}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1425 Valid Loss:0.4778 	 Train Acc:95.04 %  Valid Acc:86.78 %
Epoch:1/100 	 Train Loss:1.5673 Valid Loss:1.3883 	 Train Acc:41.62 %  Valid Acc:49.06 %
Epoch:2/100 	 Train Loss:1.2101 Valid Loss:1.0842 	 Train Acc:56.59 %  Valid Acc:60.71 %
Epoch:3/100 	 Train Loss:1.0351 Valid Loss:0.9686 	 Train Acc:63.01 %  Valid Acc:65.69 %
Epoch:4/100 	 Train Loss:0.9017 Valid Loss:0.8423 	 Train Acc:68.03 %  Valid Acc:70.68 %
Epoch:5/100 	 Train Loss:0.8076 Valid Loss:0.8296 	 Train Acc:71.33 %  Valid Acc:71.28 %
Epoch:6/100 	 Train Loss:0.7271 Valid Loss:0.7167 	 Train Acc:74.67 %  Valid Acc:75.86 %
Epoch:7/100 	 Train Loss:0.6745 Valid Loss:0.6369 	 Train Acc:76.26 %  Valid Acc:78.23 %
Epoch:8/100 	 Train Loss:0.6223 Valid Loss:0.6043 	 Train Acc:78.33 %  Valid Acc:79.68 %
Epoch:9/100 	 Train Loss:0.5887 Valid Loss:0.5892 	 Train Acc:79.37 %  Valid Acc:80.11 %
Epoch:10/100 	 Train Loss:0.5573 Valid Loss:0.5855 	 Train Acc:80.65 %  Valid Acc:80.10 %
Epoch:11/100 	 Tra

[I 2024-09-28 22:49:44,689] Trial 7 finished with value: 87.21 and parameters: {'sigma_1': 3.430958625738919, 'sigma_2': 4.669465513669883, 'sigma_3': 5.4448875836772945, 'sigma_4': 4.224564956146228, 'sigma_5': 3.011210235562084, 'sigma_6': 5.6874405361321045, 'sigma_7': 4.576727208632528, 'sigma_8': 2.0886136499142425, 'sigma_9': 4.734405576648485, 'sigma_10': 3.087693285432531, 'sigma_11': 4.616892643960015, 'sigma_12': 4.386703439578602, 'sigma_13': 5.821515814706727}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1339 Valid Loss:0.4616 	 Train Acc:95.27 %  Valid Acc:87.21 %
Epoch:1/100 	 Train Loss:1.5595 Valid Loss:2.0615 	 Train Acc:42.05 %  Valid Acc:35.77 %
Epoch:2/100 	 Train Loss:1.2584 Valid Loss:1.0891 	 Train Acc:54.54 %  Valid Acc:61.66 %
Epoch:3/100 	 Train Loss:1.0584 Valid Loss:1.0256 	 Train Acc:62.22 %  Valid Acc:64.53 %
Epoch:4/100 	 Train Loss:0.9513 Valid Loss:0.9755 	 Train Acc:66.18 %  Valid Acc:66.37 %
Epoch:5/100 	 Train Loss:0.8580 Valid Loss:0.8304 	 Train Acc:69.44 %  Valid Acc:70.90 %
Epoch:6/100 	 Train Loss:0.7941 Valid Loss:0.7680 	 Train Acc:71.94 %  Valid Acc:73.39 %
Epoch:7/100 	 Train Loss:0.7331 Valid Loss:0.6865 	 Train Acc:74.22 %  Valid Acc:75.99 %
Epoch:8/100 	 Train Loss:0.6868 Valid Loss:0.6835 	 Train Acc:76.01 %  Valid Acc:77.05 %
Epoch:9/100 	 Train Loss:0.6506 Valid Loss:0.7020 	 Train Acc:77.27 %  Valid Acc:76.71 %
Epoch:10/100 	 Train Loss:0.6125 Valid Loss:0.6323 	 Train Acc:78.76 %  Valid Acc:78.59 %
Epoch:11/100 	 Tra

[I 2024-09-29 00:04:22,127] Trial 8 finished with value: 87.07000000000001 and parameters: {'sigma_1': 2.5918911468358146, 'sigma_2': 2.2898693101690717, 'sigma_3': 5.304501718205486, 'sigma_4': 2.6543661868473745, 'sigma_5': 5.76822010360886, 'sigma_6': 2.09836023662329, 'sigma_7': 2.258791627183686, 'sigma_8': 5.137855894426098, 'sigma_9': 3.1006490551714774, 'sigma_10': 5.233332515051997, 'sigma_11': 3.769088407270208, 'sigma_12': 4.903454654179182, 'sigma_13': 3.472430745161756}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1569 Valid Loss:0.4732 	 Train Acc:94.38 %  Valid Acc:87.07 %
Epoch:1/100 	 Train Loss:1.5450 Valid Loss:1.3944 	 Train Acc:42.99 %  Valid Acc:47.84 %
Epoch:2/100 	 Train Loss:1.1657 Valid Loss:1.0830 	 Train Acc:57.99 %  Valid Acc:60.74 %
Epoch:3/100 	 Train Loss:1.0126 Valid Loss:0.9654 	 Train Acc:63.73 %  Valid Acc:65.54 %
Epoch:4/100 	 Train Loss:0.9067 Valid Loss:0.8611 	 Train Acc:67.88 %  Valid Acc:70.19 %
Epoch:5/100 	 Train Loss:0.8345 Valid Loss:0.7663 	 Train Acc:70.25 %  Valid Acc:72.98 %
Epoch:6/100 	 Train Loss:0.7611 Valid Loss:0.7483 	 Train Acc:73.00 %  Valid Acc:74.23 %
Epoch:7/100 	 Train Loss:0.6958 Valid Loss:0.6827 	 Train Acc:75.69 %  Valid Acc:76.73 %
Epoch:8/100 	 Train Loss:0.6444 Valid Loss:0.6596 	 Train Acc:77.57 %  Valid Acc:77.80 %
Epoch:9/100 	 Train Loss:0.6042 Valid Loss:0.6152 	 Train Acc:78.98 %  Valid Acc:79.31 %
Epoch:10/100 	 Train Loss:0.5742 Valid Loss:0.6303 	 Train Acc:80.00 %  Valid Acc:78.69 %
Epoch:11/100 	 Tra

[I 2024-09-29 01:18:53,994] Trial 9 finished with value: 87.16000000000001 and parameters: {'sigma_1': 3.6876631016193646, 'sigma_2': 3.752252473606999, 'sigma_3': 3.6728404198136153, 'sigma_4': 2.8422288464218513, 'sigma_5': 3.0916485151787794, 'sigma_6': 4.091823619749095, 'sigma_7': 3.9021503289126342, 'sigma_8': 3.5450784803973345, 'sigma_9': 2.063823633221827, 'sigma_10': 2.025748017078601, 'sigma_11': 5.977523370228875, 'sigma_12': 4.817414065331608, 'sigma_13': 3.2355148245672023}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1302 Valid Loss:0.4888 	 Train Acc:95.36 %  Valid Acc:87.16 %
Epoch:1/100 	 Train Loss:1.4753 Valid Loss:1.2974 	 Train Acc:45.54 %  Valid Acc:52.34 %
Epoch:2/100 	 Train Loss:1.1474 Valid Loss:1.0766 	 Train Acc:58.66 %  Valid Acc:61.12 %
Epoch:3/100 	 Train Loss:1.0012 Valid Loss:0.9811 	 Train Acc:64.09 %  Valid Acc:65.29 %
Epoch:4/100 	 Train Loss:0.9072 Valid Loss:0.8459 	 Train Acc:67.77 %  Valid Acc:69.67 %
Epoch:5/100 	 Train Loss:0.8168 Valid Loss:0.7867 	 Train Acc:70.99 %  Valid Acc:72.32 %
Epoch:6/100 	 Train Loss:0.7482 Valid Loss:0.7505 	 Train Acc:73.79 %  Valid Acc:74.12 %
Epoch:7/100 	 Train Loss:0.6868 Valid Loss:0.6550 	 Train Acc:76.14 %  Valid Acc:77.22 %
Epoch:8/100 	 Train Loss:0.6349 Valid Loss:0.6585 	 Train Acc:77.86 %  Valid Acc:77.50 %
Epoch:9/100 	 Train Loss:0.5959 Valid Loss:0.6049 	 Train Acc:79.24 %  Valid Acc:79.70 %
Epoch:10/100 	 Train Loss:0.5578 Valid Loss:0.5986 	 Train Acc:80.65 %  Valid Acc:79.54 %
Epoch:11/100 	 Tra

[I 2024-09-29 02:33:29,321] Trial 10 finished with value: 87.42999999999999 and parameters: {'sigma_1': 5.133121148519871, 'sigma_2': 3.029484272617492, 'sigma_3': 4.474814314435976, 'sigma_4': 3.3024312950946513, 'sigma_5': 2.038143935658676, 'sigma_6': 2.9518982639014966, 'sigma_7': 3.1506304064097796, 'sigma_8': 4.375640378885204, 'sigma_9': 3.548149071918487, 'sigma_10': 4.02183154411782, 'sigma_11': 2.1479551595520614, 'sigma_12': 2.0577174791124797, 'sigma_13': 3.1441949480709765}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1348 Valid Loss:0.4917 	 Train Acc:95.10 %  Valid Acc:87.43 %
Epoch:1/100 	 Train Loss:1.6066 Valid Loss:1.6171 	 Train Acc:40.07 %  Valid Acc:42.47 %
Epoch:2/100 	 Train Loss:1.2500 Valid Loss:1.1172 	 Train Acc:54.76 %  Valid Acc:60.34 %
Epoch:3/100 	 Train Loss:1.0714 Valid Loss:1.0655 	 Train Acc:61.44 %  Valid Acc:62.41 %
Epoch:4/100 	 Train Loss:0.9478 Valid Loss:0.8940 	 Train Acc:66.44 %  Valid Acc:67.88 %
Epoch:5/100 	 Train Loss:0.8549 Valid Loss:0.8138 	 Train Acc:69.76 %  Valid Acc:71.25 %
Epoch:6/100 	 Train Loss:0.7824 Valid Loss:0.7280 	 Train Acc:72.51 %  Valid Acc:74.69 %
Epoch:7/100 	 Train Loss:0.7150 Valid Loss:0.7056 	 Train Acc:74.83 %  Valid Acc:75.43 %
Epoch:8/100 	 Train Loss:0.6591 Valid Loss:0.6596 	 Train Acc:77.08 %  Valid Acc:77.63 %
Epoch:9/100 	 Train Loss:0.6182 Valid Loss:0.6261 	 Train Acc:78.39 %  Valid Acc:78.79 %
Epoch:10/100 	 Train Loss:0.5830 Valid Loss:0.6055 	 Train Acc:79.75 %  Valid Acc:79.70 %
Epoch:11/100 	 Tra

[I 2024-09-29 03:48:02,039] Trial 11 finished with value: 87.16000000000001 and parameters: {'sigma_1': 4.621838623243027, 'sigma_2': 5.245029509868638, 'sigma_3': 2.419443396459878, 'sigma_4': 2.0073841862379744, 'sigma_5': 5.991924343357426, 'sigma_6': 5.277838121960413, 'sigma_7': 5.871065181129118, 'sigma_8': 3.4161260806788416, 'sigma_9': 3.1472958779890012, 'sigma_10': 5.9490619003931915, 'sigma_11': 5.246172845266237, 'sigma_12': 5.766480877304007, 'sigma_13': 4.5344607575242115}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1341 Valid Loss:0.4771 	 Train Acc:95.29 %  Valid Acc:87.16 %
Epoch:1/100 	 Train Loss:1.5829 Valid Loss:1.3066 	 Train Acc:41.29 %  Valid Acc:50.78 %
Epoch:2/100 	 Train Loss:1.2051 Valid Loss:1.0698 	 Train Acc:56.58 %  Valid Acc:61.85 %
Epoch:3/100 	 Train Loss:1.0351 Valid Loss:0.9646 	 Train Acc:62.99 %  Valid Acc:65.59 %
Epoch:4/100 	 Train Loss:0.9133 Valid Loss:0.8675 	 Train Acc:67.56 %  Valid Acc:69.37 %
Epoch:5/100 	 Train Loss:0.8094 Valid Loss:0.7972 	 Train Acc:71.31 %  Valid Acc:71.86 %
Epoch:6/100 	 Train Loss:0.7351 Valid Loss:0.7579 	 Train Acc:74.17 %  Valid Acc:74.09 %
Epoch:7/100 	 Train Loss:0.6810 Valid Loss:0.6702 	 Train Acc:76.27 %  Valid Acc:77.37 %
Epoch:8/100 	 Train Loss:0.6348 Valid Loss:0.6944 	 Train Acc:77.98 %  Valid Acc:76.76 %
Epoch:9/100 	 Train Loss:0.5971 Valid Loss:0.6087 	 Train Acc:79.08 %  Valid Acc:78.88 %
Epoch:10/100 	 Train Loss:0.5697 Valid Loss:0.6103 	 Train Acc:80.22 %  Valid Acc:79.49 %
Epoch:11/100 	 Tra

[I 2024-09-29 05:02:50,596] Trial 12 finished with value: 87.5 and parameters: {'sigma_1': 4.498007734650074, 'sigma_2': 5.985915284933026, 'sigma_3': 2.7801338496660883, 'sigma_4': 3.5456801786901844, 'sigma_5': 3.2901837127522406, 'sigma_6': 4.924366464684921, 'sigma_7': 5.546398178407402, 'sigma_8': 4.496365369357626, 'sigma_9': 5.833241556968021, 'sigma_10': 3.9135623651850318, 'sigma_11': 2.0258743379828825, 'sigma_12': 5.848030158402097, 'sigma_13': 5.416644327165752}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1363 Valid Loss:0.4861 	 Train Acc:95.20 %  Valid Acc:87.50 %
Epoch:1/100 	 Train Loss:1.5731 Valid Loss:1.3751 	 Train Acc:41.54 %  Valid Acc:50.24 %
Epoch:2/100 	 Train Loss:1.2269 Valid Loss:1.0775 	 Train Acc:55.47 %  Valid Acc:60.43 %
Epoch:3/100 	 Train Loss:1.0481 Valid Loss:1.0025 	 Train Acc:62.46 %  Valid Acc:64.30 %
Epoch:4/100 	 Train Loss:0.9179 Valid Loss:0.8349 	 Train Acc:67.38 %  Valid Acc:70.48 %
Epoch:5/100 	 Train Loss:0.8192 Valid Loss:0.8298 	 Train Acc:70.94 %  Valid Acc:71.68 %
Epoch:6/100 	 Train Loss:0.7425 Valid Loss:0.7228 	 Train Acc:73.94 %  Valid Acc:74.89 %
Epoch:7/100 	 Train Loss:0.6859 Valid Loss:0.6550 	 Train Acc:76.04 %  Valid Acc:77.77 %
Epoch:8/100 	 Train Loss:0.6385 Valid Loss:0.6682 	 Train Acc:77.60 %  Valid Acc:77.10 %
Epoch:9/100 	 Train Loss:0.6043 Valid Loss:0.6795 	 Train Acc:78.79 %  Valid Acc:77.44 %
Epoch:10/100 	 Train Loss:0.5763 Valid Loss:0.5769 	 Train Acc:80.00 %  Valid Acc:80.33 %
Epoch:11/100 	 Tra

[I 2024-09-29 06:17:08,115] Trial 13 finished with value: 87.19 and parameters: {'sigma_1': 5.904474279130868, 'sigma_2': 5.878502317109078, 'sigma_3': 3.1013893040924625, 'sigma_4': 3.439745643583225, 'sigma_5': 2.80643208299418, 'sigma_6': 5.991155140993546, 'sigma_7': 4.361295202497942, 'sigma_8': 4.417558868292893, 'sigma_9': 4.085467460125837, 'sigma_10': 3.790905433599999, 'sigma_11': 2.1057873221890864, 'sigma_12': 5.9334166217615225, 'sigma_13': 5.813275087133876}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1450 Valid Loss:0.4916 	 Train Acc:94.99 %  Valid Acc:87.19 %
Epoch:1/100 	 Train Loss:1.5365 Valid Loss:1.3565 	 Train Acc:43.27 %  Valid Acc:49.61 %
Epoch:2/100 	 Train Loss:1.1860 Valid Loss:1.1227 	 Train Acc:57.28 %  Valid Acc:60.06 %
Epoch:3/100 	 Train Loss:1.0186 Valid Loss:0.9717 	 Train Acc:63.63 %  Valid Acc:65.54 %
Epoch:4/100 	 Train Loss:0.9188 Valid Loss:0.8558 	 Train Acc:67.50 %  Valid Acc:69.86 %
Epoch:5/100 	 Train Loss:0.8294 Valid Loss:0.7880 	 Train Acc:70.68 %  Valid Acc:72.66 %
Epoch:6/100 	 Train Loss:0.7467 Valid Loss:0.8060 	 Train Acc:73.91 %  Valid Acc:72.05 %
Epoch:7/100 	 Train Loss:0.6925 Valid Loss:0.7045 	 Train Acc:75.84 %  Valid Acc:75.49 %
Epoch:8/100 	 Train Loss:0.6379 Valid Loss:0.6907 	 Train Acc:77.83 %  Valid Acc:76.35 %
Epoch:9/100 	 Train Loss:0.6038 Valid Loss:0.6076 	 Train Acc:79.00 %  Valid Acc:78.64 %
Epoch:10/100 	 Train Loss:0.5710 Valid Loss:0.6115 	 Train Acc:80.22 %  Valid Acc:79.45 %
Epoch:11/100 	 Tra

[I 2024-09-29 07:31:41,244] Trial 14 finished with value: 87.63 and parameters: {'sigma_1': 4.921104992163509, 'sigma_2': 5.03405551255441, 'sigma_3': 2.072806263485472, 'sigma_4': 3.5075781490433866, 'sigma_5': 3.5787034096408212, 'sigma_6': 3.112551565547318, 'sigma_7': 3.4932755465416294, 'sigma_8': 4.725221452950863, 'sigma_9': 2.8366704562993768, 'sigma_10': 3.617248271577596, 'sigma_11': 2.7639592873043615, 'sigma_12': 3.6894421816246274, 'sigma_13': 5.238301111384994}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1365 Valid Loss:0.4702 	 Train Acc:95.20 %  Valid Acc:87.63 %
Epoch:1/100 	 Train Loss:1.5664 Valid Loss:1.7125 	 Train Acc:41.89 %  Valid Acc:40.72 %
Epoch:2/100 	 Train Loss:1.1813 Valid Loss:1.0632 	 Train Acc:57.53 %  Valid Acc:61.80 %
Epoch:3/100 	 Train Loss:1.0162 Valid Loss:0.9900 	 Train Acc:63.51 %  Valid Acc:64.76 %
Epoch:4/100 	 Train Loss:0.8992 Valid Loss:0.8667 	 Train Acc:67.69 %  Valid Acc:69.46 %
Epoch:5/100 	 Train Loss:0.8086 Valid Loss:0.8177 	 Train Acc:71.44 %  Valid Acc:71.66 %
Epoch:6/100 	 Train Loss:0.7269 Valid Loss:0.7091 	 Train Acc:74.57 %  Valid Acc:75.67 %
Epoch:7/100 	 Train Loss:0.6701 Valid Loss:0.6491 	 Train Acc:76.60 %  Valid Acc:77.88 %
Epoch:8/100 	 Train Loss:0.6193 Valid Loss:0.6264 	 Train Acc:78.34 %  Valid Acc:79.08 %
Epoch:9/100 	 Train Loss:0.5816 Valid Loss:0.6039 	 Train Acc:79.82 %  Valid Acc:79.32 %
Epoch:10/100 	 Train Loss:0.5549 Valid Loss:0.6374 	 Train Acc:80.77 %  Valid Acc:78.62 %
Epoch:11/100 	 Tra

[I 2024-09-29 08:45:59,855] Trial 15 finished with value: 87.3 and parameters: {'sigma_1': 5.034616179146819, 'sigma_2': 4.958700622763903, 'sigma_3': 4.501861162901548, 'sigma_4': 2.211501346866684, 'sigma_5': 2.4742184082004077, 'sigma_6': 2.980281495177554, 'sigma_7': 3.4180308904675485, 'sigma_8': 5.92252126594941, 'sigma_9': 2.715728065164089, 'sigma_10': 4.338679487158466, 'sigma_11': 2.8991773940778582, 'sigma_12': 3.6739585433487, 'sigma_13': 3.8443592324274825}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1289 Valid Loss:0.4944 	 Train Acc:95.38 %  Valid Acc:87.30 %
Epoch:1/100 	 Train Loss:1.5496 Valid Loss:1.4319 	 Train Acc:42.61 %  Valid Acc:48.55 %
Epoch:2/100 	 Train Loss:1.1944 Valid Loss:1.0696 	 Train Acc:57.17 %  Valid Acc:62.48 %
Epoch:3/100 	 Train Loss:1.0407 Valid Loss:1.0292 	 Train Acc:62.70 %  Valid Acc:64.19 %
Epoch:4/100 	 Train Loss:0.9340 Valid Loss:0.9272 	 Train Acc:66.60 %  Valid Acc:67.36 %
Epoch:5/100 	 Train Loss:0.8446 Valid Loss:0.7795 	 Train Acc:70.26 %  Valid Acc:72.56 %
Epoch:6/100 	 Train Loss:0.7639 Valid Loss:0.7635 	 Train Acc:73.07 %  Valid Acc:73.98 %
Epoch:7/100 	 Train Loss:0.7070 Valid Loss:0.6806 	 Train Acc:75.22 %  Valid Acc:76.65 %
Epoch:8/100 	 Train Loss:0.6596 Valid Loss:0.6711 	 Train Acc:76.84 %  Valid Acc:77.28 %
Epoch:9/100 	 Train Loss:0.6222 Valid Loss:0.6380 	 Train Acc:78.52 %  Valid Acc:78.41 %
Epoch:10/100 	 Train Loss:0.5930 Valid Loss:0.6038 	 Train Acc:79.34 %  Valid Acc:79.24 %
Epoch:11/100 	 Tra

[I 2024-09-29 10:00:01,534] Trial 16 finished with value: 87.26 and parameters: {'sigma_1': 2.9799906018247397, 'sigma_2': 3.545290540425837, 'sigma_3': 2.1863293629526135, 'sigma_4': 3.0784288892137974, 'sigma_5': 3.6927270674456936, 'sigma_6': 2.58512963216946, 'sigma_7': 2.9555284913736894, 'sigma_8': 5.06410108636856, 'sigma_9': 2.7103789236824474, 'sigma_10': 3.477044560731313, 'sigma_11': 2.853781092885727, 'sigma_12': 3.6594826421419784, 'sigma_13': 5.196863969801162}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1508 Valid Loss:0.4808 	 Train Acc:94.56 %  Valid Acc:87.26 %
Epoch:1/100 	 Train Loss:1.5695 Valid Loss:1.3198 	 Train Acc:41.84 %  Valid Acc:51.54 %
Epoch:2/100 	 Train Loss:1.1980 Valid Loss:1.0881 	 Train Acc:56.78 %  Valid Acc:60.95 %
Epoch:3/100 	 Train Loss:1.0226 Valid Loss:0.9631 	 Train Acc:63.51 %  Valid Acc:66.14 %
Epoch:4/100 	 Train Loss:0.8883 Valid Loss:0.8960 	 Train Acc:68.46 %  Valid Acc:68.40 %
Epoch:5/100 	 Train Loss:0.7925 Valid Loss:0.7383 	 Train Acc:72.26 %  Valid Acc:74.88 %
Epoch:6/100 	 Train Loss:0.7224 Valid Loss:0.6998 	 Train Acc:74.70 %  Valid Acc:76.23 %
Epoch:7/100 	 Train Loss:0.6678 Valid Loss:0.6869 	 Train Acc:76.53 %  Valid Acc:76.52 %
Epoch:8/100 	 Train Loss:0.6218 Valid Loss:0.6238 	 Train Acc:78.45 %  Valid Acc:78.98 %
Epoch:9/100 	 Train Loss:0.5896 Valid Loss:0.6538 	 Train Acc:79.52 %  Valid Acc:78.36 %
Epoch:10/100 	 Train Loss:0.5607 Valid Loss:0.5726 	 Train Acc:80.53 %  Valid Acc:80.70 %
Epoch:11/100 	 Tra

[I 2024-09-29 11:13:30,546] Trial 17 finished with value: 86.66 and parameters: {'sigma_1': 5.080519082866377, 'sigma_2': 4.969503248687305, 'sigma_3': 4.552543511828239, 'sigma_4': 3.7243031340105492, 'sigma_5': 4.4254845270387975, 'sigma_6': 3.436171266524419, 'sigma_7': 4.222484287270668, 'sigma_8': 3.9838303963408657, 'sigma_9': 3.856360595620377, 'sigma_10': 4.6234003119110305, 'sigma_11': 2.648473313755211, 'sigma_12': 2.192528220751478, 'sigma_13': 5.024636976243608}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1489 Valid Loss:0.4799 	 Train Acc:94.73 %  Valid Acc:86.66 %
Epoch:1/100 	 Train Loss:1.5288 Valid Loss:1.4712 	 Train Acc:43.68 %  Valid Acc:47.64 %
Epoch:2/100 	 Train Loss:1.1839 Valid Loss:1.0593 	 Train Acc:57.36 %  Valid Acc:62.05 %
Epoch:3/100 	 Train Loss:1.0268 Valid Loss:0.9675 	 Train Acc:63.12 %  Valid Acc:65.32 %
Epoch:4/100 	 Train Loss:0.9190 Valid Loss:0.8840 	 Train Acc:67.06 %  Valid Acc:69.04 %
Epoch:5/100 	 Train Loss:0.8304 Valid Loss:0.8415 	 Train Acc:70.49 %  Valid Acc:70.75 %
Epoch:6/100 	 Train Loss:0.7635 Valid Loss:0.7229 	 Train Acc:73.17 %  Valid Acc:74.86 %
Epoch:7/100 	 Train Loss:0.7022 Valid Loss:0.6706 	 Train Acc:75.19 %  Valid Acc:77.17 %
Epoch:8/100 	 Train Loss:0.6529 Valid Loss:0.6472 	 Train Acc:76.96 %  Valid Acc:78.01 %
Epoch:9/100 	 Train Loss:0.6229 Valid Loss:0.6707 	 Train Acc:78.12 %  Valid Acc:76.90 %
Epoch:10/100 	 Train Loss:0.5843 Valid Loss:0.6559 	 Train Acc:79.51 %  Valid Acc:77.79 %
Epoch:11/100 	 Tra

[I 2024-09-29 12:28:16,248] Trial 18 finished with value: 87.35000000000001 and parameters: {'sigma_1': 5.470405003288581, 'sigma_2': 4.376988088021423, 'sigma_3': 3.4406343431737767, 'sigma_4': 2.365674717692928, 'sigma_5': 2.5254749387211586, 'sigma_6': 2.591059747502902, 'sigma_7': 3.5647256656621313, 'sigma_8': 4.797334216205891, 'sigma_9': 2.577081901694997, 'sigma_10': 3.5541924011226835, 'sigma_11': 3.364818206427023, 'sigma_12': 2.6109904675759537, 'sigma_13': 2.747362372834456}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1383 Valid Loss:0.4968 	 Train Acc:95.09 %  Valid Acc:87.35 %
Epoch:1/100 	 Train Loss:1.5857 Valid Loss:1.4090 	 Train Acc:40.77 %  Valid Acc:48.31 %
Epoch:2/100 	 Train Loss:1.2133 Valid Loss:1.0512 	 Train Acc:56.01 %  Valid Acc:62.28 %
Epoch:3/100 	 Train Loss:1.0421 Valid Loss:1.0218 	 Train Acc:62.75 %  Valid Acc:63.28 %
Epoch:4/100 	 Train Loss:0.9319 Valid Loss:0.8893 	 Train Acc:67.07 %  Valid Acc:68.27 %
Epoch:5/100 	 Train Loss:0.8432 Valid Loss:0.8057 	 Train Acc:70.14 %  Valid Acc:72.63 %
Epoch:6/100 	 Train Loss:0.7655 Valid Loss:0.7680 	 Train Acc:73.04 %  Valid Acc:73.32 %
Epoch:7/100 	 Train Loss:0.7062 Valid Loss:0.7024 	 Train Acc:75.35 %  Valid Acc:75.91 %
Epoch:8/100 	 Train Loss:0.6582 Valid Loss:0.6620 	 Train Acc:77.14 %  Valid Acc:78.00 %
Epoch:9/100 	 Train Loss:0.6159 Valid Loss:0.6097 	 Train Acc:78.49 %  Valid Acc:79.52 %
Epoch:10/100 	 Train Loss:0.5866 Valid Loss:0.6277 	 Train Acc:79.62 %  Valid Acc:78.91 %
Epoch:11/100 	 Tra

[I 2024-09-29 13:41:15,224] Trial 19 finished with value: 87.46000000000001 and parameters: {'sigma_1': 4.278433278421295, 'sigma_2': 3.371086291845102, 'sigma_3': 4.1453050307834065, 'sigma_4': 3.7808569514257533, 'sigma_5': 3.4731121517127894, 'sigma_6': 3.442913761000586, 'sigma_7': 4.081345804209523, 'sigma_8': 5.533530113840385, 'sigma_9': 3.3353561409624635, 'sigma_10': 4.215631028027772, 'sigma_11': 2.482728392351857, 'sigma_12': 3.659540972160993, 'sigma_13': 4.013921412857482}. Best is trial 5 with value: 87.79.


Epoch:100/100 	 Train Loss:0.1399 Valid Loss:0.4941 	 Train Acc:95.01 %  Valid Acc:87.46 %
Best trial:
  Value:  87.79
  Params: 
    sigma_1: 3.9858475313312236
    sigma_2: 5.2531078598099485
    sigma_3: 5.089494814479712
    sigma_4: 2.8074438391635086
    sigma_5: 2.2022404256941384
    sigma_6: 2.8515413976412893
    sigma_7: 4.4484960003692535
    sigma_8: 5.070710170952079
    sigma_9: 2.894160101529802
    sigma_10: 4.369134671365424
    sigma_11: 2.698460011357618
    sigma_12: 2.9889064668682894
    sigma_13: 2.7814107653406643


In [1]:
import random
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import pandas as pd
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline
import math
import torchvision
import torchvision.transforms as transforms
import optuna  # 载入 optuna 优化包

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# 图像预处理模块
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()
])

# CIFAR-10 数据集
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# 数据加载器
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)

# 自定义 Raylu 激活函数类
class Raylu(nn.Module):
    def __init__(self):
        super().__init__()
        self.sigma = nn.Parameter(2 * torch.randn(1))  # 可学习参数 sigma

    def forward(self, input):
        x = torch.where(input >= 0, input, input * torch.exp(-input ** 2 / self.sigma ** 2 / 2))
        return x

# 3x3 卷积层
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)

# 残差块
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.Raylu1 = Raylu()
        self.Raylu2 = Raylu()
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.Raylu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.Raylu2(out)
        return out

# ResNet 模型
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        self.Raylu3 = Raylu()
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        out = self.Raylu3(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out

# 模型实例化
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)

# 损失函数和优化器
criterion = nn.CrossEntropyLoss()
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# 初始化 Raylu 激活函数中的 sigma 参数
z = model.Raylu3.sigma
nn.init.constant_(z, 6)
z101 = model.layer1[0].Raylu1.sigma
nn.init.constant_(z101, 4)
z102 = model.layer1[0].Raylu2.sigma
nn.init.constant_(z102, 4)
z111 = model.layer1[1].Raylu1.sigma
nn.init.constant_(z111, 3)
z112 = model.layer1[1].Raylu2.sigma
nn.init.constant_(z112, 3)
z201 = model.layer2[0].Raylu1.sigma
nn.init.constant_(z201, 5)
z202 = model.layer2[0].Raylu2.sigma
nn.init.constant_(z202, 5)
z211 = model.layer2[1].Raylu1.sigma
nn.init.constant_(z211, 5)
z212 = model.layer2[1].Raylu2.sigma
nn.init.constant_(z212, 5)
z301 = model.layer3[0].Raylu1.sigma
nn.init.constant_(z301, 2)
z302 = model.layer3[0].Raylu2.sigma
nn.init.constant_(z302, 4)
z311 = model.layer3[1].Raylu1.sigma
nn.init.constant_(z311, 2)
z312 = model.layer3[1].Raylu2.sigma
nn.init.constant_(z312, 3)

# 更新学习率函数
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

# 训练模型
total_step = len(train_loader)
curr_lr = learning_rate

history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}
num_epochs = 80

for epoch in range(num_epochs):
    train_loss, train_correct = 0.0, 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)

        # 前向传播
        outputs = model(images)
        loss = criterion(outputs, labels)

        # 反向传播与优化
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(outputs.data, 1)
        train_correct += (predictions == labels).sum().item()

    # 每 20 轮衰减学习率
    if (epoch + 1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)

    valid_loss, val_correct = 0.0, 0
    model.eval()
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        valid_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(outputs.data, 1)
        val_correct += (predictions == labels).sum().item()

    train_loss /= len(train_loader.sampler)
    train_acc = train_correct / len(train_loader.sampler) * 100
    valid_loss /= len(test_loader.sampler)
    valid_acc = val_correct / len(test_loader.sampler) * 100

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Valid Loss: {valid_loss:.4f}, "
          f"Train Acc: {train_acc:.2f}%, Valid Acc: {valid_acc:.2f}%")

    history['train_loss'].append(train_loss)
    history['valid_loss'].append(valid_loss)
    history['train_acc'].append(train_acc)
    history['valid_acc'].append(valid_acc)


Files already downloaded and verified
Epoch [1/80], Train Loss: 1.6510, Valid Loss: 1.4344, Train Acc: 38.62%, Valid Acc: 47.45%
Epoch [2/80], Train Loss: 1.2280, Valid Loss: 1.0794, Train Acc: 55.61%, Valid Acc: 61.53%
Epoch [3/80], Train Loss: 0.9942, Valid Loss: 0.9287, Train Acc: 64.46%, Valid Acc: 67.08%
Epoch [4/80], Train Loss: 0.8649, Valid Loss: 0.8015, Train Acc: 69.40%, Valid Acc: 71.98%
Epoch [5/80], Train Loss: 0.7708, Valid Loss: 0.7369, Train Acc: 72.96%, Valid Acc: 74.62%
Epoch [6/80], Train Loss: 0.6855, Valid Loss: 0.7360, Train Acc: 76.24%, Valid Acc: 75.53%
Epoch [7/80], Train Loss: 0.6239, Valid Loss: 0.6345, Train Acc: 78.12%, Valid Acc: 78.65%
Epoch [8/80], Train Loss: 0.5756, Valid Loss: 0.6337, Train Acc: 79.75%, Valid Acc: 78.94%
Epoch [9/80], Train Loss: 0.5403, Valid Loss: 0.5981, Train Acc: 81.07%, Valid Acc: 79.79%
Epoch [10/80], Train Loss: 0.5121, Valid Loss: 0.5507, Train Acc: 82.20%, Valid Acc: 81.54%
Epoch [11/80], Train Loss: 0.4869, Valid Loss: 0.58