In [6]:
import random
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import pandas as pd
#import imageio
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline
import math

import torchvision
import torchvision.transforms as transforms
import optuna  #载入optuna优化包

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)


##optuna######
###耗费时间的过程。。。。。。。。。。。。。。。
# Define a basic convolutional layer
class RayLU(nn.Module):
    def __init__(self, trial, seqFlag):
        super(RayLU, self).__init__()
        self.sigma = nn.Parameter(torch.tensor(trial.suggest_float(f'sigma_{seqFlag}', 0.1, 5.0)))

    def forward(self, x):
        return torch.where(x >= 0, x, x * torch.exp(-x**2 / (2 * self.sigma**2)))



def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)


# 定义ResNet模型
class ResNet(nn.Module):
    def __init__(self, trial, num_classes=10):
        super(ResNet, self).__init__()
        self.conv1 = conv3x3(3, 16)
        self.bn1 = nn.BatchNorm2d(16)
        self.custom1 = RayLU(trial, 1)  # 自定义激活函数层

        # Layer 1
        self.layer1_conv1 = conv3x3(16, 16)
        self.layer1_bn1 = nn.BatchNorm2d(16)
        self.layer1_conv2 = conv3x3(16, 16)
        self.layer1_bn2 = nn.BatchNorm2d(16)
        self.layer1_extra_conv1 = conv3x3(16, 16)
        self.layer1_extra_bn1 = nn.BatchNorm2d(16)
        self.layer1_extra_conv2 = conv3x3(16, 16)
        self.layer1_extra_bn2 = nn.BatchNorm2d(16)
        self.gelub11 = RayLU(trial, 2)
        self.gelub12 = RayLU(trial, 3)
        self.gelub13 = RayLU(trial, 4)
        self.gelub14 = RayLU(trial, 5)

        # Layer 2
        self.layer2_conv1 = conv3x3(16, 32, stride=2)
        self.layer2_bn1 = nn.BatchNorm2d(32)
        self.layer2_conv2 = conv3x3(32, 32)
        self.layer2_bn2 = nn.BatchNorm2d(32)
        self.layer2_extra_conv1 = conv3x3(16, 32, stride=2)  # 调整residual的通道数
        self.layer2_extra_bn1 = nn.BatchNorm2d(32)
        self.layer2_extra_conv2 = conv3x3(32, 32)
        self.layer2_extra_bn2 = nn.BatchNorm2d(32)
        self.gelub21 = RayLU(trial, 6)
        self.gelub22 = RayLU(trial, 7)
        self.gelub23 = RayLU(trial, 8)
        self.gelub24 = RayLU(trial, 9)
        self.layer2_downsample = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(32)
        )

        # Layer 3
        self.layer3_conv1 = conv3x3(32, 64, stride=2)
        self.layer3_bn1 = nn.BatchNorm2d(64)
        self.layer3_conv2 = conv3x3(64, 64)
        self.layer3_bn2 = nn.BatchNorm2d(64)
        self.layer3_extra_conv1 = conv3x3(32, 64, stride=2)  # 调整residual的通道数
        self.layer3_extra_bn1 = nn.BatchNorm2d(64)
        self.layer3_extra_conv2 = conv3x3(64, 64)
        self.layer3_extra_bn2 = nn.BatchNorm2d(64)
        self.gelub31 = RayLU(trial, 10)
        self.gelub32 = RayLU(trial, 11)
        self.gelub33 = RayLU(trial, 12)
        self.gelub34 = RayLU(trial, 13)

        self.layer3_downsample = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=1, stride=2, bias=False),
            nn.BatchNorm2d(64)
        )

        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def forward(self, x):
        # Layer 0
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.custom1(out)

        # Layer 1
        residual = out
        out = self.layer1_conv1(out)
        out = self.layer1_bn1(out)
        out = self.gelub11(out)
        out = self.layer1_conv2(out)
        out = self.layer1_bn2(out)
        out += residual
        out = self.gelub12(out)

        residual = out
        out = self.layer1_extra_conv1(out)
        out = self.layer1_extra_bn1(out)
        out = self.gelub13(out)
        out = self.layer1_extra_conv2(out)
        out = self.layer1_extra_bn2(out)
        out += residual
        out = self.gelub14(out)

        # Layer 2
        residual = out
        out = self.layer2_conv1(out)
        out = self.layer2_bn1(out)
        out = self.gelub21(out)
        out = self.layer2_conv2(out)
        out = self.layer2_bn2(out)
        out = self.gelub22(out)
        residual = self.layer2_downsample(residual)
        out += residual
        out = self.gelub22(out)

        residual = out
        out = self.layer2_extra_conv2(out)
        out = self.layer2_extra_bn2(out)
        out = self.gelub23(out)
        out = self.layer2_extra_conv2(out)
        out = self.layer2_extra_bn2(out)
        out += residual
        out = self.gelub24(out)

        # Layer 3
        residual = out
        out = self.layer3_conv1(out)
        out = self.layer3_bn1(out)
        out = self.gelub31(out)
        out = self.layer3_conv2(out)
        out = self.layer3_bn2(out)
        residual = self.layer3_downsample(residual)
        out += residual
        out = self.gelub32(out)

        residual = out
        out = self.layer3_extra_conv2(out)
        out = self.layer3_extra_bn2(out)
        out = self.gelub33(out)
        out = self.layer3_extra_conv2(out)
        out = self.layer3_extra_bn2(out)
        out += residual
        out = self.gelub34(out)

        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


learning_rate = 0.001
num_epochs = 100


# 定义目标函数
def objective(trial):
    # 模型实例化
    model = ResNet(trial)
    model.to(device)
    curr_lr = learning_rate
    # 损失函数和优化器
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=curr_lr)  # 保持 Adam 优化器

    #history = {'train_loss': [], 'valid_loss': [], 'train_acc': [], 'valid_acc': []}

    for epoch in range(num_epochs):
        optimizer = torch.optim.Adam(model.parameters(), lr=curr_lr)  # 保持 Adam 优化器
        train_loss, train_correct = 0.0, 0
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * images.size(0)
            scores, predictions = torch.max(outputs.data, 1)
            train_correct += (predictions == labels).sum().item()

        # Decay learning rate
        if (epoch + 1) % 20 == 0:
            curr_lr /= 3
            update_lr(optimizer, curr_lr)

        valid_loss, valid_correct = 0.0, 0
        model.eval()
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            valid_loss += loss.item() * images.size(0)
            scores, predictions = torch.max(outputs.data, 1)
            valid_correct += (predictions == labels).sum().item()
        ##不好的trial剪枝    
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

        train_loss = train_loss / len(train_loader.sampler)
        train_acc = train_correct / len(train_loader.sampler) * 100
        valid_loss = valid_loss / len(test_loader.sampler)
        valid_acc = valid_correct / len(test_loader.sampler) * 100

        print("Epoch:{}/{} \t Train Loss:{:.4f} Valid Loss:{:.4f} \t Train Acc:{:.2f} %  Valid Acc:{:.2f} %".format(
            epoch + 1, num_epochs,
            train_loss,
            valid_loss,
            train_acc,
            valid_acc))
        # history['train_loss'].append(train_loss)
        # history['valid_loss'].append(valid_loss)
        # history['train_acc'].append(train_acc)
        # history['valid_acc'].append(valid_acc)

    return valid_acc


study = optuna.create_study(direction='maximize')

# 运行优化过程
study.optimize(objective, n_trials=20)

# 打印最佳参数和目标值
print('Best trial:')
print('  Value: ', study.best_trial.value)
print('  Params: ')
for key, value in study.best_trial.params.items():
    print('    {}: {}'.format(key, value))

Files already downloaded and verified


[I 2024-09-28 12:03:17,121] A new study created in memory with name: no-name-68596a98-ff02-40dd-bc78-4c85e467d3ac


Epoch:1/100 	 Train Loss:1.5924 Valid Loss:1.5206 	 Train Acc:40.88 %  Valid Acc:45.91 %
Epoch:2/100 	 Train Loss:1.2400 Valid Loss:1.1888 	 Train Acc:55.58 %  Valid Acc:57.44 %
Epoch:3/100 	 Train Loss:1.0427 Valid Loss:0.9666 	 Train Acc:62.93 %  Valid Acc:65.90 %
Epoch:4/100 	 Train Loss:0.9365 Valid Loss:0.8835 	 Train Acc:66.62 %  Valid Acc:68.39 %
Epoch:5/100 	 Train Loss:0.8655 Valid Loss:0.8234 	 Train Acc:69.26 %  Valid Acc:71.10 %
Epoch:6/100 	 Train Loss:0.8081 Valid Loss:0.7862 	 Train Acc:71.50 %  Valid Acc:72.32 %
Epoch:7/100 	 Train Loss:0.7565 Valid Loss:0.7661 	 Train Acc:73.47 %  Valid Acc:73.18 %
Epoch:8/100 	 Train Loss:0.7003 Valid Loss:0.6973 	 Train Acc:75.40 %  Valid Acc:75.80 %
Epoch:9/100 	 Train Loss:0.6593 Valid Loss:0.6970 	 Train Acc:76.60 %  Valid Acc:76.43 %
Epoch:10/100 	 Train Loss:0.6254 Valid Loss:0.6772 	 Train Acc:78.04 %  Valid Acc:77.05 %
Epoch:11/100 	 Train Loss:0.5883 Valid Loss:0.6461 	 Train Acc:79.24 %  Valid Acc:77.95 %
Epoch:12/100 	 Trai

[I 2024-09-28 13:23:25,538] Trial 0 finished with value: 86.76 and parameters: {'sigma_1': 3.423476052186064, 'sigma_2': 4.098131311369579, 'sigma_3': 1.3439461442440241, 'sigma_4': 2.592789434602616, 'sigma_5': 2.4784358311579053, 'sigma_6': 4.121806497231036, 'sigma_7': 2.3422659667291836, 'sigma_8': 4.977929158208426, 'sigma_9': 2.340357248697875, 'sigma_10': 2.008432233176517, 'sigma_11': 4.112695128215287, 'sigma_12': 4.541384343668526, 'sigma_13': 3.2551058020068773}. Best is trial 0 with value: 86.76.


Epoch:100/100 	 Train Loss:0.1571 Valid Loss:0.4801 	 Train Acc:94.44 %  Valid Acc:86.76 %
Epoch:1/100 	 Train Loss:1.4795 Valid Loss:1.6203 	 Train Acc:45.19 %  Valid Acc:42.00 %
Epoch:2/100 	 Train Loss:1.1960 Valid Loss:1.0485 	 Train Acc:57.00 %  Valid Acc:62.23 %
Epoch:3/100 	 Train Loss:1.0568 Valid Loss:1.0018 	 Train Acc:62.01 %  Valid Acc:64.50 %
Epoch:4/100 	 Train Loss:0.9667 Valid Loss:0.8914 	 Train Acc:65.35 %  Valid Acc:67.74 %
Epoch:5/100 	 Train Loss:0.8785 Valid Loss:0.8787 	 Train Acc:68.68 %  Valid Acc:68.55 %
Epoch:6/100 	 Train Loss:0.8111 Valid Loss:0.7748 	 Train Acc:71.33 %  Valid Acc:73.09 %
Epoch:7/100 	 Train Loss:0.7559 Valid Loss:0.7115 	 Train Acc:73.36 %  Valid Acc:74.81 %
Epoch:8/100 	 Train Loss:0.6977 Valid Loss:0.6798 	 Train Acc:75.56 %  Valid Acc:76.51 %
Epoch:9/100 	 Train Loss:0.6565 Valid Loss:0.6716 	 Train Acc:76.92 %  Valid Acc:76.97 %
Epoch:10/100 	 Train Loss:0.6248 Valid Loss:0.6360 	 Train Acc:78.09 %  Valid Acc:78.22 %
Epoch:11/100 	 Tra

[I 2024-09-28 14:44:00,099] Trial 1 finished with value: 87.12 and parameters: {'sigma_1': 1.460078412930732, 'sigma_2': 0.2850514594273182, 'sigma_3': 4.741645683751368, 'sigma_4': 0.13794080391823532, 'sigma_5': 3.5363863499999013, 'sigma_6': 0.2707511221430693, 'sigma_7': 1.2551966642837775, 'sigma_8': 4.3336125432779, 'sigma_9': 3.18592054239998, 'sigma_10': 1.8833143561449015, 'sigma_11': 4.232682390990908, 'sigma_12': 2.087922137128098, 'sigma_13': 3.7364446487582934}. Best is trial 1 with value: 87.12.


Epoch:100/100 	 Train Loss:0.1543 Valid Loss:0.4776 	 Train Acc:94.59 %  Valid Acc:87.12 %
Epoch:1/100 	 Train Loss:1.4340 Valid Loss:1.3840 	 Train Acc:47.21 %  Valid Acc:49.08 %
Epoch:2/100 	 Train Loss:1.1223 Valid Loss:1.0375 	 Train Acc:59.49 %  Valid Acc:63.13 %
Epoch:3/100 	 Train Loss:1.0105 Valid Loss:0.9477 	 Train Acc:63.65 %  Valid Acc:66.52 %
Epoch:4/100 	 Train Loss:0.9263 Valid Loss:0.9419 	 Train Acc:66.80 %  Valid Acc:67.06 %
Epoch:5/100 	 Train Loss:0.8648 Valid Loss:0.9078 	 Train Acc:69.00 %  Valid Acc:68.48 %
Epoch:6/100 	 Train Loss:0.8195 Valid Loss:0.8261 	 Train Acc:70.99 %  Valid Acc:70.53 %
Epoch:7/100 	 Train Loss:0.7595 Valid Loss:0.7383 	 Train Acc:73.15 %  Valid Acc:74.08 %
Epoch:8/100 	 Train Loss:0.7149 Valid Loss:0.7786 	 Train Acc:74.65 %  Valid Acc:72.83 %
Epoch:9/100 	 Train Loss:0.6769 Valid Loss:0.6798 	 Train Acc:76.15 %  Valid Acc:76.55 %
Epoch:10/100 	 Train Loss:0.6398 Valid Loss:0.6677 	 Train Acc:77.64 %  Valid Acc:77.25 %
Epoch:11/100 	 Tra

[I 2024-09-28 16:05:07,957] Trial 2 finished with value: 86.66 and parameters: {'sigma_1': 1.3447156070416522, 'sigma_2': 1.4379097826625207, 'sigma_3': 0.35941033919313503, 'sigma_4': 2.209548981916471, 'sigma_5': 3.674288462459729, 'sigma_6': 2.818344999273431, 'sigma_7': 0.8664302979982087, 'sigma_8': 0.6830547146775832, 'sigma_9': 4.673451899321812, 'sigma_10': 0.8285981569855777, 'sigma_11': 4.274502357724631, 'sigma_12': 1.4525907588423939, 'sigma_13': 0.5241027526455244}. Best is trial 1 with value: 87.12.


Epoch:100/100 	 Train Loss:0.1695 Valid Loss:0.4727 	 Train Acc:94.09 %  Valid Acc:86.66 %
Epoch:1/100 	 Train Loss:1.5289 Valid Loss:1.8247 	 Train Acc:43.49 %  Valid Acc:37.84 %
Epoch:2/100 	 Train Loss:1.2049 Valid Loss:1.0827 	 Train Acc:56.43 %  Valid Acc:61.06 %
Epoch:3/100 	 Train Loss:1.0453 Valid Loss:0.9428 	 Train Acc:62.58 %  Valid Acc:65.81 %
Epoch:4/100 	 Train Loss:0.9307 Valid Loss:0.9029 	 Train Acc:66.79 %  Valid Acc:68.39 %
Epoch:5/100 	 Train Loss:0.8384 Valid Loss:0.8429 	 Train Acc:70.21 %  Valid Acc:70.70 %
Epoch:6/100 	 Train Loss:0.7649 Valid Loss:0.7504 	 Train Acc:73.15 %  Valid Acc:73.65 %
Epoch:7/100 	 Train Loss:0.7159 Valid Loss:0.7607 	 Train Acc:74.85 %  Valid Acc:74.16 %
Epoch:8/100 	 Train Loss:0.6657 Valid Loss:0.7247 	 Train Acc:76.76 %  Valid Acc:75.32 %
Epoch:9/100 	 Train Loss:0.6332 Valid Loss:0.6248 	 Train Acc:77.92 %  Valid Acc:78.61 %
Epoch:10/100 	 Train Loss:0.5998 Valid Loss:0.6584 	 Train Acc:79.00 %  Valid Acc:77.98 %
Epoch:11/100 	 Tra

[I 2024-09-28 17:26:35,242] Trial 3 finished with value: 86.50999999999999 and parameters: {'sigma_1': 4.581856856718059, 'sigma_2': 4.752382022153147, 'sigma_3': 2.539332952363392, 'sigma_4': 3.4473626527469503, 'sigma_5': 2.9704739698355835, 'sigma_6': 1.6973136363833932, 'sigma_7': 2.5462767031268982, 'sigma_8': 0.1376363777650112, 'sigma_9': 2.318275459848565, 'sigma_10': 2.582739742655172, 'sigma_11': 1.6376371158647485, 'sigma_12': 3.8674053233666585, 'sigma_13': 4.8793740113618425}. Best is trial 1 with value: 87.12.


Epoch:100/100 	 Train Loss:0.1783 Valid Loss:0.4766 	 Train Acc:93.56 %  Valid Acc:86.51 %
Epoch:1/100 	 Train Loss:1.4919 Valid Loss:1.3779 	 Train Acc:44.76 %  Valid Acc:51.06 %
Epoch:2/100 	 Train Loss:1.1767 Valid Loss:1.0358 	 Train Acc:57.70 %  Valid Acc:62.78 %
Epoch:3/100 	 Train Loss:1.0530 Valid Loss:0.9537 	 Train Acc:62.11 %  Valid Acc:65.77 %
Epoch:4/100 	 Train Loss:0.9456 Valid Loss:0.9554 	 Train Acc:66.46 %  Valid Acc:66.01 %
Epoch:5/100 	 Train Loss:0.8718 Valid Loss:0.8284 	 Train Acc:69.23 %  Valid Acc:70.91 %
Epoch:6/100 	 Train Loss:0.8107 Valid Loss:0.7945 	 Train Acc:71.24 %  Valid Acc:72.42 %
Epoch:7/100 	 Train Loss:0.7498 Valid Loss:0.7311 	 Train Acc:73.82 %  Valid Acc:74.70 %
Epoch:8/100 	 Train Loss:0.7117 Valid Loss:0.6824 	 Train Acc:75.13 %  Valid Acc:76.48 %
Epoch:9/100 	 Train Loss:0.6740 Valid Loss:0.6908 	 Train Acc:76.56 %  Valid Acc:76.14 %
Epoch:10/100 	 Train Loss:0.6385 Valid Loss:0.6524 	 Train Acc:77.67 %  Valid Acc:77.83 %
Epoch:11/100 	 Tra

[I 2024-09-28 18:48:05,789] Trial 4 finished with value: 86.39 and parameters: {'sigma_1': 1.7847775057687831, 'sigma_2': 0.31839888559000784, 'sigma_3': 1.9223263120048808, 'sigma_4': 4.889108897639944, 'sigma_5': 3.5689985210236634, 'sigma_6': 4.313771486775137, 'sigma_7': 4.251414460118188, 'sigma_8': 1.6076099193326965, 'sigma_9': 2.621461654721011, 'sigma_10': 0.24318243553628094, 'sigma_11': 2.3106276814327327, 'sigma_12': 4.459998132076116, 'sigma_13': 3.986516066245128}. Best is trial 1 with value: 87.12.


Epoch:100/100 	 Train Loss:0.1799 Valid Loss:0.4744 	 Train Acc:93.61 %  Valid Acc:86.39 %
Epoch:1/100 	 Train Loss:1.5357 Valid Loss:1.4812 	 Train Acc:43.07 %  Valid Acc:48.42 %
Epoch:2/100 	 Train Loss:1.1695 Valid Loss:1.2110 	 Train Acc:57.93 %  Valid Acc:58.60 %
Epoch:3/100 	 Train Loss:1.0077 Valid Loss:0.9557 	 Train Acc:63.68 %  Valid Acc:66.57 %
Epoch:4/100 	 Train Loss:0.9129 Valid Loss:0.9369 	 Train Acc:67.47 %  Valid Acc:66.99 %
Epoch:5/100 	 Train Loss:0.8397 Valid Loss:0.8348 	 Train Acc:70.16 %  Valid Acc:71.46 %
Epoch:6/100 	 Train Loss:0.7676 Valid Loss:0.8174 	 Train Acc:73.01 %  Valid Acc:71.91 %
Epoch:7/100 	 Train Loss:0.7040 Valid Loss:0.7009 	 Train Acc:75.17 %  Valid Acc:75.94 %
Epoch:8/100 	 Train Loss:0.6556 Valid Loss:0.7042 	 Train Acc:77.32 %  Valid Acc:76.30 %
Epoch:9/100 	 Train Loss:0.6159 Valid Loss:0.6049 	 Train Acc:78.51 %  Valid Acc:79.41 %
Epoch:10/100 	 Train Loss:0.5881 Valid Loss:0.6416 	 Train Acc:79.67 %  Valid Acc:78.34 %
Epoch:11/100 	 Tra

[I 2024-09-28 20:08:57,398] Trial 5 finished with value: 87.35000000000001 and parameters: {'sigma_1': 0.8891341644285307, 'sigma_2': 1.280474010311344, 'sigma_3': 4.694478666906056, 'sigma_4': 0.9017762785392068, 'sigma_5': 3.334382957924633, 'sigma_6': 4.487507589459152, 'sigma_7': 2.014190519215793, 'sigma_8': 4.479474791880304, 'sigma_9': 2.1678025593557586, 'sigma_10': 1.7809911547985093, 'sigma_11': 4.1170922048362275, 'sigma_12': 2.2172734233864566, 'sigma_13': 0.1902320583825528}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1477 Valid Loss:0.4612 	 Train Acc:94.79 %  Valid Acc:87.35 %
Epoch:1/100 	 Train Loss:1.4196 Valid Loss:1.2360 	 Train Acc:47.81 %  Valid Acc:54.79 %
Epoch:2/100 	 Train Loss:1.1171 Valid Loss:1.0064 	 Train Acc:59.73 %  Valid Acc:64.45 %
Epoch:3/100 	 Train Loss:0.9824 Valid Loss:0.9376 	 Train Acc:64.57 %  Valid Acc:66.11 %
Epoch:4/100 	 Train Loss:0.8895 Valid Loss:0.9350 	 Train Acc:68.23 %  Valid Acc:67.08 %
Epoch:5/100 	 Train Loss:0.8158 Valid Loss:0.7674 	 Train Acc:71.30 %  Valid Acc:73.05 %
Epoch:6/100 	 Train Loss:0.7544 Valid Loss:0.7609 	 Train Acc:73.41 %  Valid Acc:74.59 %
Epoch:7/100 	 Train Loss:0.7086 Valid Loss:0.6964 	 Train Acc:75.16 %  Valid Acc:76.15 %
Epoch:8/100 	 Train Loss:0.6702 Valid Loss:0.6386 	 Train Acc:76.68 %  Valid Acc:78.28 %
Epoch:9/100 	 Train Loss:0.6347 Valid Loss:0.6106 	 Train Acc:77.85 %  Valid Acc:79.31 %
Epoch:10/100 	 Train Loss:0.6119 Valid Loss:0.6382 	 Train Acc:78.82 %  Valid Acc:78.77 %
Epoch:11/100 	 Tra

[I 2024-09-28 21:29:02,748] Trial 6 finished with value: 10.0 and parameters: {'sigma_1': 3.7518045989486786, 'sigma_2': 1.2409438795455021, 'sigma_3': 3.8224729043884444, 'sigma_4': 3.9279838680443833, 'sigma_5': 1.0812486500594363, 'sigma_6': 1.1744680308891733, 'sigma_7': 0.8708390818539142, 'sigma_8': 0.10075923489935311, 'sigma_9': 2.008550275021666, 'sigma_10': 4.36453228559648, 'sigma_11': 0.6035148361387805, 'sigma_12': 3.392306949088642, 'sigma_13': 4.073060639522797}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:nan Valid Loss:nan 	 Train Acc:10.00 %  Valid Acc:10.00 %
Epoch:1/100 	 Train Loss:1.5082 Valid Loss:1.6646 	 Train Acc:44.11 %  Valid Acc:42.43 %
Epoch:2/100 	 Train Loss:1.1679 Valid Loss:1.0595 	 Train Acc:57.95 %  Valid Acc:61.69 %
Epoch:3/100 	 Train Loss:1.0165 Valid Loss:0.9758 	 Train Acc:63.72 %  Valid Acc:66.16 %
Epoch:4/100 	 Train Loss:0.9070 Valid Loss:0.9054 	 Train Acc:67.76 %  Valid Acc:68.14 %
Epoch:5/100 	 Train Loss:0.8231 Valid Loss:0.7851 	 Train Acc:70.89 %  Valid Acc:72.29 %
Epoch:6/100 	 Train Loss:0.7540 Valid Loss:0.7053 	 Train Acc:73.61 %  Valid Acc:75.64 %
Epoch:7/100 	 Train Loss:0.6899 Valid Loss:0.6868 	 Train Acc:75.80 %  Valid Acc:76.41 %
Epoch:8/100 	 Train Loss:0.6482 Valid Loss:0.6274 	 Train Acc:77.23 %  Valid Acc:78.47 %
Epoch:9/100 	 Train Loss:0.6093 Valid Loss:0.6403 	 Train Acc:78.73 %  Valid Acc:78.42 %
Epoch:10/100 	 Train Loss:0.5783 Valid Loss:0.6721 	 Train Acc:79.78 %  Valid Acc:77.78 %
Epoch:11/100 	 Train Los

[I 2024-09-28 22:49:12,872] Trial 7 finished with value: 87.03999999999999 and parameters: {'sigma_1': 1.4587836508181475, 'sigma_2': 1.002416907402547, 'sigma_3': 2.957919729913672, 'sigma_4': 0.5431900168844337, 'sigma_5': 4.937997900408576, 'sigma_6': 2.749666720207438, 'sigma_7': 2.4869959299996927, 'sigma_8': 1.109225819945721, 'sigma_9': 3.828869432389412, 'sigma_10': 1.7146711384293567, 'sigma_11': 2.672687743252857, 'sigma_12': 3.4359058384367946, 'sigma_13': 4.808792628312248}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1450 Valid Loss:0.4803 	 Train Acc:94.70 %  Valid Acc:87.04 %
Epoch:1/100 	 Train Loss:1.4947 Valid Loss:1.4159 	 Train Acc:44.78 %  Valid Acc:48.61 %
Epoch:2/100 	 Train Loss:1.1355 Valid Loss:1.0979 	 Train Acc:59.46 %  Valid Acc:61.01 %
Epoch:3/100 	 Train Loss:0.9842 Valid Loss:0.8668 	 Train Acc:65.20 %  Valid Acc:69.19 %
Epoch:4/100 	 Train Loss:0.8960 Valid Loss:0.8547 	 Train Acc:68.38 %  Valid Acc:68.96 %
Epoch:5/100 	 Train Loss:0.8181 Valid Loss:0.7517 	 Train Acc:71.16 %  Valid Acc:73.72 %
Epoch:6/100 	 Train Loss:0.7632 Valid Loss:0.7277 	 Train Acc:73.15 %  Valid Acc:74.32 %
Epoch:7/100 	 Train Loss:0.7128 Valid Loss:0.7217 	 Train Acc:74.71 %  Valid Acc:75.00 %
Epoch:8/100 	 Train Loss:0.6711 Valid Loss:0.6748 	 Train Acc:76.55 %  Valid Acc:77.10 %
Epoch:9/100 	 Train Loss:0.6367 Valid Loss:0.6395 	 Train Acc:77.67 %  Valid Acc:77.92 %
Epoch:10/100 	 Train Loss:0.6042 Valid Loss:0.6900 	 Train Acc:79.05 %  Valid Acc:76.21 %
Epoch:11/100 	 Tra

[I 2024-09-29 00:09:02,149] Trial 8 finished with value: 87.0 and parameters: {'sigma_1': 1.6715017272409696, 'sigma_2': 2.290353518303827, 'sigma_3': 0.14603268567920705, 'sigma_4': 4.159207835363518, 'sigma_5': 1.5436361700431787, 'sigma_6': 4.402258750011341, 'sigma_7': 3.544743759480845, 'sigma_8': 2.1647783108172165, 'sigma_9': 4.007883055110152, 'sigma_10': 1.7013797578102734, 'sigma_11': 0.8996646022911582, 'sigma_12': 2.256974966154796, 'sigma_13': 3.4006798787252976}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1546 Valid Loss:0.4860 	 Train Acc:94.43 %  Valid Acc:87.00 %
Epoch:1/100 	 Train Loss:1.4951 Valid Loss:1.6530 	 Train Acc:44.72 %  Valid Acc:42.47 %
Epoch:2/100 	 Train Loss:1.1798 Valid Loss:1.0724 	 Train Acc:57.19 %  Valid Acc:61.50 %
Epoch:3/100 	 Train Loss:1.0507 Valid Loss:0.9834 	 Train Acc:62.53 %  Valid Acc:65.17 %
Epoch:4/100 	 Train Loss:0.9556 Valid Loss:0.8946 	 Train Acc:66.08 %  Valid Acc:68.25 %
Epoch:5/100 	 Train Loss:0.8744 Valid Loss:0.9190 	 Train Acc:68.92 %  Valid Acc:68.54 %
Epoch:6/100 	 Train Loss:0.8052 Valid Loss:0.7678 	 Train Acc:71.64 %  Valid Acc:73.31 %
Epoch:7/100 	 Train Loss:0.7512 Valid Loss:0.7403 	 Train Acc:73.72 %  Valid Acc:74.44 %
Epoch:8/100 	 Train Loss:0.7039 Valid Loss:0.6858 	 Train Acc:75.39 %  Valid Acc:76.30 %
Epoch:9/100 	 Train Loss:0.6687 Valid Loss:0.6559 	 Train Acc:76.50 %  Valid Acc:77.21 %
Epoch:10/100 	 Train Loss:0.6299 Valid Loss:0.6282 	 Train Acc:78.09 %  Valid Acc:78.77 %
Epoch:11/100 	 Tra

[I 2024-09-29 01:28:46,081] Trial 9 finished with value: 86.5 and parameters: {'sigma_1': 4.22565595078747, 'sigma_2': 2.4999033931354413, 'sigma_3': 2.5455165517976117, 'sigma_4': 2.176238206435269, 'sigma_5': 0.9554349423855967, 'sigma_6': 4.159570239458707, 'sigma_7': 0.6452512446748805, 'sigma_8': 4.931202454624929, 'sigma_9': 2.0568610413059423, 'sigma_10': 3.5955421364279814, 'sigma_11': 2.4013644220993804, 'sigma_12': 4.1331462048500835, 'sigma_13': 3.240889859738038}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1630 Valid Loss:0.4887 	 Train Acc:94.20 %  Valid Acc:86.50 %
Epoch:1/100 	 Train Loss:1.5232 Valid Loss:1.8242 	 Train Acc:43.66 %  Valid Acc:39.72 %
Epoch:2/100 	 Train Loss:1.2717 Valid Loss:1.1910 	 Train Acc:54.16 %  Valid Acc:59.34 %
Epoch:3/100 	 Train Loss:1.1292 Valid Loss:1.0414 	 Train Acc:59.76 %  Valid Acc:62.49 %
Epoch:4/100 	 Train Loss:1.0329 Valid Loss:1.0759 	 Train Acc:63.48 %  Valid Acc:63.26 %
Epoch:5/100 	 Train Loss:0.9571 Valid Loss:0.8745 	 Train Acc:66.30 %  Valid Acc:69.58 %
Epoch:6/100 	 Train Loss:0.9023 Valid Loss:0.8822 	 Train Acc:68.22 %  Valid Acc:68.81 %
Epoch:7/100 	 Train Loss:0.8389 Valid Loss:0.8727 	 Train Acc:70.52 %  Valid Acc:69.91 %
Epoch:8/100 	 Train Loss:0.7790 Valid Loss:0.7743 	 Train Acc:72.87 %  Valid Acc:73.57 %
Epoch:9/100 	 Train Loss:0.7323 Valid Loss:0.7152 	 Train Acc:74.29 %  Valid Acc:75.62 %
Epoch:10/100 	 Train Loss:0.6857 Valid Loss:0.7405 	 Train Acc:76.15 %  Valid Acc:74.91 %
Epoch:11/100 	 Tra

[I 2024-09-29 02:48:33,779] Trial 10 finished with value: 86.97 and parameters: {'sigma_1': 0.10754174896682223, 'sigma_2': 3.4804878551667167, 'sigma_3': 4.829826554360956, 'sigma_4': 1.0477192242102458, 'sigma_5': 4.839545942157102, 'sigma_6': 3.4108209572518637, 'sigma_7': 1.714934236387204, 'sigma_8': 3.4772880105979844, 'sigma_9': 0.11434744187053658, 'sigma_10': 3.013075176454797, 'sigma_11': 4.973529975707915, 'sigma_12': 0.2753829696202015, 'sigma_13': 0.2597250026854613}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1724 Valid Loss:0.4677 	 Train Acc:93.90 %  Valid Acc:86.97 %
Epoch:1/100 	 Train Loss:1.4752 Valid Loss:1.5345 	 Train Acc:45.76 %  Valid Acc:46.02 %
Epoch:2/100 	 Train Loss:1.1533 Valid Loss:1.1236 	 Train Acc:58.47 %  Valid Acc:61.16 %
Epoch:3/100 	 Train Loss:1.0330 Valid Loss:0.9961 	 Train Acc:63.17 %  Valid Acc:65.09 %
Epoch:4/100 	 Train Loss:0.9352 Valid Loss:0.8618 	 Train Acc:66.37 %  Valid Acc:69.89 %
Epoch:5/100 	 Train Loss:0.8424 Valid Loss:0.8102 	 Train Acc:70.03 %  Valid Acc:71.68 %
Epoch:6/100 	 Train Loss:0.7799 Valid Loss:0.7653 	 Train Acc:72.48 %  Valid Acc:72.85 %
Epoch:7/100 	 Train Loss:0.7343 Valid Loss:0.7176 	 Train Acc:74.34 %  Valid Acc:75.45 %
Epoch:8/100 	 Train Loss:0.6905 Valid Loss:0.7009 	 Train Acc:75.77 %  Valid Acc:75.98 %
Epoch:9/100 	 Train Loss:0.6587 Valid Loss:0.6570 	 Train Acc:77.03 %  Valid Acc:77.20 %
Epoch:10/100 	 Train Loss:0.6216 Valid Loss:0.6399 	 Train Acc:78.20 %  Valid Acc:78.25 %
Epoch:11/100 	 Tra

[I 2024-09-29 04:08:34,136] Trial 11 finished with value: 86.86 and parameters: {'sigma_1': 0.27388842055938856, 'sigma_2': 0.1519871740631049, 'sigma_3': 4.944184946101624, 'sigma_4': 0.37863275756226267, 'sigma_5': 3.8877633657711472, 'sigma_6': 0.3811367013011917, 'sigma_7': 1.6838497573358109, 'sigma_8': 3.696678001336503, 'sigma_9': 0.9805643942456987, 'sigma_10': 1.062167524794611, 'sigma_11': 3.5660801221024783, 'sigma_12': 2.164360014259923, 'sigma_13': 1.6026960275276965}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1587 Valid Loss:0.4712 	 Train Acc:94.42 %  Valid Acc:86.86 %
Epoch:1/100 	 Train Loss:1.4678 Valid Loss:1.2527 	 Train Acc:46.06 %  Valid Acc:54.75 %
Epoch:2/100 	 Train Loss:1.2058 Valid Loss:1.0722 	 Train Acc:56.45 %  Valid Acc:61.94 %
Epoch:3/100 	 Train Loss:1.0742 Valid Loss:1.0181 	 Train Acc:61.39 %  Valid Acc:63.83 %
Epoch:4/100 	 Train Loss:0.9881 Valid Loss:0.9395 	 Train Acc:64.54 %  Valid Acc:66.63 %
Epoch:5/100 	 Train Loss:0.9066 Valid Loss:0.9200 	 Train Acc:67.80 %  Valid Acc:67.98 %
Epoch:6/100 	 Train Loss:0.8316 Valid Loss:0.8175 	 Train Acc:70.19 %  Valid Acc:71.25 %
Epoch:7/100 	 Train Loss:0.7677 Valid Loss:0.7367 	 Train Acc:72.88 %  Valid Acc:74.01 %
Epoch:8/100 	 Train Loss:0.7214 Valid Loss:0.7632 	 Train Acc:74.59 %  Valid Acc:74.01 %
Epoch:9/100 	 Train Loss:0.6784 Valid Loss:0.6723 	 Train Acc:76.17 %  Valid Acc:77.24 %
Epoch:10/100 	 Train Loss:0.6433 Valid Loss:0.6711 	 Train Acc:77.66 %  Valid Acc:77.44 %
Epoch:11/100 	 Tra

[I 2024-09-29 05:28:25,667] Trial 12 finished with value: 87.05000000000001 and parameters: {'sigma_1': 0.8383226645311572, 'sigma_2': 1.9454581115604253, 'sigma_3': 3.864009497737176, 'sigma_4': 1.2707155285857872, 'sigma_5': 2.4604574130094385, 'sigma_6': 0.18211203647842256, 'sigma_7': 0.12755052573941716, 'sigma_8': 3.8260252972459066, 'sigma_9': 3.2651795320789843, 'sigma_10': 3.2408769400222033, 'sigma_11': 3.594316521204685, 'sigma_12': 1.2846473844907573, 'sigma_13': 1.9464845496243162}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1737 Valid Loss:0.4746 	 Train Acc:93.90 %  Valid Acc:87.05 %
Epoch:1/100 	 Train Loss:1.5387 Valid Loss:1.3867 	 Train Acc:43.12 %  Valid Acc:50.65 %
Epoch:2/100 	 Train Loss:1.1639 Valid Loss:1.0073 	 Train Acc:58.20 %  Valid Acc:63.06 %
Epoch:3/100 	 Train Loss:1.0148 Valid Loss:0.9921 	 Train Acc:63.75 %  Valid Acc:63.65 %
Epoch:4/100 	 Train Loss:0.9201 Valid Loss:0.8979 	 Train Acc:67.20 %  Valid Acc:68.31 %
Epoch:5/100 	 Train Loss:0.8383 Valid Loss:0.8018 	 Train Acc:70.25 %  Valid Acc:71.27 %
Epoch:6/100 	 Train Loss:0.7720 Valid Loss:0.7559 	 Train Acc:72.75 %  Valid Acc:73.11 %
Epoch:7/100 	 Train Loss:0.7146 Valid Loss:0.7005 	 Train Acc:74.88 %  Valid Acc:75.85 %
Epoch:8/100 	 Train Loss:0.6774 Valid Loss:0.6740 	 Train Acc:76.24 %  Valid Acc:77.08 %
Epoch:9/100 	 Train Loss:0.6443 Valid Loss:0.6341 	 Train Acc:77.49 %  Valid Acc:77.87 %
Epoch:10/100 	 Train Loss:0.6132 Valid Loss:0.6338 	 Train Acc:78.63 %  Valid Acc:77.73 %
Epoch:11/100 	 Tra

[I 2024-09-29 06:47:53,900] Trial 13 finished with value: 86.87 and parameters: {'sigma_1': 2.3994107295689187, 'sigma_2': 0.7372144086589018, 'sigma_3': 4.040517483582185, 'sigma_4': 1.2570861115622631, 'sigma_5': 4.143134397229131, 'sigma_6': 1.8114723337674072, 'sigma_7': 1.749233833685064, 'sigma_8': 2.8938506620891977, 'sigma_9': 1.2051231994267035, 'sigma_10': 2.1316336213552467, 'sigma_11': 4.907218483023147, 'sigma_12': 1.3979651436248233, 'sigma_13': 1.100386336559624}. Best is trial 5 with value: 87.35000000000001.


Epoch:100/100 	 Train Loss:0.1546 Valid Loss:0.4744 	 Train Acc:94.46 %  Valid Acc:86.87 %
Epoch:1/100 	 Train Loss:1.5123 Valid Loss:1.2084 	 Train Acc:43.95 %  Valid Acc:56.62 %
Epoch:2/100 	 Train Loss:1.1275 Valid Loss:1.1858 	 Train Acc:59.44 %  Valid Acc:58.74 %
Epoch:3/100 	 Train Loss:0.9749 Valid Loss:0.9136 	 Train Acc:65.18 %  Valid Acc:67.97 %
Epoch:4/100 	 Train Loss:0.8812 Valid Loss:0.8859 	 Train Acc:68.82 %  Valid Acc:68.73 %
Epoch:5/100 	 Train Loss:0.8029 Valid Loss:0.8029 	 Train Acc:71.59 %  Valid Acc:72.49 %
Epoch:6/100 	 Train Loss:0.7415 Valid Loss:0.7366 	 Train Acc:73.83 %  Valid Acc:74.10 %
Epoch:7/100 	 Train Loss:0.6844 Valid Loss:0.6675 	 Train Acc:75.99 %  Valid Acc:77.42 %
Epoch:8/100 	 Train Loss:0.6395 Valid Loss:0.6213 	 Train Acc:77.75 %  Valid Acc:78.86 %
Epoch:9/100 	 Train Loss:0.5941 Valid Loss:0.5838 	 Train Acc:79.27 %  Valid Acc:80.43 %
Epoch:10/100 	 Train Loss:0.5646 Valid Loss:0.5728 	 Train Acc:80.45 %  Valid Acc:80.23 %
Epoch:11/100 	 Tra

[I 2024-09-29 08:07:32,796] Trial 14 finished with value: 87.53 and parameters: {'sigma_1': 2.7710378333777306, 'sigma_2': 3.10956870691802, 'sigma_3': 4.452437301185996, 'sigma_4': 0.1602517871569942, 'sigma_5': 2.9819742646401113, 'sigma_6': 3.1962193834158907, 'sigma_7': 3.529916762665707, 'sigma_8': 4.411809158334843, 'sigma_9': 3.155470527187394, 'sigma_10': 1.2437140031305443, 'sigma_11': 3.1579504167411585, 'sigma_12': 2.983944389343507, 'sigma_13': 2.4247983318977}. Best is trial 14 with value: 87.53.


Epoch:100/100 	 Train Loss:0.1409 Valid Loss:0.4778 	 Train Acc:94.94 %  Valid Acc:87.53 %
Epoch:1/100 	 Train Loss:1.5188 Valid Loss:1.2821 	 Train Acc:43.96 %  Valid Acc:53.07 %
Epoch:2/100 	 Train Loss:1.1638 Valid Loss:1.0067 	 Train Acc:58.18 %  Valid Acc:63.67 %
Epoch:3/100 	 Train Loss:1.0098 Valid Loss:0.9041 	 Train Acc:64.10 %  Valid Acc:67.83 %
Epoch:4/100 	 Train Loss:0.9044 Valid Loss:0.9074 	 Train Acc:67.92 %  Valid Acc:69.03 %
Epoch:5/100 	 Train Loss:0.8211 Valid Loss:0.7628 	 Train Acc:71.00 %  Valid Acc:73.56 %
Epoch:6/100 	 Train Loss:0.7485 Valid Loss:0.7557 	 Train Acc:73.79 %  Valid Acc:74.13 %
Epoch:7/100 	 Train Loss:0.6900 Valid Loss:0.6858 	 Train Acc:75.93 %  Valid Acc:76.33 %
Epoch:8/100 	 Train Loss:0.6477 Valid Loss:0.6795 	 Train Acc:77.61 %  Valid Acc:76.38 %
Epoch:9/100 	 Train Loss:0.6202 Valid Loss:0.6366 	 Train Acc:78.61 %  Valid Acc:78.55 %
Epoch:10/100 	 Train Loss:0.5884 Valid Loss:0.5927 	 Train Acc:79.54 %  Valid Acc:79.51 %
Epoch:11/100 	 Tra

[I 2024-09-29 09:26:56,488] Trial 15 finished with value: 86.9 and parameters: {'sigma_1': 3.319375583975227, 'sigma_2': 3.07675917038456, 'sigma_3': 3.292327338773682, 'sigma_4': 1.695161105430996, 'sigma_5': 2.0129957805260354, 'sigma_6': 4.972982084216207, 'sigma_7': 3.6484602401166324, 'sigma_8': 4.237056255455503, 'sigma_9': 1.4741298716919837, 'sigma_10': 0.8704423384931304, 'sigma_11': 3.125608480870266, 'sigma_12': 3.0206025115709876, 'sigma_13': 2.493521091507873}. Best is trial 14 with value: 87.53.


Epoch:100/100 	 Train Loss:0.1558 Valid Loss:0.4855 	 Train Acc:94.45 %  Valid Acc:86.90 %
Epoch:1/100 	 Train Loss:1.4280 Valid Loss:1.2462 	 Train Acc:47.83 %  Valid Acc:54.83 %
Epoch:2/100 	 Train Loss:1.1345 Valid Loss:1.0503 	 Train Acc:59.31 %  Valid Acc:62.07 %
Epoch:3/100 	 Train Loss:0.9946 Valid Loss:0.9266 	 Train Acc:64.33 %  Valid Acc:66.87 %
Epoch:4/100 	 Train Loss:0.9031 Valid Loss:0.8616 	 Train Acc:68.01 %  Valid Acc:69.56 %
Epoch:5/100 	 Train Loss:0.8246 Valid Loss:0.8493 	 Train Acc:70.88 %  Valid Acc:70.89 %
Epoch:6/100 	 Train Loss:0.7504 Valid Loss:0.7474 	 Train Acc:73.41 %  Valid Acc:73.99 %
Epoch:7/100 	 Train Loss:0.6984 Valid Loss:0.6885 	 Train Acc:75.44 %  Valid Acc:75.78 %
Epoch:8/100 	 Train Loss:0.6457 Valid Loss:0.7053 	 Train Acc:77.52 %  Valid Acc:76.11 %
Epoch:9/100 	 Train Loss:0.6081 Valid Loss:0.6316 	 Train Acc:78.90 %  Valid Acc:78.04 %
Epoch:10/100 	 Train Loss:0.5775 Valid Loss:0.5918 	 Train Acc:79.90 %  Valid Acc:79.85 %
Epoch:11/100 	 Tra

[I 2024-09-29 10:45:54,301] Trial 16 finished with value: 87.64 and parameters: {'sigma_1': 2.552892614417707, 'sigma_2': 3.4882592168857727, 'sigma_3': 4.352113072004928, 'sigma_4': 0.738816595610332, 'sigma_5': 0.1056008808873683, 'sigma_6': 3.587607017492251, 'sigma_7': 3.4530867515762034, 'sigma_8': 2.8790174876906804, 'sigma_9': 3.1238297211006296, 'sigma_10': 0.12856715281533626, 'sigma_11': 3.217293494647598, 'sigma_12': 2.7807219332711726, 'sigma_13': 1.1450978029061871}. Best is trial 16 with value: 87.64.


Epoch:100/100 	 Train Loss:0.1394 Valid Loss:0.4679 	 Train Acc:95.01 %  Valid Acc:87.64 %
Epoch:1/100 	 Train Loss:1.3975 Valid Loss:1.1530 	 Train Acc:49.09 %  Valid Acc:59.26 %
Epoch:2/100 	 Train Loss:1.1011 Valid Loss:0.9716 	 Train Acc:60.49 %  Valid Acc:65.41 %
Epoch:3/100 	 Train Loss:0.9843 Valid Loss:0.8994 	 Train Acc:65.03 %  Valid Acc:68.14 %
Epoch:4/100 	 Train Loss:0.8777 Valid Loss:0.8442 	 Train Acc:69.14 %  Valid Acc:70.71 %
Epoch:5/100 	 Train Loss:0.7799 Valid Loss:0.7708 	 Train Acc:72.55 %  Valid Acc:73.07 %
Epoch:6/100 	 Train Loss:0.7182 Valid Loss:0.7278 	 Train Acc:75.01 %  Valid Acc:75.05 %
Epoch:7/100 	 Train Loss:0.6725 Valid Loss:0.6751 	 Train Acc:76.46 %  Valid Acc:76.90 %
Epoch:8/100 	 Train Loss:0.6371 Valid Loss:0.6853 	 Train Acc:77.85 %  Valid Acc:77.12 %
Epoch:9/100 	 Train Loss:0.5931 Valid Loss:0.6229 	 Train Acc:79.40 %  Valid Acc:79.17 %
Epoch:10/100 	 Train Loss:0.5707 Valid Loss:0.6282 	 Train Acc:80.15 %  Valid Acc:78.68 %
Epoch:11/100 	 Tra

[I 2024-09-29 12:04:43,405] Trial 17 finished with value: 87.37 and parameters: {'sigma_1': 2.626044487717572, 'sigma_2': 3.238226035024801, 'sigma_3': 4.22071827079075, 'sigma_4': 0.1264545490086569, 'sigma_5': 0.18330192682061286, 'sigma_6': 3.422046909955416, 'sigma_7': 4.9957083788676035, 'sigma_8': 2.5872621751660505, 'sigma_9': 4.914985561697237, 'sigma_10': 0.13355375569485162, 'sigma_11': 1.7022467563977164, 'sigma_12': 2.8722467099380102, 'sigma_13': 2.378898647545772}. Best is trial 16 with value: 87.64.


Epoch:100/100 	 Train Loss:0.1446 Valid Loss:0.4766 	 Train Acc:94.80 %  Valid Acc:87.37 %
Epoch:1/100 	 Train Loss:1.4858 Valid Loss:1.2230 	 Train Acc:45.28 %  Valid Acc:55.80 %
Epoch:2/100 	 Train Loss:1.1330 Valid Loss:1.0171 	 Train Acc:59.07 %  Valid Acc:63.48 %
Epoch:3/100 	 Train Loss:0.9950 Valid Loss:0.9477 	 Train Acc:64.56 %  Valid Acc:66.22 %
Epoch:4/100 	 Train Loss:0.9072 Valid Loss:0.8739 	 Train Acc:67.76 %  Valid Acc:69.16 %
Epoch:5/100 	 Train Loss:0.8194 Valid Loss:0.7939 	 Train Acc:70.97 %  Valid Acc:71.21 %
Epoch:6/100 	 Train Loss:0.7620 Valid Loss:0.7307 	 Train Acc:73.01 %  Valid Acc:75.04 %
Epoch:7/100 	 Train Loss:0.7029 Valid Loss:0.7332 	 Train Acc:75.25 %  Valid Acc:74.49 %
Epoch:8/100 	 Train Loss:0.6553 Valid Loss:0.6371 	 Train Acc:76.91 %  Valid Acc:78.01 %
Epoch:9/100 	 Train Loss:0.6140 Valid Loss:0.6182 	 Train Acc:78.34 %  Valid Acc:78.87 %
Epoch:10/100 	 Train Loss:0.5752 Valid Loss:0.6459 	 Train Acc:79.94 %  Valid Acc:78.46 %
Epoch:11/100 	 Tra

[I 2024-09-29 13:24:24,837] Trial 18 finished with value: 87.33999999999999 and parameters: {'sigma_1': 2.7024356361231927, 'sigma_2': 3.9679345761297053, 'sigma_3': 3.4105596936675955, 'sigma_4': 1.6812633123351326, 'sigma_5': 0.7211859155353433, 'sigma_6': 3.3831713546077644, 'sigma_7': 3.331247144811644, 'sigma_8': 3.025150310748803, 'sigma_9': 3.1695391281493617, 'sigma_10': 0.654886961988781, 'sigma_11': 3.0469020180884554, 'sigma_12': 3.7067735456569433, 'sigma_13': 1.0558307429209117}. Best is trial 16 with value: 87.64.


Epoch:100/100 	 Train Loss:0.1464 Valid Loss:0.4666 	 Train Acc:94.76 %  Valid Acc:87.34 %
Epoch:1/100 	 Train Loss:1.5069 Valid Loss:1.3054 	 Train Acc:44.28 %  Valid Acc:52.95 %
Epoch:2/100 	 Train Loss:1.1344 Valid Loss:1.0242 	 Train Acc:59.01 %  Valid Acc:63.44 %
Epoch:3/100 	 Train Loss:0.9736 Valid Loss:0.9358 	 Train Acc:65.13 %  Valid Acc:67.18 %
Epoch:4/100 	 Train Loss:0.8719 Valid Loss:0.8092 	 Train Acc:68.87 %  Valid Acc:71.16 %
Epoch:5/100 	 Train Loss:0.7886 Valid Loss:0.7844 	 Train Acc:72.45 %  Valid Acc:72.69 %
Epoch:6/100 	 Train Loss:0.7258 Valid Loss:0.7279 	 Train Acc:74.44 %  Valid Acc:75.06 %
Epoch:7/100 	 Train Loss:0.6739 Valid Loss:0.6824 	 Train Acc:76.54 %  Valid Acc:77.05 %
Epoch:8/100 	 Train Loss:0.6317 Valid Loss:0.7362 	 Train Acc:78.09 %  Valid Acc:74.99 %
Epoch:9/100 	 Train Loss:0.5969 Valid Loss:0.6703 	 Train Acc:79.24 %  Valid Acc:76.94 %
Epoch:10/100 	 Train Loss:0.5688 Valid Loss:0.5644 	 Train Acc:80.20 %  Valid Acc:80.50 %
Epoch:11/100 	 Tra

[I 2024-09-29 14:23:13,059] Trial 19 finished with value: 87.11 and parameters: {'sigma_1': 2.372211917995976, 'sigma_2': 4.696423499963366, 'sigma_3': 4.328879601640268, 'sigma_4': 0.6471922939677988, 'sigma_5': 1.7514318969792253, 'sigma_6': 2.3662396501741343, 'sigma_7': 3.068965447439853, 'sigma_8': 2.094138834469828, 'sigma_9': 3.929064548616785, 'sigma_10': 1.2050553466740448, 'sigma_11': 1.7274097849914685, 'sigma_12': 4.890422164869193, 'sigma_13': 1.5680232782184458}. Best is trial 16 with value: 87.64.


Epoch:100/100 	 Train Loss:0.1481 Valid Loss:0.4838 	 Train Acc:94.79 %  Valid Acc:87.11 %
Best trial:
  Value:  87.64
  Params: 
    sigma_1: 2.552892614417707
    sigma_2: 3.4882592168857727
    sigma_3: 4.352113072004928
    sigma_4: 0.738816595610332
    sigma_5: 0.1056008808873683
    sigma_6: 3.587607017492251
    sigma_7: 3.4530867515762034
    sigma_8: 2.8790174876906804
    sigma_9: 3.1238297211006296
    sigma_10: 0.12856715281533626
    sigma_11: 3.217293494647598
    sigma_12: 2.7807219332711726
    sigma_13: 1.1450978029061871


In [1]:
import random
from torchvision import datasets, transforms
import torch.optim as optim
import torch.nn as nn
import pandas as pd
#import imageio
import numpy as np
import torch
from torch.autograd import Variable
import torch.nn.functional as F
import torch.utils.data as Data

import matplotlib.pyplot as plt
%matplotlib inline
import math

import torchvision
import torchvision.transforms as transforms
import optuna  #载入optuna优化包

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Image preprocessing modules
transform = transforms.Compose([
    transforms.Pad(4),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32),
    transforms.ToTensor()])

# CIFAR-10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                             train=True,
                                             transform=transform,
                                             download=True)

test_dataset = torchvision.datasets.CIFAR10(root='../../data/',
                                            train=False,
                                            transform=transforms.ToTensor())

# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=100,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=100,
                                          shuffle=False)

Files already downloaded and verified


In [3]:
##看看其他激活函数，比如RayLU
class Raylu(nn.Module):  #带参数的gelu激活函数
    def __init__(self):
        super().__init__()
        self.sigma = nn.Parameter(2 * torch.randn(1))  #可学习参数 sigma

    def forward(self, input):
        x = torch.where(input >= 0, input, input * torch.exp(-input ** 2 / self.sigma ** 2 / 2))
        return x


# 3x3 convolution
def conv3x3(in_channels, out_channels, stride=1):
    return nn.Conv2d(in_channels, out_channels, kernel_size=3,
                     stride=stride, padding=1, bias=False)


# Residual block
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = conv3x3(in_channels, out_channels, stride)
        self.bn1 = nn.BatchNorm2d(out_channels)
        #self.relu = nn.ReLU()
        self.Raylu1 = Raylu()
        self.Raylu2 = Raylu()
        self.conv2 = conv3x3(out_channels, out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        #out = self.relu(out)
        out = self.Raylu1(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.Raylu2(out)
        return out


# ResNet
class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        super(ResNet, self).__init__()
        self.in_channels = 16
        self.conv = conv3x3(3, 16)
        self.bn = nn.BatchNorm2d(16)
        #self.relu = nn.ReLU()
        self.Raylu3 = Raylu()
        self.layer1 = self.make_layer(block, 16, layers[0])
        self.layer2 = self.make_layer(block, 32, layers[1], 2)
        self.layer3 = self.make_layer(block, 64, layers[2], 2)
        self.avg_pool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64, num_classes)

    def make_layer(self, block, out_channels, blocks, stride=1):
        downsample = None
        if (stride != 1) or (self.in_channels != out_channels):
            downsample = nn.Sequential(
                conv3x3(self.in_channels, out_channels, stride=stride),
                nn.BatchNorm2d(out_channels))
        layers = []
        layers.append(block(self.in_channels, out_channels, stride, downsample))
        self.in_channels = out_channels
        for i in range(1, blocks):
            layers.append(block(out_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv(x)
        out = self.bn(out)
        #out = self.relu(out)
        out = self.Raylu3(out)
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.avg_pool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


#model1 = ResidualBlock()
model = ResNet(ResidualBlock, [2, 2, 2]).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
learning_rate=0.001
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

#各个含有GELU的激活函数赋初值
z = model.Raylu3.sigma
nn.init.constant_(z, 6)
z101 = model.layer1[0].Raylu1.sigma
nn.init.constant_(z101, 4)
z102 = model.layer1[0].Raylu2.sigma
nn.init.constant_(z102, 4)
z111 = model.layer1[1].Raylu1.sigma
nn.init.constant_(z111, 3)
z112 = model.layer1[1].Raylu2.sigma
nn.init.constant_(z112, 3)
z201 = model.layer2[0].Raylu1.sigma
nn.init.constant_(z201, 5)
z202 = model.layer2[0].Raylu2.sigma
nn.init.constant_(z202, 5)
z211 = model.layer2[1].Raylu1.sigma
nn.init.constant_(z211, 5)
z212 = model.layer2[1].Raylu2.sigma
nn.init.constant_(z212, 5)
z301 = model.layer3[0].Raylu1.sigma
nn.init.constant_(z301, 2)
z302 = model.layer3[0].Raylu2.sigma
nn.init.constant_(z302, 4)
z311 = model.layer3[1].Raylu1.sigma
nn.init.constant_(z311, 2)
z312 = model.layer3[1].Raylu2.sigma
nn.init.constant_(z312, 3)


############################################

# For updating learning rate
def update_lr(optimizer, lr):
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr


# Train the model
total_step = len(train_loader)
curr_lr = learning_rate

historycifaRaylu = {'Raylu_train_loss': [], 'Raylu_valid_loss': [], 'Raylu_train_acc': [], 'Raylu_valid_acc': []}
num_epochs = 80

for epoch in range(num_epochs):
    Raylu_train_loss, Raylu_train_correct = 0.0, 0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        Raylu_train_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(outputs.data, 1)  #最大化输出得到标签0-9
        Raylu_train_correct += (predictions == labels).sum().item()  #对比标签得到正确的标签个数   

    # Decay learning rate
    if (epoch + 1) % 20 == 0:
        curr_lr /= 3
        update_lr(optimizer, curr_lr)
    Raylu_valid_loss, Raylu_val_correct = 0.0, 0
    model.eval()  #评估模式,非训练模式,batch normalization 和 dropout关闭
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        Raylu_valid_loss += loss.item() * images.size(0)
        scores, predictions = torch.max(outputs.data, 1)  #最大化输出得到标签0-9
        Raylu_val_correct += (predictions == labels).sum().item()  #对比标签得到正确的标签个数

    Raylu_train_loss = Raylu_train_loss / len(train_loader.sampler)  #除以48000
    Raylu_train_acc = Raylu_train_correct / len(train_loader.sampler) * 100
    Raylu_valid_loss = Raylu_valid_loss / len(test_loader.sampler)  #除以12000
    Raylu_valid_acc = Raylu_val_correct / len(test_loader.sampler) * 100

    print("Epoch:{}/{} \t AVERAGE TL:{:.4f} AVERAGE VL:{:.4f} \t AVERAGE TA:{:.2f} %  AVERAGE VA:{:.2f} %".format(
        epoch + 1, num_epochs,
        Raylu_train_loss,
        Raylu_valid_loss,
        Raylu_train_acc,
        Raylu_valid_acc))
    historycifaRaylu['Raylu_train_loss'].append(Raylu_train_loss)  #每次训练和验证记录下来，后面画图   
    historycifaRaylu['Raylu_valid_loss'].append(Raylu_valid_loss)
    historycifaRaylu['Raylu_train_acc'].append(Raylu_train_acc)
    historycifaRaylu['Raylu_valid_acc'].append(Raylu_valid_acc) 

Epoch:1/80 	 AVERAGE TL:1.6360 AVERAGE VL:1.5498 	 AVERAGE TA:39.24 %  AVERAGE VA:44.04 %
Epoch:2/80 	 AVERAGE TL:1.2425 AVERAGE VL:1.0412 	 AVERAGE TA:55.47 %  AVERAGE VA:62.48 %
Epoch:3/80 	 AVERAGE TL:1.0140 AVERAGE VL:0.9309 	 AVERAGE TA:63.95 %  AVERAGE VA:66.75 %
Epoch:4/80 	 AVERAGE TL:0.8664 AVERAGE VL:0.8680 	 AVERAGE TA:69.17 %  AVERAGE VA:69.56 %
Epoch:5/80 	 AVERAGE TL:0.7861 AVERAGE VL:0.7686 	 AVERAGE TA:72.15 %  AVERAGE VA:73.09 %
Epoch:6/80 	 AVERAGE TL:0.7069 AVERAGE VL:0.7657 	 AVERAGE TA:75.13 %  AVERAGE VA:73.74 %
Epoch:7/80 	 AVERAGE TL:0.6527 AVERAGE VL:0.6688 	 AVERAGE TA:77.09 %  AVERAGE VA:76.68 %
Epoch:8/80 	 AVERAGE TL:0.5998 AVERAGE VL:0.6221 	 AVERAGE TA:79.08 %  AVERAGE VA:79.01 %
Epoch:9/80 	 AVERAGE TL:0.5619 AVERAGE VL:0.5747 	 AVERAGE TA:80.22 %  AVERAGE VA:80.03 %
Epoch:10/80 	 AVERAGE TL:0.5191 AVERAGE VL:0.5607 	 AVERAGE TA:81.96 %  AVERAGE VA:80.89 %
Epoch:11/80 	 AVERAGE TL:0.4952 AVERAGE VL:0.5721 	 AVERAGE TA:82.87 %  AVERAGE VA:80.85 %
Epoch:12