# Example 08: PyTorch Parameter-optimization with Optuna

## 事前準備

In [1]:
import torch

# Select the compute device: CUDA when a GPU is available, otherwise the CPU.
# NOTE: a tensor or module is moved onto it with `x = x.to(device)`.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

## CNN

In [2]:
import optuna
import torch
import torchinfo
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import transforms
from torchvision.datasets import CIFAR10
from tqdm.notebook import tqdm

import matplotlib.pyplot as plt

### DataLoader設定

In [3]:
# Build the preprocessing pipelines.
# Training-only augmentation: a random rotation within +/-30 degrees combined
# with a random scale factor in [0.8, 1.2], then a horizontal flip with p=0.5.
affine = transforms.RandomAffine(degrees=(-30, 30), scale=(0.8, 1.2))
flip = transforms.RandomHorizontalFlip(p=0.5)
# Per-channel mean 0 and std 1: (x - 0) / 1, so this step leaves the
# ToTensor output unchanged.
normalize = transforms.Normalize(mean=(0.0, 0.0, 0.0), std=(1.0, 1.0, 1.0))

# Training pipeline: augment, convert to a tensor, normalize.
transform_train = transforms.Compose(
    [affine, flip, transforms.ToTensor(), normalize]
)

# Evaluation pipeline: no augmentation, only tensor conversion and normalize.
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

In [4]:
# Create the CIFAR-10 train and test datasets (download=True is passed for both),
# each with its own transform pipeline, and keep the list of class names.
cifar10_train, cifar10_test = (
    CIFAR10(root='../cache/data', train=is_train, download=True, transform=pipeline)
    for is_train, pipeline in ((True, transform_train), (False, transform_test))
)
cifar10_classes = cifar10_train.classes

In [5]:
# Wrap the datasets in DataLoaders: the training loader shuffles its samples,
# the evaluation loader keeps the dataset order.
batch_size = 128
train_loader = DataLoader(dataset=cifar10_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=cifar10_test, batch_size=batch_size, shuffle=False)

In [6]:
# Number of samples in the training and test datasets.
len(cifar10_train), len(cifar10_test)

(50000, 10000)

### モデル構築

`dropout_prob`, `activation_func` を自動チューニングするため、コンストラクタで設定できるようにモデルクラスを作成する。

In [7]:
class Net(nn.Module):
    """Small CNN classifier with a configurable dropout rate and activation.

    Architecture: conv(3->8, 5x5) -> act -> maxpool(2) -> conv(8->16, 5x5) -> act
    -> maxpool(2) -> flatten -> fc(400->256) -> act -> dropout -> fc(256->n_classes).
    The 400 (= 16 * 5 * 5) input features of ``fc1`` correspond to 32x32 inputs
    (32 -> 28 -> 14 -> 10 -> 5 through the two conv/pool stages).

    Args:
        n_classes: Number of output logits.
        dropout_prob: Probability passed to ``nn.Dropout`` before the last layer.
        activation_func: Activation module reused after both convolutions and
            after ``fc1``. When ``None`` (the default) a fresh ``nn.ReLU()`` is
            created for this instance, so instances never share one module
            object through the default argument.
    """

    def __init__(self, n_classes: int,
                       dropout_prob: float = 0.5,
                       activation_func: 'nn.Module | None' = None):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, 5)         # in channels, out channels, kernel size
        # Build the default here rather than in the signature: a default of
        # nn.ReLU() would be evaluated once and shared by every Net instance.
        self.active = nn.ReLU() if activation_func is None else activation_func
        self.pool = nn.MaxPool2d(2, 2)          # window size, stride
        self.conv2 = nn.Conv2d(8, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 256)
        self.dropout = nn.Dropout(dropout_prob)  # dropout probability
        self.fc2 = nn.Linear(256, n_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits, shape (batch, n_classes), for a batch of images."""
        x = self.pool(self.active(self.conv1(x)))
        x = self.pool(self.active(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, 400),
        # this cannot silently fold a wrongly sized input into a different
        # batch size; fc1 raises a shape error instead.
        x = torch.flatten(x, start_dim=1)
        x = self.dropout(self.active(self.fc1(x)))
        return self.fc2(x)

In [8]:
# Instantiate the network with one output per CIFAR-10 class (default dropout
# rate and activation).
net = Net(n_classes=len(cifar10_classes))

### 訓練 & 検証用関数を作成

自動パラメータチューニングで作成するoptunaの目的関数で使用するため、訓練 & 検証用の関数を作成する。

In [9]:
def train(net: nn.Module, train_loader: DataLoader, optimizer: optim.Optimizer, criterion: nn.Module = nn.CrossEntropyLoss(), verbose: bool = True) -> float:
    """Train ``net`` for one pass over ``train_loader`` and return the mean batch loss.

    Batches are moved to the module-level ``device`` before the forward pass.

    Args:
        net: Model to train; it is switched to training mode.
        train_loader: Iterable of ``(inputs, targets)`` batches that supports ``len()``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss applied to ``(net(inputs), targets)``.
        verbose: When true, iterate through a ``tqdm`` progress bar.

    Returns:
        Sum of the per-batch losses divided by ``len(train_loader)``.
    """
    net.train()

    # Show a progress bar only when requested.
    batches = train_loader
    if verbose:
        batches = tqdm(train_loader)

    running_loss = 0.0
    for inputs, targets in batches:
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = criterion(net(inputs), targets)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    return running_loss / len(train_loader)

In [10]:
def test(net: nn.Module, test_loader: DataLoader, criterion: nn.Module = nn.CrossEntropyLoss(), verbose: bool = True) -> float:
    net.eval()
    loss_test = 0.0

    # verbose がTrueの場合はプログレスバーを表示する
    iterator = tqdm(train_loader) if verbose else train_loader
    
    for (x, t) in iterator:
        x, t = x.to(device), t.to(device)
        y = net(x)

        loss = criterion(y, t)
        loss_test += loss.item()

    loss_test /= len(test_loader)
    return loss_test

### 自動チューニング設定(optuna)

今回は以下のパラメータをチューニング対象とする。

- 最適化アルゴリズム(アルゴリズムごとの学習率、および Adam / Momentum SGD の weight decay を含む)
- 活性化関数
- ドロップアウト率

#### Optimizer

In [11]:
def get_adam_optimizer(trial: optuna.trial.Trial, model: nn.Module) -> optim.Optimizer:
    """Build an Adam optimizer whose hyperparameters are suggested by ``trial``.

    Args:
        trial: Optuna trial used to sample ``adam_lr`` and ``adam_weight_decay``.
        model: Model whose parameters the optimizer will update.

    Returns:
        An ``optim.Adam`` instance configured with the sampled values.
    """
    lr = trial.suggest_float('adam_lr', 1e-5, 1e-1, log=True)
    # The range spans seven orders of magnitude, so sample it on a log scale
    # (as the momentum-SGD search does); linear sampling would almost never
    # propose values below about 1e-5.
    weight_decay = trial.suggest_float('adam_weight_decay', 1e-10, 1e-3, log=True)
    optimizer = optim.Adam(model.parameters(),
                           lr=lr,
                           weight_decay=weight_decay)
    return optimizer

def get_momentum_sgd_optimizer(trial: optuna.trial.Trial, model: nn.Module) -> optim.Optimizer:
    """Build an SGD optimizer (momentum fixed at 0.9) with trial-suggested settings.

    Args:
        trial: Optuna trial used to sample ``momentum_sgd_lr`` and
            ``momentum_sgd_weight_decay`` (both on a log scale).
        model: Model whose parameters the optimizer will update.

    Returns:
        An ``optim.SGD`` instance configured with the sampled values.
    """
    # Dict literals evaluate in order, so the learning rate is suggested first
    # and the weight decay second.
    sampled = {
        'lr': trial.suggest_float('momentum_sgd_lr', 1e-5, 1e-1, log=True),
        'weight_decay': trial.suggest_float('momentum_sgd_weight_decay', 1e-10, 1e-3, log=True),
    }
    return optim.SGD(model.parameters(), momentum=0.9, **sampled)

def get_rms_prob_optimizer(trial: optuna.trial.Trial, model: nn.Module) -> optim.Optimizer:
    """Build an RMSprop optimizer with a trial-suggested learning rate.

    Args:
        trial: Optuna trial used to sample ``rms_prob_lr`` (log scale).
        model: Model whose parameters the optimizer will update.

    Returns:
        An ``optim.RMSprop`` instance using the sampled learning rate.
    """
    return optim.RMSprop(
        model.parameters(),
        lr=trial.suggest_float('rms_prob_lr', 1e-5, 1e-1, log=True),
    )

In [12]:
def get_optimizer(trial: optuna.trial.Trial, model: nn.Module) -> optim.Optimizer:
    """Let ``trial`` choose an optimizer family and build it for ``model``.

    Args:
        trial: Optuna trial; samples the categorical ``optimizer`` parameter and
            then the hyperparameters of the selected family.
        model: Model whose parameters the optimizer will update.

    Returns:
        The optimizer created by the selected builder.
    """
    chosen = trial.suggest_categorical('optimizer', ['Adam', 'MomentumSGD', 'rmsprop'])

    # Any name without an explicit entry falls through to RMSprop.
    builders = {
        'Adam': get_adam_optimizer,
        'MomentumSGD': get_momentum_sgd_optimizer,
    }
    build = builders.get(chosen, get_rms_prob_optimizer)
    return build(trial, model)

#### 活性化関数

In [13]:
def get_activation(trial: optuna.trial.Trial) -> nn.Module:
    """Let ``trial`` choose the activation function and return a new module for it.

    Args:
        trial: Optuna trial; samples the categorical ``activation`` parameter.

    Returns:
        ``nn.ReLU()`` when 'ReLU' is selected, otherwise ``nn.Tanh()``.
    """
    chosen = trial.suggest_categorical('activation', ['ReLU', 'Tanh'])
    return nn.ReLU() if chosen == 'ReLU' else nn.Tanh()

#### 目的関数(クラス)の定義

評価時の損失(クロスエントロピー)を最小化するような目的関数を作成する。

In [14]:
class Objective:
    """Callable Optuna objective: train a fresh ``Net`` and return its evaluation loss.

    Each call samples a dropout rate, an activation and an optimizer from the
    trial, trains a new network for ``n_epoch`` epochs on ``train_loader`` and
    returns the loss measured on ``test_loader`` after the final epoch.

    Args:
        n_traial: Total number of trials; used only as the progress-bar total.
            (The spelling is kept so existing keyword callers keep working.)
        train_loader: Loader passed to ``train`` in every epoch.
        test_loader: Loader passed to ``test`` in every epoch.
        n_class: Number of output classes of the network.
        n_epoch: Training epochs per trial; must be at least 1.

    Raises:
        ValueError: If ``n_epoch`` is smaller than 1 (no loss could be returned).
    """

    def __init__(self, n_traial: int, train_loader: DataLoader, test_loader: DataLoader, n_class: int, n_epoch: int = 10):
        if n_epoch < 1:
            raise ValueError(f"n_epoch must be >= 1, got {n_epoch}")
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.n_class = n_class
        self.n_epoch = n_epoch
        # One progress-bar tick per finished trial.
        self.progress_bar = tqdm(total=n_traial)

    def __call__(self, trial: optuna.trial.Trial) -> float:
        """Run one trial and return the evaluation loss after the last epoch."""
        dropout_prob = trial.suggest_float("dropout_prob", 0.2, 0.8, step=0.1)
        activation = get_activation(trial)

        net = Net(self.n_class, dropout_prob, activation).to(device)
        optimizer = get_optimizer(trial, net)

        criterion = nn.CrossEntropyLoss()

        # Use the loaders given to the constructor, not same-named globals, so
        # the objective works with whatever data it was configured with.
        for _ in range(self.n_epoch):
            train(net, self.train_loader, optimizer, criterion, verbose=False)
            loss = test(net, self.test_loader, criterion, verbose=False)

        self.progress_bar.update(1)

        return loss

### 自動チューニング実施(optuna)

In [15]:
# Search budget: number of Optuna trials to run.
n_trials = 30

# Training epochs per trial, and the number of target classes.
n_epoch = 10
n_class = len(cifar10_classes)

In [16]:
# The objective returns a loss value, so the study direction is 'minimize'.
objective = Objective(
    n_traial=n_trials,
    train_loader=train_loader,
    test_loader=test_loader,
    n_class=n_class,
    n_epoch=n_epoch,
)
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=n_trials)

  0%|          | 0/30 [00:00<?, ?it/s]

[I 2025-08-22 00:43:47,336] A new study created in memory with name: no-name-22aca879-1f2e-4856-897a-29f5915adc6f
[I 2025-08-22 00:46:01,474] Trial 0 finished with value: 11.87208314183392 and parameters: {'dropout_prob': 0.2, 'activation': 'Tanh', 'optimizer': 'rmsprop', 'rms_prob_lr': 0.005442623693178118}. Best is trial 0 with value: 11.87208314183392.
[I 2025-08-22 00:48:15,549] Trial 1 finished with value: 8.583642515955091 and parameters: {'dropout_prob': 0.8, 'activation': 'ReLU', 'optimizer': 'Adam', 'adam_lr': 5.379379269473184e-05, 'adam_weight_decay': 0.0008491901632931595}. Best is trial 1 with value: 8.583642515955091.
[I 2025-08-22 00:50:27,046] Trial 2 finished with value: 9.786721526821957 and parameters: {'dropout_prob': 0.7, 'activation': 'ReLU', 'optimizer': 'Adam', 'adam_lr': 1.4116438789115086e-05, 'adam_weight_decay': 0.000986737591616569}. Best is trial 1 with value: 8.583642515955091.
[I 2025-08-22 00:52:39,358] Trial 3 finished with value: 7.5367707315879535 an

In [17]:
# Objective value of the best trial in the study.
study.best_value

5.834608320948444

In [18]:
# Hyperparameters of the best trial.
study.best_params

{'dropout_prob': 0.2,
 'activation': 'Tanh',
 'optimizer': 'Adam',
 'adam_lr': 0.0020255100798272398,
 'adam_weight_decay': 1.6714355364919236e-05}

In [19]:
df = study.trials_dataframe() # all trials as a pandas DataFrame
# Show the trials ordered by objective value, lowest first.
df.sort_values('value')

Unnamed: 0,number,value,datetime_start,datetime_complete,duration,params_activation,params_adam_lr,params_adam_weight_decay,params_dropout_prob,params_momentum_sgd_lr,params_momentum_sgd_weight_decay,params_optimizer,params_rms_prob_lr,state
24,24,5.834608,2025-08-22 01:36:46.177674,2025-08-22 01:39:02.205869,0 days 00:02:16.028195,Tanh,0.002026,1.7e-05,0.2,,,Adam,,COMPLETE
11,11,6.419107,2025-08-22 01:08:04.001777,2025-08-22 01:10:15.837520,0 days 00:02:11.835743,ReLU,0.00158,3.3e-05,0.5,,,Adam,,COMPLETE
16,16,6.585788,2025-08-22 01:19:05.253054,2025-08-22 01:21:16.508186,0 days 00:02:11.255132,ReLU,,,0.5,0.032739,1.639823e-08,MomentumSGD,,COMPLETE
26,26,6.588651,2025-08-22 01:41:14.888778,2025-08-22 01:43:28.139266,0 days 00:02:13.250488,Tanh,0.002795,0.000169,0.6,,,Adam,,COMPLETE
21,21,6.668511,2025-08-22 01:30:06.538818,2025-08-22 01:32:19.493055,0 days 00:02:12.954237,Tanh,0.000594,0.000613,0.2,,,Adam,,COMPLETE
18,18,6.73499,2025-08-22 01:23:29.325277,2025-08-22 01:25:42.283919,0 days 00:02:12.958642,Tanh,0.000575,0.000604,0.3,,,Adam,,COMPLETE
20,20,6.825363,2025-08-22 01:27:53.682796,2025-08-22 01:30:06.538389,0 days 00:02:12.855593,ReLU,,,0.8,0.015874,1.348938e-05,MomentumSGD,,COMPLETE
28,28,7.028982,2025-08-22 01:45:43.939905,2025-08-22 01:47:55.497247,0 days 00:02:11.557342,Tanh,,,0.6,,,rmsprop,0.000334,COMPLETE
23,23,7.081546,2025-08-22 01:34:32.884205,2025-08-22 01:36:46.177193,0 days 00:02:13.292988,Tanh,0.000243,0.000336,0.4,,,Adam,,COMPLETE
8,8,7.09246,2025-08-22 01:01:27.470736,2025-08-22 01:03:39.489577,0 days 00:02:12.018841,ReLU,,,0.5,0.004072,1.921871e-09,MomentumSGD,,COMPLETE
