# Optuna
Optunaとは，Preferred Networks発の 機械学習におけるハイパーパラメータの自動最適化（チューニング）を行うフレームワークのこと。  
目的関数に対して，適当なハイパーパラメータを使って評価を繰り返し，その目的関数が最小となる最適なハイパーパラメータを探し出す．


Optunaを使ってハイパーパラメータの中で最も良いものを探索してみることを目標とする。

今回は簡単な課題としてMNISTを用いてチューニングを実行してみる。



# 目的関数の定義
最小化する目的関数を設定しないといけないが、これはobjective()関数で定義する。これは引数としてtrialオブジェクトが必要

    def objective(trial):

        return

# trialオブジェクトの定義

- カテゴリの試行を行うパラメータ
param1 = trial.suggest_categorical(name, choices)

- 整数値の試行を行うパラメータ
param2 = trial.suggest_int(name, low, high)

- 連続値の試行を行うパラメータ
param3 = trial.suggest_uniform(name, low, high)

- 離散値の試行を行うパラメータ
param4 = trial.suggest_discrete_uniform(name, low, high, q)

- 対数値の試行を行うパラメータ
param5 = trial.suggest_loguniform(name, low, high)  



nameはstr型でありパラメータの名前を指定し，choicesはlist型であり複数のカテゴリ名の選択肢として提示する引数となる。low，highではパラメータの最小値と最大値を提示し、qによってその値間を試行する間隔を設定する。

# Studyオブジェクトの定義

studyオブジェクトに最適化の結果が表示される。

    study=optuna.create_study()

それにoptimizerメソッドを用いることで最適なハイパーパラメータを求めることができる。

    study.optimizer(objective,n_trailas=100)

optimizeメソッドの第1引数は目的関数であるobjective関数となり，第2引数は試行回数となる．studyオブジェクト内でtrialの処理が行われるため，optimizeメソッドを実行するだけで自動的に目的関数の最小値およびそのハイパーパラメータを探してくれる。

最適化の結果は以下で見ることができる。

- 最適化したハイパーパラメータの結果
study.best_params

- 最適化後の目的関数の値
study.best_value

- 全試行過程
study.trials

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
import os
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import mlflow
from torchvision import transforms,datasets

In [2]:

BATCHSIZE = 128

transform = transforms.Compose([transforms.ToTensor()])

dataloader_train=torch.utils.data.DataLoader(
    datasets.MNIST('../data/mnist',train=True,download=True,transform=transform),
    batch_size=BATCHSIZE,
    shuffle=True
)

dataloader_valid=torch.utils.data.DataLoader(
    datasets.MNIST('../data/mnist/',train=False,download=True,transform=transform),
    batch_size=BATCHSIZE,
    shuffle=True
)

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import optuna
optuna.logging.disable_default_handler()


#モデルの定義

#入力画像の高さと幅，畳み込み層のカーネルサイズ
in_height = 28
in_width = 28
kernel = 3
class Net(nn.Module):
  def __init__(self, trial, num_layer, mid_units, num_filters):
    super(Net, self).__init__()
    self.activation = get_activation(trial)
    #第1層
    self.convs = nn.ModuleList([nn.Conv2d(in_channels=1, out_channels=num_filters[0], kernel_size=3)])
    self.out_height = in_height - kernel +1
    self.out_width = in_width - kernel +1
    #第2層以降
    for i in range(1, num_layer):
      self.convs.append(nn.Conv2d(in_channels=num_filters[i-1], out_channels=num_filters[i], kernel_size=3))
      self.out_height = self.out_height - kernel + 1
      self.out_width = self.out_width - kernel +1
    #pooling層
    self.pool = nn.AvgPool2d(kernel_size=2, stride=2)
    self.out_height = int(self.out_height / 2)
    self.out_width = int(self.out_width / 2)
    #線形層
    self.out_feature = self.out_height * self.out_width * num_filters[num_layer - 1]
    self.fc1 = nn.Linear(in_features=self.out_feature, out_features=mid_units) 
    self.fc2 = nn.Linear(in_features=mid_units, out_features=10)

  def forward(self, x):
    for i,l in enumerate(self.convs):
      x = l(x)
      x = self.activation(x)
    x = self.pool(x)
    x = x.view(-1, self.out_feature)
    x = self.fc1(x)
    x = self.fc2(x)
    return F.log_softmax(x, dim=1)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def train(model, device, train_loader, optimizer):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

def test(model, device, test_loader):
    model.eval()
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            pred = output.max(1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
    return 1 - correct / len(test_loader.dataset)

In [5]:
import torch.optim as optim

def get_optimizer(trial, model):
    optimizer_names = ['Adam', 'MomentumSGD', 'rmsprop']
    optimizer_name = trial.suggest_categorical('optimizer', optimizer_names)
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-10, 1e-3)
    if optimizer_name == optimizer_names[0]: 
        adam_lr = trial.suggest_loguniform('adam_lr', 1e-5, 1e-1)
        optimizer = optim.Adam(model.parameters(), lr=adam_lr, weight_decay=weight_decay)
    elif optimizer_name == optimizer_names[1]:
        momentum_sgd_lr = trial.suggest_loguniform('momentum_sgd_lr', 1e-5, 1e-1)
        optimizer = optim.SGD(model.parameters(), lr=momentum_sgd_lr, momentum=0.9, weight_decay=weight_decay)
    else:
        optimizer = optim.RMSprop(model.parameters())
    return optimizer


In [6]:
def get_activation(trial):
    activation_names = ['ReLU', 'ELU']
    activation_name = trial.suggest_categorical('activation', activation_names)
    if activation_name == activation_names[0]:
        activation = F.relu
    else:
        activation = F.elu
    return activation

In [7]:
EPOCH = 10
def objective(trial):
    device = "cuda" if torch.cuda.is_available() else "cpu"

#畳み込み層の数
    num_layer = trial.suggest_int('num_layer', 1, 3)

#FC層のユニット数
    mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 500, 100))

#各畳込み層のフィルタ数
    num_filters = [int(trial.suggest_discrete_uniform("num_filter_"+str(i), 16, 128, 16)) for i in range(num_layer)]

    model = Net(trial, num_layer, mid_units, num_filters).to(device)
    optimizer = get_optimizer(trial, model)

    for step in range(EPOCH):
        train(model, device, dataloader_train, optimizer)
        error_rate = test(model, device, dataloader_valid)

    return error_rate
#目的関数の定義だが、Optunaは目的関数が小さくなるようにハイパーパラメータをチューニングするため
# objective関数は認識率でなく誤り率error_rateを返すよう定義。

In [8]:
TRIAL_SIZE = 100
study = optuna.create_study()
study.optimize(objective, n_trials=TRIAL_SIZE,n_jobs=16)



In [None]:
print(study.best_params)

{'num_layer': 5, 'mid_units': 400.0, 'num_filter_0': 32.0, 'num_filter_1': 48.0, 'num_filter_2': 64.0, 'num_filter_3': 112.0, 'num_filter_4': 128.0, 'activation': 'ReLU', 'optimizer': 'Adam', 'weight_decay': 7.1955149991328544e-06, 'adam_lr': 0.0007119522124205308}


In [None]:

in_height=28
in_width=28


class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()

        self.conv_layers=nn.Sequential()

        if study.best_params['activation']=='ReLU':
            Activation=nn.ReLU()
        else:
            Activation=nn.ELU()

        self.conv_layers.add_module(nn.Conv2d(1,study.best_params['num_filter'+str(i)]),3)
        self.conv_layers.add_module(nn.BatchNorm2d(study.best_params['num_filter'+str(i)]))
        self.conv_layers.add_module(nn.AvgPool2d(2))
        self.conv_layers.add_module(Activation)

        self.in_height = in_height-2         
        self.in_width = in_width-2
        
        for i in range(1,study.best_paemas['num_layer']):
            self.conv_layers.add_module(f"conv_{i+1}",nn.Conv2d(study.best_params['num_filter'+str(i)],study.best_parmas['num_filter'+str(i+1)],3))
            self.in_height = in_height-2         
            self.in_width = in_width-2
            self.conv_layers.add_module(f'batch_norm_{i+1}',nn.BatchNorm2d(study.bast_params['num_filter'+str(i+1)]))
            self.conv_layers.add_module(f'relu_{i+1}',Activation)
            self.conv_layers.add_module(nn.AvgPool2d(2))
            self.out_height = int(self.out_height / 2)
            self.out_width = int(self.out_width / 2)
            
        self.fc_layers=nn.Sequential(
            nn.Linear(self.out_height * self.out_width * study.best_params['num_filters'+str(study.best_paemas['num_layer'])]),
            nn.ReLU(),
            nn.Linear(study.best_params['num_filters'+str(study.best_paemas['num_layer'])],10)            
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc_layers(x)
        return x


In [None]:
config={
    'batch_size':100,
    'epochs':100
}

In [None]:
def init_weights(m):  # Heの初期化
    if type(m) == nn.Linear or type(m) == nn.Conv2d:
        torch.nn.init.kaiming_normal_(m.weight)
        m.bias.data.fill_(0.0)

model=CNN()
model.apply(init_weights)

batch_size = config['batch_size']
device = 'cuda'

Model=model.to(device)
optimizer = optim.Adam(Model.parameters(), lr=study.best_params['adam_lr'],weight_decay=study.best_param['weight_decay'])
loss_function = nn.CrossEntropyLoss()

UnboundLocalError: cannot access local variable 'i' where it is not associated with a value