## Optuna를 활용한 CNN Model의 Hyperparameter Tuning
- https://colab.research.google.com/drive/1TEILbIeyE1wW3daNWOx9rLq0Hc9TXmbV#scrollTo=Bm7t3gfI-1GF

In [71]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import random_split
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Dataset

In [72]:
# GPU 정보도 받으면서 GPU 설정하기
print("============================================================================================")
# set device to cpu or cuda
device = torch.device('cpu')
if(torch.cuda.is_available()): 
    device = torch.device('cuda:0') 
    torch.cuda.empty_cache()
    print("Device set to : " + str(torch.cuda.get_device_name(device)))
else:
    print("Device set to : cpu")
print("============================================================================================")

Device set to : NVIDIA GeForce GTX 1050 Ti


## 데이터

In [73]:
train_data = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
test_data = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

In [74]:
print(train_data)
print()
print(test_data)

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: ToTensor()

Dataset MNIST
    Number of datapoints: 10000
    Root location: ./data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [75]:
def get_mnist_loaders(train_batch_size, test_batch_size):
    
    train_loader = DataLoader(train_data, batch_size=train_batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=test_batch_size, shuffle=False)
    
    return train_loader, test_loader

## CNN

In [76]:
class Net(nn.Module):
    def __init__(self, out_channels1, out_channels2, prob1, prob2, prob3, hidden1):
        super(Net, self).__init__()

        self.layer1 = nn.Sequential(
                                    nn.Conv2d(1, out_channels1, kernel_size=3, padding=1),  # in_channels는 절대 건들면 안 됨
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Dropout2d(prob1)  )

        self.layer2 = nn.Sequential(
                                    nn.Conv2d(out_channels1, out_channels2, kernel_size=3, padding=1), # 
                                    nn.ReLU(),
                                    nn.MaxPool2d(2),
                                    nn.Dropout2d(prob2) )
        
        self.linear1 = nn.Linear(out_channels2 * 7 * 7, hidden1) # out_channels2 * 7 * 7은 CNN에서 output_size 구하는 계산 식에서 얻음
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(prob3)
        self.linear2 = nn.Linear(hidden1, 10)
        self.logsoftmax = nn.LogSoftmax(dim=1)


    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)

        x = torch.flatten(x, start_dim=1)
        x = self.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        x = self.logsoftmax(x)

        return x

In [77]:
a = torch.ones(1, 1, 28, 28)
print(a.shape)

torch.Size([1, 1, 28, 28])


## TRAIN
- nll_loss 사용해서 loss가 음수로 출력됨

In [78]:
def train(log_interval, model, train_loader, optimizer, epoch, loss_func):
    model.train()

    for batch_idx, (x, y_true) in enumerate(train_loader):

        x, y_true = x.to(device), y_true.to(device)#🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨

        optimizer.zero_grad()
        y_pred = model(x) ##🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨
        loss = loss_func(y_pred, y_true) ##🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨
        loss.backward()
        optimizer.step()

        if batch_idx % log_interval == 0:
            print( f'Train Epoch: {epoch} [{batch_idx*len(x)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}' )

## TEST

In [79]:
def test(model, test_loader, loss_func):
    model.eval()
    
    test_loss = 0
    correct = 0
    
    with torch.no_grad():
        for x, y_true in test_loader:

            x, y_true = x.to(device), y_true.to(device)

            y_pred = model(x) # torch.Size([1000, 10])

            # loss_func(y_pred, y_true).item() ====> Scalar
            test_loss += loss_func(y_pred, y_true).item()  # loss에 대한 스칼라 값 얻음

            # dimension을 유지한 채로 argmax를 찾음
            pred = y_pred.argmax(dim=1, keepdim=True)

            # 그게 정답과 같은지 비교(.eq)해줌 ==> 결과는 True/False로 나옴
            # True인 것들을 합산하면, 제대로 예측한 개수를 얻을 수 있음
            correct += pred.eq(y_true.view_as(pred)).sum().item() 

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print( f'\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({100. * correct / len(test_loader.dataset):.0f}%)\n' )
    
    return accuracy

In [80]:
# 잘되는지 테스트 하는 곳
'''
net = Net(out_channels1=32, out_channels2=64, prob1=0.2, prob2=0.2, prob3=0.2, hidden1=128)
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
train_loader, test_loader = get_mnist_loaders(train_batch_size=64, test_batch_size=1000)

for epoch in range(1, 10+1):
    train(log_interval=100, model=net, train_loader=train_loader, optimizer=optimizer, epoch=epoch, loss_func=criterion)
    test(model=net, test_loader=test_loader, loss_func=criterion)
'''

'\nnet = Net(out_channels1=32, out_channels2=64, prob1=0.2, prob2=0.2, prob3=0.2, hidden1=128)\noptimizer = optim.Adam(net.parameters(), lr=0.001)\ncriterion = nn.CrossEntropyLoss()\ntrain_loader, test_loader = get_mnist_loaders(train_batch_size=64, test_batch_size=1000)\n\nfor epoch in range(1, 10+1):\n    train(log_interval=100, model=net, train_loader=train_loader, optimizer=optimizer, epoch=epoch, loss_func=criterion)\n    test(model=net, test_loader=test_loader, loss_func=criterion)\n'

## 본격적 학습

In [81]:
def train_mnist(trial):

    cfg = { #'device' : "cuda" if torch.cuda.is_available() else "cpu",##🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨
            'train_batch_size' : trial.suggest_categorical('train_batch_size', [64, 128, 256]),
            'test_batch_size' : trial.suggest_categorical('test_batch_size', [64, 128, 256]),
            'n_epochs' : 5,
            'seed' : 0,
            'log_interval' : 100, # 100번째 배치마다 loss 출력할 거임(로그 남기는 거)
            'save_model' : False,
            'lr' : trial.suggest_loguniform('lr', 1e-3, 1e-2),          
            'momentum': trial.suggest_uniform('momentum', 0.4, 0.99),
            'optimizer': trial.suggest_categorical('optimizer',['RAdam', 'Adam', 'RMSProp']),

            'out_channels1': trial.suggest_categorical('out_channels1', [16, 32, 64]),
            'out_channels2': trial.suggest_categorical('out_channels2', [16, 32, 64]),
            'prob1': trial.suggest_uniform('prob1', 0.2, 0.5),
            'prob2': trial.suggest_uniform('prob2', 0.2, 0.5),
            'prob3': trial.suggest_uniform('prob3', 0.2, 0.5),
            'hidden1': trial.suggest_categorical('hidden1', [64, 128, 256]),
            }

    # suggest_categorical에서 사용할 수 있는 건 int, str, bool, ......뿐임. 아래와 같이 작성해야 경고 문구 안 나옴.
    optimizer_candidate = {'RAdam':optim.RAdam, 'Adam':optim.Adam, 'RMSProp':optim.RMSprop}[cfg['optimizer']]


    #torch.manual_seed(cfg['seed'])
    model = Net(cfg['out_channels1'], cfg['out_channels2'], cfg['prob1'], cfg['prob2'], cfg['prob3'], cfg['hidden1']).to(device)
    optimizer = optimizer_candidate(model.parameters(), lr=cfg['lr'])
    loss_func = nn.NLLLoss().to(device)
    train_loader, test_loader = get_mnist_loaders(cfg['train_batch_size'], cfg['test_batch_size'])


    for epoch in range(1, cfg['n_epochs'] + 1):
        train(cfg['log_interval'], model, train_loader, optimizer, epoch, loss_func)
        test_accuracy = test(model, test_loader, loss_func)
        # print(test_accuracy)
        
    if cfg['save_model']:
        torch.save(model.state_dict(), "mnist_cnn.pt")

    return test_accuracy

## Optimization

In [82]:
sampler = optuna.samplers.TPESampler()
    
study = optuna.create_study(sampler=sampler, direction='maximize')
study.optimize(func=train_mnist, n_trials=20)

[32m[I 2022-08-08 20:00:36,888][0m A new study created in memory with name: no-name-73ea5c4d-6872-466e-93e6-4a697e703441[0m



Test set: Average loss: 0.0003, Accuracy: 9726/10000 (97%)


Test set: Average loss: 0.0002, Accuracy: 9827/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9843/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9889/10000 (99%)



[32m[I 2022-08-08 20:01:18,555][0m Trial 0 finished with value: 99.02 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.003038332838428516, 'momentum': 0.7675489001374012, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 32, 'prob1': 0.21765708977921416, 'prob2': 0.26984438461641297, 'prob3': 0.31985142559466134, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9902/10000 (99%)






Test set: Average loss: 0.0003, Accuracy: 9774/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9850/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9858/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9879/10000 (99%)



[32m[I 2022-08-08 20:01:55,786][0m Trial 1 finished with value: 98.85 and parameters: {'train_batch_size': 256, 'test_batch_size': 256, 'lr': 0.0073344386654486055, 'momentum': 0.496217804407862, 'optimizer': <class 'torch.optim.adam.Adam'>, 'out_channels1': 32, 'out_channels2': 64, 'prob1': 0.46213332451334194, 'prob2': 0.3318042220703836, 'prob3': 0.4451061248321557, 'hidden1': 128}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9885/10000 (99%)






Test set: Average loss: 0.0018, Accuracy: 9659/10000 (97%)


Test set: Average loss: 0.0014, Accuracy: 9701/10000 (97%)


Test set: Average loss: 0.0011, Accuracy: 9761/10000 (98%)


Test set: Average loss: 0.0009, Accuracy: 9797/10000 (98%)



[32m[I 2022-08-08 20:02:33,213][0m Trial 2 finished with value: 98.16 and parameters: {'train_batch_size': 256, 'test_batch_size': 64, 'lr': 0.00491097103562962, 'momentum': 0.8957109948010837, 'optimizer': <class 'torch.optim.rmsprop.RMSprop'>, 'out_channels1': 32, 'out_channels2': 32, 'prob1': 0.2632662815491492, 'prob2': 0.2264663927602279, 'prob3': 0.43780786413831707, 'hidden1': 128}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0009, Accuracy: 9816/10000 (98%)






Test set: Average loss: 0.0003, Accuracy: 9792/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9863/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9879/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9882/10000 (99%)



[32m[I 2022-08-08 20:03:14,243][0m Trial 3 finished with value: 98.93 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0030545557793605622, 'momentum': 0.4634149584438463, 'optimizer': <class 'torch.optim.adam.Adam'>, 'out_channels1': 64, 'out_channels2': 32, 'prob1': 0.4375660996286772, 'prob2': 0.3437007057001645, 'prob3': 0.4558095215549236, 'hidden1': 128}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9893/10000 (99%)






Test set: Average loss: 0.0016, Accuracy: 9666/10000 (97%)


Test set: Average loss: 0.0011, Accuracy: 9762/10000 (98%)


Test set: Average loss: 0.0009, Accuracy: 9805/10000 (98%)


Test set: Average loss: 0.0006, Accuracy: 9857/10000 (99%)



[32m[I 2022-08-08 20:04:04,478][0m Trial 4 finished with value: 98.67 and parameters: {'train_batch_size': 64, 'test_batch_size': 64, 'lr': 0.0017011474531837512, 'momentum': 0.7468803550658518, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 32, 'out_channels2': 32, 'prob1': 0.4448928672628161, 'prob2': 0.4942064252013733, 'prob3': 0.30593373430027254, 'hidden1': 64}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0006, Accuracy: 9867/10000 (99%)






Test set: Average loss: 0.0011, Accuracy: 9765/10000 (98%)


Test set: Average loss: 0.0009, Accuracy: 9826/10000 (98%)


Test set: Average loss: 0.0006, Accuracy: 9863/10000 (99%)


Test set: Average loss: 0.0007, Accuracy: 9854/10000 (99%)



[32m[I 2022-08-08 20:04:42,289][0m Trial 5 finished with value: 98.74 and parameters: {'train_batch_size': 256, 'test_batch_size': 64, 'lr': 0.007178165653462983, 'momentum': 0.5886980579554965, 'optimizer': <class 'torch.optim.adam.Adam'>, 'out_channels1': 16, 'out_channels2': 32, 'prob1': 0.4715309303638362, 'prob2': 0.36734460928981894, 'prob3': 0.2629812116409431, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0006, Accuracy: 9874/10000 (99%)






Test set: Average loss: 0.0005, Accuracy: 9770/10000 (98%)


Test set: Average loss: 0.0004, Accuracy: 9827/10000 (98%)


Test set: Average loss: 0.0004, Accuracy: 9829/10000 (98%)


Test set: Average loss: 0.0003, Accuracy: 9870/10000 (99%)



[32m[I 2022-08-08 20:05:28,658][0m Trial 6 finished with value: 98.85 and parameters: {'train_batch_size': 64, 'test_batch_size': 128, 'lr': 0.0021752022484455747, 'momentum': 0.8969680331951099, 'optimizer': <class 'torch.optim.adam.Adam'>, 'out_channels1': 16, 'out_channels2': 32, 'prob1': 0.29733475394544656, 'prob2': 0.4797836668176265, 'prob3': 0.27444446784310084, 'hidden1': 64}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0003, Accuracy: 9885/10000 (99%)






Test set: Average loss: 0.0005, Accuracy: 9770/10000 (98%)


Test set: Average loss: 0.0004, Accuracy: 9828/10000 (98%)


Test set: Average loss: 0.0004, Accuracy: 9840/10000 (98%)


Test set: Average loss: 0.0003, Accuracy: 9865/10000 (99%)



[32m[I 2022-08-08 20:06:11,088][0m Trial 7 finished with value: 98.77 and parameters: {'train_batch_size': 128, 'test_batch_size': 128, 'lr': 0.006866354059050057, 'momentum': 0.8558903695388249, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 64, 'out_channels2': 32, 'prob1': 0.4327045703172899, 'prob2': 0.29431343693980955, 'prob3': 0.3521549221254967, 'hidden1': 64}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0003, Accuracy: 9877/10000 (99%)






Test set: Average loss: 0.0019, Accuracy: 9607/10000 (96%)


Test set: Average loss: 0.0013, Accuracy: 9700/10000 (97%)


Test set: Average loss: 0.0010, Accuracy: 9781/10000 (98%)


Test set: Average loss: 0.0008, Accuracy: 9819/10000 (98%)



[32m[I 2022-08-08 20:06:52,738][0m Trial 8 finished with value: 98.39 and parameters: {'train_batch_size': 128, 'test_batch_size': 64, 'lr': 0.004496781966139664, 'momentum': 0.420487032660392, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 16, 'prob1': 0.3896318613823224, 'prob2': 0.4937285636682606, 'prob3': 0.2074229881607425, 'hidden1': 128}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0008, Accuracy: 9839/10000 (98%)






Test set: Average loss: 0.0021, Accuracy: 9567/10000 (96%)


Test set: Average loss: 0.0012, Accuracy: 9759/10000 (98%)


Test set: Average loss: 0.0009, Accuracy: 9840/10000 (98%)


Test set: Average loss: 0.0008, Accuracy: 9820/10000 (98%)



[32m[I 2022-08-08 20:07:30,198][0m Trial 9 finished with value: 98.59 and parameters: {'train_batch_size': 256, 'test_batch_size': 64, 'lr': 0.002275077814573021, 'momentum': 0.9283807828698443, 'optimizer': <class 'torch.optim.rmsprop.RMSprop'>, 'out_channels1': 32, 'out_channels2': 16, 'prob1': 0.41819683813225933, 'prob2': 0.2739757756949725, 'prob3': 0.4195730328594575, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0007, Accuracy: 9859/10000 (99%)






Test set: Average loss: 0.0005, Accuracy: 9530/10000 (95%)


Test set: Average loss: 0.0002, Accuracy: 9795/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9848/10000 (98%)


Test set: Average loss: 0.0001, Accuracy: 9881/10000 (99%)



[32m[I 2022-08-08 20:08:11,356][0m Trial 10 finished with value: 98.94 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0011240098530167213, 'momentum': 0.7017341038886893, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.2254439842793332, 'prob2': 0.21219538425935727, 'prob3': 0.36230999108062484, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9894/10000 (99%)






Test set: Average loss: 0.0006, Accuracy: 9544/10000 (95%)


Test set: Average loss: 0.0002, Accuracy: 9810/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9846/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9862/10000 (99%)



[32m[I 2022-08-08 20:08:52,572][0m Trial 11 finished with value: 98.88 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0010179052119993512, 'momentum': 0.7223078223835823, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.20070363651350284, 'prob2': 0.20074151246839697, 'prob3': 0.3689375874356092, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9888/10000 (99%)






Test set: Average loss: 0.0005, Accuracy: 9620/10000 (96%)


Test set: Average loss: 0.0002, Accuracy: 9801/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9859/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9898/10000 (99%)



[32m[I 2022-08-08 20:09:33,912][0m Trial 12 finished with value: 98.8 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0010909009072298693, 'momentum': 0.5898841393915872, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.20097023853698176, 'prob2': 0.2542582567025402, 'prob3': 0.3857756653377118, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9880/10000 (99%)






Test set: Average loss: 0.0005, Accuracy: 9640/10000 (96%)


Test set: Average loss: 0.0002, Accuracy: 9823/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9861/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9886/10000 (99%)



[32m[I 2022-08-08 20:10:15,202][0m Trial 13 finished with value: 98.83 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0015072399383461131, 'momentum': 0.7740270403565286, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.27489053367179567, 'prob2': 0.40725163768987893, 'prob3': 0.31669881185166276, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9883/10000 (99%)






Test set: Average loss: 0.0003, Accuracy: 9747/10000 (97%)


Test set: Average loss: 0.0002, Accuracy: 9808/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9864/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9882/10000 (99%)



[32m[I 2022-08-08 20:10:56,190][0m Trial 14 finished with value: 98.77 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.004222060423749639, 'momentum': 0.6484440347560189, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.3236192127583273, 'prob2': 0.2454307323189835, 'prob3': 0.3953689103931352, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0002, Accuracy: 9877/10000 (99%)






Test set: Average loss: 0.0004, Accuracy: 9669/10000 (97%)


Test set: Average loss: 0.0003, Accuracy: 9786/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9844/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9847/10000 (98%)



[32m[I 2022-08-08 20:11:37,243][0m Trial 15 finished with value: 98.83 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0027887177960779653, 'momentum': 0.8144518790099673, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 16, 'prob1': 0.23864206224518766, 'prob2': 0.2993553046753705, 'prob3': 0.329688666012933, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9883/10000 (99%)






Test set: Average loss: 0.0012, Accuracy: 9528/10000 (95%)


Test set: Average loss: 0.0009, Accuracy: 9625/10000 (96%)


Test set: Average loss: 0.0007, Accuracy: 9714/10000 (97%)


Test set: Average loss: 0.0008, Accuracy: 9673/10000 (97%)



[32m[I 2022-08-08 20:12:23,516][0m Trial 16 finished with value: 97.57 and parameters: {'train_batch_size': 64, 'test_batch_size': 128, 'lr': 0.009896241442532249, 'momentum': 0.6704884663993561, 'optimizer': <class 'torch.optim.rmsprop.RMSprop'>, 'out_channels1': 64, 'out_channels2': 64, 'prob1': 0.34892543668463377, 'prob2': 0.23095220101298464, 'prob3': 0.4905793652502975, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0006, Accuracy: 9757/10000 (98%)






Test set: Average loss: 0.0005, Accuracy: 9607/10000 (96%)


Test set: Average loss: 0.0002, Accuracy: 9788/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9845/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9855/10000 (99%)



[32m[I 2022-08-08 20:13:04,778][0m Trial 17 finished with value: 98.93 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0013599893952876603, 'momentum': 0.5933373357145804, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 32, 'prob1': 0.22309386441733817, 'prob2': 0.20095224562820418, 'prob3': 0.2781627639361019, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9893/10000 (99%)






Test set: Average loss: 0.0003, Accuracy: 9724/10000 (97%)


Test set: Average loss: 0.0002, Accuracy: 9830/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9861/10000 (99%)


Test set: Average loss: 0.0001, Accuracy: 9885/10000 (99%)



[32m[I 2022-08-08 20:13:45,872][0m Trial 18 finished with value: 98.91 and parameters: {'train_batch_size': 128, 'test_batch_size': 256, 'lr': 0.0020872318920785254, 'momentum': 0.9873777809420754, 'optimizer': <class 'torch.optim.radam.RAdam'>, 'out_channels1': 16, 'out_channels2': 64, 'prob1': 0.2608960588142399, 'prob2': 0.3061188312566299, 'prob3': 0.23176978866498277, 'hidden1': 256}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0001, Accuracy: 9891/10000 (99%)






Test set: Average loss: 0.0006, Accuracy: 9534/10000 (95%)


Test set: Average loss: 0.0004, Accuracy: 9667/10000 (97%)


Test set: Average loss: 0.0003, Accuracy: 9776/10000 (98%)


Test set: Average loss: 0.0002, Accuracy: 9803/10000 (98%)



[32m[I 2022-08-08 20:14:31,461][0m Trial 19 finished with value: 98.3 and parameters: {'train_batch_size': 64, 'test_batch_size': 256, 'lr': 0.0036151216702108332, 'momentum': 0.7828590183994253, 'optimizer': <class 'torch.optim.rmsprop.RMSprop'>, 'out_channels1': 64, 'out_channels2': 16, 'prob1': 0.3057144797270942, 'prob2': 0.3832311358153075, 'prob3': 0.3431025121965455, 'hidden1': 64}. Best is trial 0 with value: 99.02.[0m



Test set: Average loss: 0.0002, Accuracy: 9830/10000 (98%)



## The Best Trial 확인

In [84]:
best_trial = study.best_trial

for key, value in best_trial.params.items():
    print("{}: {}".format(key, value))

train_batch_size: 128
test_batch_size: 256
lr: 0.003038332838428516
momentum: 0.7675489001374012
optimizer: <class 'torch.optim.radam.RAdam'>
out_channels1: 16
out_channels2: 32
prob1: 0.21765708977921416
prob2: 0.26984438461641297
prob3: 0.31985142559466134
hidden1: 256


In [89]:
optuna.visualization.plot_parallel_coordinate(study,params=['lr','momentum'])

## The End