In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import optuna

class Net(nn.Module):
    """Fully connected classifier whose depth is picked by an Optuna trial.

    The trial supplies ``num_layer`` (an integer between 3 and 7). The first
    layer takes 300 input features and the final layer emits 4 outputs. Every
    hidden layer has half as many units as the smallest power of two that is
    greater than or equal to its own input width, so the widths run
    300 -> 256 -> 128 -> 64 -> ... until the final 4-unit layer.
    """

    def __init__(self, trial):
        """Build the stack of ``nn.Linear`` layers.

        Args:
            trial: object exposing ``suggest_int(name, low, high)``; it is
                queried once for ``'num_layer'``.
        """
        super().__init__()

        depth = trial.suggest_int('num_layer', 3, 7)
        in_features = 300
        final_features = 4

        stack = nn.ModuleList()
        for idx in range(depth):
            if idx == depth - 1:
                out_features = final_features
            else:
                # Smallest power of two that is >= in_features, then halved.
                width = 1
                while width < in_features:
                    width *= 2
                out_features = width // 2
            stack.append(nn.Linear(in_features, out_features))
            in_features = out_features

        self.layers = stack

    def forward(self, x):
        """Apply every layer in order, with ReLU after all but the last one."""
        last = len(self.layers) - 1
        for idx, layer in enumerate(self.layers):
            x = layer(x)
            if idx != last:
                x = F.relu(x)
        return x

In [2]:
def make_optimizer(trial, net):
    """Create an SGD optimizer with trial-sampled hyperparameters.

    Args:
        trial: object exposing ``suggest_loguniform(name, low, high)``. It is
            queried for ``'learning_rate'`` in [1e-3, 1e1] and then for
            ``'weight_decay'`` in [1e-10, 1e-6].
        net: module whose ``parameters()`` the optimizer will update.

    Returns:
        An ``optim.SGD`` instance over ``net.parameters()``.
    """
    # Dict entries are evaluated top to bottom, so the learning rate is
    # sampled before the weight decay.
    sgd_kwargs = {
        'lr': trial.suggest_loguniform('learning_rate', 1e-3, 1e1),
        'weight_decay': trial.suggest_loguniform('weight_decay', 1e-10, 1e-6),
    }
    return optim.SGD(net.parameters(), **sgd_kwargs)

In [3]:
def make_dataset(trial):
    """Load the train/validation arrays and wrap the training set in a loader.

    The batch size is sampled from the trial as ``'batch_size'`` (integer in
    [16, 256]). Features are cast to float32 and labels to int64. The training
    loader reshuffles the samples every epoch so that SGD does not see the
    same batches in the same on-disk order on every pass.

    Args:
        trial: object exposing ``suggest_int(name, low, high)``.

    Returns:
        Tuple ``(dataloader, queue_x_valid, queue_y_valid)``: a
        ``DataLoader`` over the training ``TensorDataset`` plus the validation
        feature and label tensors.

    Raises:
        Whatever ``np.load`` raises if one of the four ``.npy`` files under
        ``results/`` is missing or unreadable.
    """
    bs = trial.suggest_int('batch_size', 16, 256)

    def _load(path, labels):
        # Labels become int64 (what CrossEntropyLoss expects as class indices);
        # features become float32.
        tensor = torch.from_numpy(np.load(path))
        return tensor.long() if labels else tensor.float()

    queue_x = _load('results/70_result_queue_x_train.npy', labels=False)
    queue_y = _load('results/70_result_queue_y_train.npy', labels=True)
    dataset = torch.utils.data.TensorDataset(queue_x, queue_y)
    # shuffle=True: draw a fresh random sample order each epoch.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=bs, shuffle=True)

    queue_x_valid = _load('results/70_result_queue_x_valid.npy', labels=False)
    queue_y_valid = _load('results/70_result_queue_y_valid.npy', labels=True)

    return dataloader, queue_x_valid, queue_y_valid

In [4]:
def train(net, criterion, optimizer, dataloader, num_epochs=100):
    """Train ``net`` with mini-batch gradient steps.

    Args:
        net: module to optimise; it is switched to training mode.
        criterion: callable ``criterion(prediction, target)`` returning a
            scalar loss tensor.
        optimizer: optimizer already bound to ``net``'s parameters.
        dataloader: sized iterable yielding ``(x, y)`` batches.
        num_epochs: number of full passes over ``dataloader`` (default 100,
            the value that used to be hard-coded).

    Returns:
        Mean per-batch loss of the final epoch, as a float.

    Raises:
        ValueError: if ``num_epochs`` is less than 1 or ``dataloader`` yields
            no batches (previously an opaque ``ZeroDivisionError`` raised only
            after all epochs had run).
    """
    if num_epochs < 1:
        raise ValueError('num_epochs must be at least 1, got {0}'.format(num_epochs))
    num_batches = len(dataloader)
    if num_batches == 0:
        raise ValueError('dataloader yields no batches')

    # Make sure mode-dependent layers (dropout, batch norm) behave as in training.
    net.train()

    for _ in range(num_epochs):
        # Reset every epoch so only the final epoch's loss is reported.
        running_loss = 0.0

        for x, y in dataloader:
            optimizer.zero_grad()
            pre_y = net(x)
            loss = criterion(pre_y, y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

    return running_loss / num_batches

In [5]:
def valid(net, criterion, queue_x_valid, queue_y_valid):
    """Compute classification accuracy of ``net`` on the validation tensors.

    The forward pass runs in evaluation mode with gradient tracking disabled,
    so no autograd graph is built for the whole validation set. The module's
    previous training/eval mode is restored before returning.

    Args:
        net: module mapping ``queue_x_valid`` to per-class scores with the
            classes along dimension 1.
        criterion: unused; kept so existing callers do not need to change.
        queue_x_valid: validation inputs.
        queue_y_valid: integer class labels, one per validation sample.

    Returns:
        Fraction of samples whose highest-scoring class equals the label.
    """
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            pre_queue_y_valid = net(queue_x_valid)
    finally:
        # Leave the module in whatever mode the caller had it in.
        net.train(was_training)

    pre_labels = pre_queue_y_valid.argmax(dim=1)
    accuracy = (pre_labels == queue_y_valid).sum().item() / len(queue_y_valid)

    return accuracy

In [6]:
def objective(trial):
    """Optuna objective: train one sampled configuration and score it.

    The trial is consulted, in order, for the network depth (``Net``), the
    optimizer settings (``make_optimizer``) and the batch size
    (``make_dataset``). The freshly built network is trained and then
    evaluated on the validation tensors.

    Args:
        trial: the Optuna trial supplying hyperparameter suggestions.

    Returns:
        Validation error rate, i.e. ``1 - accuracy``.
    """
    model = Net(trial)
    loss_fn = nn.CrossEntropyLoss()
    sgd = make_optimizer(trial, model)
    train_loader, x_valid, y_valid = make_dataset(trial)

    train(model, loss_fn, sgd, train_loader)

    error_rate = 1 - valid(model, loss_fn, x_valid, y_valid)
    return error_rate

In [7]:
# Minimise the value returned by `objective` over 100 trials.
# 'minimize' is already the default direction; it is spelled out for readers.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)

[I 2020-04-26 20:51:45,194] Finished trial#0 with value: 0.24137931034482762 with parameters: {'num_layer': 5, 'learning_rate': 0.01732830658239056, 'weight_decay': 8.401974096500266e-08, 'batch_size': 219}. Best is trial#0 with value: 0.24137931034482762.
[I 2020-04-26 20:52:22,991] Finished trial#1 with value: 0.5839580209895052 with parameters: {'num_layer': 6, 'learning_rate': 0.8325541833231815, 'weight_decay': 1.4695232484499537e-07, 'batch_size': 50}. Best is trial#0 with value: 0.24137931034482762.
[I 2020-04-26 20:52:47,594] Finished trial#2 with value: 0.16116941529235385 with parameters: {'num_layer': 5, 'learning_rate': 0.02158948078724805, 'weight_decay': 5.374336742811176e-08, 'batch_size': 105}. Best is trial#2 with value: 0.16116941529235385.
[I 2020-04-26 20:53:12,776] Finished trial#3 with value: 0.22488755622188905 with parameters: {'num_layer': 5, 'learning_rate': 0.009270453878670148, 'weight_decay': 3.1790230101594304e-10, 'batch_size': 100}. Best is trial#2 with 

[I 2020-04-26 21:18:26,077] Finished trial#62 with value: 0.13043478260869568 with parameters: {'num_layer': 4, 'learning_rate': 1.0593846747448965, 'weight_decay': 2.641981084980579e-08, 'batch_size': 256}. Best is trial#16 with value: 0.11694152923538226.
[I 2020-04-26 21:18:44,750] Finished trial#63 with value: 0.15217391304347827 with parameters: {'num_layer': 4, 'learning_rate': 0.03669233694075693, 'weight_decay': 1.14041429600516e-07, 'batch_size': 233}. Best is trial#16 with value: 0.11694152923538226.
[I 2020-04-26 21:19:03,376] Finished trial#64 with value: 0.13268365817091454 with parameters: {'num_layer': 4, 'learning_rate': 0.2712959124139724, 'weight_decay': 2.8362285365403683e-07, 'batch_size': 216}. Best is trial#16 with value: 0.11694152923538226.
[I 2020-04-26 21:19:22,917] Finished trial#65 with value: 0.14767616191904043 with parameters: {'num_layer': 7, 'learning_rate': 0.1198941303703335, 'weight_decay': 4.145698356965399e-08, 'batch_size': 248}. Best is trial#16 

In [8]:
# Report the best objective value recorded by the study (`objective` returns
# 1 - validation accuracy) and the hyperparameters of the trial that produced it.
print('best value: {0}'.format(study.best_value))
print('best params: {0}'.format(study.best_params))

best value: 0.11694152923538226
best params: {'num_layer': 4, 'learning_rate': 0.42201658501567807, 'weight_decay': 1.1561179942699311e-08, 'batch_size': 42}
