## Optuna로 Hyperparameter Tuning 하기
- Optuna는 여러 Hyperparameter에 대해 자동으로 실험을 수행해주는 Library임
- trial ====> 어떤 hyperparameter에 대해 시도 한 번 해보는 걸 'trial'이라고 부름
- suggest_float ====> 범위 내에서 float 값을 hyperparameter로 선택
- study ====> 최적화 프로세스 전체를 이르는 말

In [174]:
import optuna
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

#### Hyperparameter
- Hidden Unit
- Dropout
- Weight Initialization
- learning_rate
- Momentum(Optimizer???)
- Epoch
- Batch_size
- Iteration
- Cost Function
- Regularizer

#### Data

In [175]:
df = pd.read_csv('C:/Users/user/Desktop/HEMS/heart.csv')
df.head(2)
df.shape

(918, 12)

In [176]:
df = pd.get_dummies(df)
df.head(2)

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease,Sex_F,Sex_M,ChestPainType_ASY,...,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ExerciseAngina_N,ExerciseAngina_Y,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,40,140,289,0,172,0.0,0,0,1,0,...,0,0,0,1,0,1,0,0,0,1
1,49,160,180,0,156,1.0,1,1,0,0,...,1,0,0,1,0,1,0,0,1,0


In [177]:
class Dataset(torch.utils.data.Dataset):

    def __init__(self, df):
        self.labels = [0 if label == 0 else 1 for label in df['HeartDisease']]
        self.features = df.drop(columns=['HeartDisease'], axis=1).values.tolist()

    def classes(self):
        return self.labels

    def __len__(self):
        return len(self.labels)

    def get_batch_labels(self, idx):
        return np.array(self.labels[idx])

    def get_batch_features(self, idx):
        return np.array(self.features[idx])

    def __getitem__(self, idx):
        batch_features = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)

        return batch_features, batch_y

#### 임의의 모델

In [178]:
def build_model_custom(trial):
    
    n_layers = trial.suggest_int("n_layers", 1, 8) # 🔥🔥🔥🔥🔥몇 단 쌓을 것인가(hyperparameter)
    layers = []

    in_features = 20 # 입력 사이즈는 데이터가 원래 그렇게 생겼기 때문에 건들면 안 됨
    
    # 🔥🔥🔥🔥여기부터🔥🔥🔥🔥

    for i in range(n_layers):
        
        out_features = trial.suggest_int("n_units_l{}".format(i), 4, 129) # 🔥🔥🔥🔥🔥 units의 수(hyperparameter)
        prob = trial.suggest_uniform("dropout_l{}".format(i), 0.0, 0.5) # 🔥🔥🔥🔥🔥 dropout의 확률(hyperparameter)
        
        layers.append(nn.Linear(in_features, out_features))
        layers.append(nn.LeakyReLU())
        layers.append(nn.Dropout(prob)) #🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨🟨

        in_features = out_features
        
    layers.append(nn.Linear(in_features, 2)) # 출력 사이즈 2는 절대 건들면 안 됨
    layers.append(nn.LeakyReLU())

    # 🔥🔥🔥🔥여기까지🔥🔥🔥🔥 Neural Network 구성
    
    return nn.Sequential(*layers) # nn.Sequential로 넘겨줌

#### Training and Validation

In [179]:
getattr(optim, 'RAdam')

torch.optim.radam.RAdam

In [180]:
EPOCHS = 30

def train_and_evaluate(param, model, trial):
    
    df = df = pd.read_csv('C:/Users/user/Desktop/HEMS/heart.csv')
    df = pd.get_dummies(df)
    
    train_data, val_data = train_test_split(df, test_size = 0.2, random_state = 42)
    train, val = Dataset(train_data), Dataset(val_data)

    train_dataloader = torch.utils.data.DataLoader(train, batch_size=2, shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(val, batch_size=2)

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    criterion = nn.CrossEntropyLoss()


    #🔥🔥🔥🔥🔥optimizer 뭐 쓸거냐🔥🔥🔥🔥🔥🔥
    #🔥🔥🔥🔥🔥learning_rate 몇으로 할거냐🔥🔥🔥🔥🔥🔥
    optimizer = getattr(optim, param['optimizer'])(model.parameters(), lr= param['learning_rate']) 
    if use_cuda:

            model = model.cuda()
            criterion = criterion.cuda()


    for epoch_num in range(EPOCHS):

            total_acc_train = 0
            total_loss_train = 0

            for train_input, train_label in train_dataloader:
                model.train()

                train_label = train_label.to(device)
                train_input = train_input.to(device)

                output = model(train_input.float())
                
                batch_loss = criterion(output, train_label.long())
                total_loss_train += batch_loss.item()
                
                acc = (output.argmax(dim=1) == train_label).sum().item()
                total_acc_train += acc

                model.zero_grad()
                batch_loss.backward()
                optimizer.step()
            
            total_acc_val = 0
            total_loss_val = 0

            with torch.no_grad():

                for val_input, val_label in val_dataloader:
                    model.eval()

                    val_label = val_label.to(device)
                    val_input = val_input.to(device)

                    output = model(val_input.float())

                    batch_loss = criterion(output, val_label.long())
                    total_loss_val += batch_loss.item()
                    
                    acc = (output.argmax(dim=1) == val_label).sum().item()
                    total_acc_val += acc
            
            accuracy = total_acc_val/len(val_data)
            
            # Add prune mechanism
            trial.report(accuracy, epoch_num)

            if trial.should_prune():
                raise optuna.exceptions.TrialPruned()

    return accuracy

#### optuna.create_study
- direction ====> ROC, AUC 같은 성능 지표 향상이 목적이면 'maximize'
- direction ====> loss, MSE 같은 error 줄이는 게 목적이면 'minimize'
- sampler ====> hyperparameter를 어떤 방식으로 뽑아올 거냐(선택할 거냐)
- GridSampler ====> search space 내에서 combination 함
- RandomSampler ====> search space 내에서 걍 random하게 뽑아옴
- TPESampler ====> Bayesian hyperparameter optimization(Default)
- pruner ====> 시간 절약을 위해 사용
- PercentilePruner ====> 현재 trial의 intermediate value가 같은 training epochs 동안 백분위 하위에 속하면 prune함

#### Objective Function

In [181]:
# Search space Definition

EPOCHS = 30

def objective(trial):

        params = {'learning_rate': trial.suggest_loguniform('learning_rate', 1e-5, 1e-1),
                'optimizer': trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "RAdam"]) }

        model = build_model_custom(trial)

        accuracy = train_and_evaluate(params, model, trial)

        return accuracy

#### Bayesian Optimization

In [182]:
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(), \
                            pruner=optuna.pruners.PercentilePruner(25))

study.optimize(objective, n_trials=150)

[32m[I 2022-08-07 15:48:06,237][0m A new study created in memory with name: no-name-e1208630-6902-4de2-9219-c2836cf5f62e[0m
[32m[I 2022-08-07 15:48:48,655][0m Trial 0 finished with value: 0.7228260869565217 and parameters: {'learning_rate': 0.0019318668009019153, 'optimizer': 'Adam', 'n_layers': 6, 'n_units_l0': 44, 'dropout_l0': 0.3971839612138412, 'n_units_l1': 97, 'dropout_l1': 0.24221758879174382, 'n_units_l2': 36, 'dropout_l2': 0.35823508508652563, 'n_units_l3': 65, 'dropout_l3': 0.29434225448489476, 'n_units_l4': 73, 'dropout_l4': 0.1544305315497076, 'n_units_l5': 97, 'dropout_l5': 0.16105879503190002}. Best is trial 0 with value: 0.7228260869565217.[0m
[32m[I 2022-08-07 15:49:29,455][0m Trial 1 finished with value: 0.6413043478260869 and parameters: {'learning_rate': 0.00011375979154043635, 'optimizer': 'Adam', 'n_layers': 6, 'n_units_l0': 82, 'dropout_l0': 0.32896282830094326, 'n_units_l1': 76, 'dropout_l1': 0.3263020982917403, 'n_units_l2': 4, 'dropout_l2': 0.197281127

#### Best Trial 확인

In [183]:
best_trial = study.best_trial

for key, value in best_trial.params.items():
    print("{}: {}".format(key, value))

learning_rate: 0.009401875489771557
optimizer: RMSprop
n_layers: 1
n_units_l0: 51
dropout_l0: 0.2889951742114946


#### Visualization

In [184]:
# objective function에 대한 시각화(여기서는 Accuracy)
optuna.visualization.plot_intermediate_values(study)

In [185]:
# oprimization history에 대한 시각화
optuna.visualization.plot_optimization_history(study)

In [186]:
# 걍 해당하는 값들을 연결한 선임
# 이걸로 얻을 수 있는 insight는 없음
optuna.visualization.plot_parallel_coordinate(study)

In [187]:
optuna.visualization.plot_param_importances(study)

In [188]:
optuna.visualization.plot_contour(study)

## The End