# HPO with Optuna

In [1]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import os, random
from typing import List

import optuna
from optuna import Trial

from data import load_data

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(929)

## Import Classifiers

In [2]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB
from xgboost import XGBClassifier
from lightgbm.sklearn import LGBMClassifier
from catboost import CatBoostClassifier

METRIC = accuracy_score
METRIC_NAME = 'Accuracy'
DIRECTION = 'maximize'

## Import Regressors

In [3]:
# from sklearn.metrics import mean_squared_error
# from sklearn.linear_model import BayesianRidge, ElasticNet, OrthogonalMatchingPursuit
# from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, RandomForestRegressor
# from xgboost import XGBRegressor
# from lightgbm import LGBMRegressor
# from catboost import CatBoostRegressor

# METRIC = mean_squared_error
# METRIC_NAME = 'MSE'
# DIRECTION = 'minimize'

## Load Data

In [4]:
x_train, x_test, y_train, y_test = load_data()
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

100%|[32m██████████[0m| 400/400 [00:09<00:00, 41.73it/s]


Dataset 생성 완료
(280, 40) (120, 40) (280,) (120,)


## Optuna

In [5]:
def make_params(trial: Trial, keys: List[str]):
    params = dict()
    params['n_estimators'] = trial.suggest_int('n_estimators',100,2000,10) if 'n_estimators' in keys else None
    params['max_depth'] = trial.suggest_int('max_depth',2,16) if 'max_depth' in keys else None
    params['learning_rate'] = trial.suggest_categorical('learning_rate',[0.001,0.01,1]) if 'learning_rate' in keys else None
    params['subsample'] = trial.suggest_categorical('subsample',[0.5,0.75,1]) if 'subsample' in keys else None
    params['n_neighbors'] = trial.suggest_int('n_neighbors',2,50) if 'n_neighbors' in keys else None
    params['C'] = trial.suggest_categorical('C',[1,10,100,1000]) if 'C' in keys else None
    params['gamma'] = trial.suggest_categorical('gamma',[1e-1,1e-2,1e-3,1e-4]) if 'gamma' in keys else None
    params.update({
        'criterion': 'absolute_error',
        'random_state': 929,
        'seed': 929,
        'verbose': 0,
        'verbosity': 0,
    })
    return {key:value for key,value in params.items() if key in keys}

In [6]:
def objective(trial: Trial, model: any, keys: List[str]) -> float:
    params = make_params(trial, keys)
    model = model(**params)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    return METRIC(y_test, y_pred)

In [7]:
def print_result(study: optuna.study.Study):
    trial = study.best_trial
    print(f'{METRIC_NAME}:', trial.value)
    print('Best Hyperparameters:', trial.params)

## XGBoost

In [None]:
def objective_xgb(trial: Trial):
    keys = ['n_estimators','max_depth','seed','verbosity']
    return objective(trial, XGBClassifier, keys)

study_xgb = optuna.create_study(direction=DIRECTION)
study_xgb.optimize(objective_xgb, n_trials=100)

In [9]:
print_result(study_xgb) # 640, 10

Accuracy: 0.7083333333333334
Best Hyperparameters: {'n_estimators': 1790, 'max_depth': 15}


## LightGBM

In [None]:
def objective_lgbm(trial: Trial):
    keys = ['n_estimators','max_depth','random_state']
    return objective(trial, LGBMClassifier, keys)

study_lgbm = optuna.create_study(direction=DIRECTION)
study_lgbm.optimize(objective_lgbm, n_trials=100)

In [11]:
print_result(study_lgbm)

Accuracy: 0.725
Best Hyperparameters: {'n_estimators': 1850, 'max_depth': 9}


## CatBoost

In [None]:
def objective_cat(trial: Trial):
    keys = ['n_estimators','max_depth','random_state','verbose']
    return objective(trial, CatBoostClassifier, keys)

study_cat = optuna.create_study(direction=DIRECTION)
study_cat.optimize(objective_cat, n_trials=100)

In [None]:
print_result(study_cat)

## Gradient Boosting

In [None]:
def objective_gbr(trial: Trial):
    keys = ['n_estimators','max_depth','learning_rate','subsample','random_state']
    return objective(trial, GradientBoostingClassifier, keys)

study_gbr = optuna.create_study(direction=DIRECTION)
study_gbr.optimize(objective_gbr, n_trials=100)

In [None]:
print_result(study_gbr)

## Extra Trees

In [None]:
def objective_et(trial: Trial):
    keys = ['n_estimators','max_depth','random_state']
    return objective(trial, ExtraTreesClassifier, keys)

study_et = optuna.create_study(direction=DIRECTION)
study_et.optimize(objective_et, n_trials=100)

In [None]:
print_result(study_et)

## Random Forest

In [None]:
def objective_rf(trial: Trial):
    keys = ['n_estimators','max_depth','random_state']
    return objective(trial, RandomForestClassifier, keys)

study_rf = optuna.create_study(direction=DIRECTION)
study_rf.optimize(objective_rf, n_trials=100)

In [None]:
print_result(study_rf)

## KNN

In [None]:
def objective_knn(trial: Trial):
    keys = ['n_neighbors']
    return objective(trial, KNeighborsClassifier, keys)

study_knn = optuna.create_study(direction=DIRECTION)
study_knn.optimize(objective_knn, n_trials=49)

In [19]:
print_result(study_knn)

Accuracy: 0.6083333333333333
Best Hyperparameters: {'n_neighbors': 7}


## SVM

In [None]:
def objective_svm(trial: Trial):
    keys = ['C','gamma','random_state']
    return objective(trial, SVC, keys)

study_svm = optuna.create_study(direction=DIRECTION)
study_svm.optimize(objective_svm, n_trials=16)

In [21]:
print_result(study_svm)

Accuracy: 0.725
Best Hyperparameters: {'C': 10, 'gamma': 0.0001}


## Decision Tree

In [None]:
def objective_dt(trial: Trial):
    keys = ['max_depth','random_state']
    return objective(trial, RandomForestClassifier, keys)

study_dt = optuna.create_study(direction=DIRECTION)
study_dt.optimize(objective_dt, n_trials=100)

In [23]:
print_result(study_dt)

Accuracy: 0.725
Best Hyperparameters: {'n_estimators': 1130, 'max_depth': 12}
