# HPO with Optuna

In [1]:
import os
root_dir = '../../' if os.getcwd().split('/')[-1] != 'customer' else './'
os.chdir(root_dir)

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import optuna
from optuna import Trial, visualization

from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import BayesianRidge, ElasticNet, OrthogonalMatchingPursuit
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

import customer_data

def NMAE(true, pred):
    mae = mean_absolute_error(true,pred)
    score = mae / np.mean(np.abs(true))
    return score

In [2]:
train_data, test_data, train_label, validation_set = customer_data.load_data()
x_train, x_test, y_train, y_test = validation_set
print(train_data.shape, test_data.shape, train_label.shape)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

(1102, 86) (1108, 86) (1102, 1)
(771, 86) (331, 86) (771, 1) (331, 1)


## LightGBM

In [3]:
def objective_lgbm(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2),
    }

    model = LGBMRegressor(**params,random_state=0)
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_lgbm = optuna.create_study(direction='minimize')
study_lgbm.optimize(objective_lgbm,n_trials=100)

In [5]:
trial_lgbm = study_lgbm.best_trial
print('NMAE:', trial_lgbm.value)
print('Best Hyperparameters:', trial_lgbm.params)

NMAE: 0.20878214283479787
Best Hyperparameters: {'n_estimators': 100, 'max_depth': 8}


# XGBoost

In [31]:
def objective_xgb(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2)
    }

    model = XGBRegressor(**params,random_state=0,verbosity=0)
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_xgb = optuna.create_study(direction='minimize')
study_xgb.optimize(objective_xgb,n_trials=100,show_progress_bar=True)

In [33]:
trial_xgb = study_xgb.best_trial
print('NMAE:', trial_xgb.value)
print('Best Hyperparameters:', trial_xgb.params)

NMAE: 0.20934568767583003
Best Hyperparameters: {'n_estimators': 100, 'max_depth': 4}


# CatBoost

In [40]:
def objective_cat(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2),
        'learning_rate': trial.suggest_categorical('learning_rate',[0.001,0.01,1]),
        'subsample': trial.suggest_categorical('subsample',[0.5,0.75,1])
    }

    model = CatBoostRegressor(**params,random_state=0,verbose=0)
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_cat = optuna.create_study(direction='minimize')
study_cat.optimize(objective_cat,n_trials=100,show_progress_bar=True)

In [42]:
trial_cat = study_cat.best_trial
print('NMAE:', trial_cat.value)
print('Best Hyperparameters:', trial_cat.params)

NMAE: 0.206561301309736
Best Hyperparameters: {'n_estimators': 1000, 'max_depth': 8, 'learning_rate': 0.01, 'subsample': 0.5}


## Gradient Boosting

In [34]:
def objective_gbr(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2),
        'learning_rate': trial.suggest_categorical('learning_rate',[0.001,0.01,1]),
        'subsample': trial.suggest_categorical('subsample',[0.5,0.75,1])
    }

    model = GradientBoostingRegressor(**params,random_state=0)
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_gbr = optuna.create_study(direction='minimize')
study_gbr.optimize(objective_gbr,n_trials=100,show_progress_bar=True)

In [11]:
trial_gbr = study_gbr.best_trial
print('NMAE:', trial_gbr.value)
print('Best Hyperparameters:', trial_gbr.params)

NMAE: 0.19033837430211062
Best Hyperparameters: {'n_estimators': 1000, 'max_depth': 6, 'learning_rate': 0.01, 'subsample': 0.5}


In [12]:
def objective_et(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2),
        'max_features': trial.suggest_categorical('max_features',['auto','sqrt','log2'])
    }

    model = ExtraTreesRegressor(**params,random_state=0,criterion='absolute_error')
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_et = optuna.create_study(direction='minimize')
study_et.optimize(objective_et,n_trials=100,show_progress_bar=True)

In [14]:
trial_et = study_et.best_trial
print('NMAE:', trial_et.value)
print('Best Hyperparameters:', trial_et.params)

NMAE: 0.1912354390017572
Best Hyperparameters: {'n_estimators': 300, 'max_depth': 16, 'max_features': 'auto'}


In [15]:
def objective_rf(trial: Trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators',100,1000,100),
        'max_depth': trial.suggest_int('max_depth',2,16,2),
        'max_features': trial.suggest_categorical('max_features',['auto','sqrt','log2'])
    }

    model = RandomForestRegressor(**params,random_state=0,criterion='absolute_error')
    bst = model.fit(x_train, y_train)
    y_pred = bst.predict(x_test)
    nmae = NMAE(y_test,y_pred)
    return nmae

In [None]:
study_rf = optuna.create_study(direction='minimize')
study_rf.optimize(objective_rf,n_trials=100,show_progress_bar=True)

In [17]:
trial_rf = study_rf.best_trial
print('NMAE:', trial_rf.value)
print('Best Hyperparameters:', trial_rf.params)

NMAE: 0.20928070313991062
Best Hyperparameters: {'n_estimators': 300, 'max_depth': 14, 'max_features': 'auto'}
