In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import optuna

from optuna.samplers import TPESampler
from tqdm import tqdm

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_curve, auc

import xgboost as xgb
import psutil
import time

In [None]:
# Per-fold AUC values, accumulated across every fold of every trial.
real_score = []


def objective(trial, data=train_X, target=train_Y):
    """Optuna objective: mean 10-fold cross-validated AUC of an XGBoost classifier.

    Args:
        trial: The Optuna trial used to sample the hyperparameters.
        data: Feature table; rows are selected positionally through ``.iloc``.
        target: Labels aligned row-by-row with ``data``. Either a pandas
            object (indexed positionally through ``.iloc``) or an array that
            accepts integer-array indexing.

    Returns:
        The mean of the per-fold AUC values for this trial.

    Side effects:
        Appends every per-fold AUC to the module-level ``real_score`` list.
    """
    # The search space does not depend on the fold, so it is sampled once per
    # trial (Optuna returns the same value for a repeated name within a trial).
    params = {
        "objective": "multi:softprob",
        "eval_metric": 'mlogloss',  # ['auc', 'error']
        "booster": 'gbtree',
        # 'tree_method':'gpu_hist', 'predictor':'gpu_predictor', 'gpu_id': 0, # use this line for gpu usage.
        "tree_method": 'exact', 'gpu_id': -1,  # when running on CPU
        "verbosity": 0,
        'num_class': 3,
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "learning_rate": trial.suggest_float('learning_rate', 0.0001, 0.99),
        'n_estimators': trial.suggest_int("n_estimators", 1000, 10000, step=100),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "colsample_bylevel": trial.suggest_float("colsample_bylevel", 0.5, 1.0),
        "colsample_bynode": trial.suggest_float("colsample_bynode", 0.5, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-2, 1, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-2, 1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0, step=0.05),
        'min_child_weight': trial.suggest_int('min_child_weight', 2, 15),
        "gamma": trial.suggest_float("gamma", 0.1, 1.0, log=True),
        # 'num_parallel_tree': trial.suggest_int("num_parallel_tree", 1, 500)
    }

    # StratifiedKFold yields positional indices, so index the target
    # positionally as well: plain ``series[idx]`` would be label-based and
    # breaks (or silently misaligns) when the index is not 0..n-1.
    target_rows = getattr(target, "iloc", target)

    score = []
    kf = StratifiedKFold(n_splits=10, random_state=42, shuffle=True)

    for train_fold, test_fold in tqdm(kf.split(data, target), desc='k_fold', total=kf.get_n_splits()):
        X_train, X_test = data.iloc[train_fold], data.iloc[test_fold]
        y_train, y_test = target_rows[train_fold], target_rows[test_fold]

        model = xgb.XGBClassifier(**params)
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=False)

        # NOTE(review): only the probability column at index 1 is scored with
        # roc_curve, while the model is configured with num_class=3 - confirm
        # that this single-column AUC is the intended metric.
        preds = model.predict_proba(X_test)[:, 1]
        fpr, tpr, threshold = roc_curve(y_test, preds)
        auc_score = auc(fpr, tpr)
        score.append(auc_score)
        real_score.append(auc_score)

    return np.mean(score)


'''
0.8558211379019417 and parameters: {'max_depth': 10, 'learning_rate': 0.11423799062437275, 'n_estimators': 6900, 'colsample_bytree': 0.7390924842332742, 'colsample_bylevel': 0.6940109759294105, 'colsample_bynode': 0.5496585400738032, 'reg_lambda': 0.9713492867033704, 'reg_alpha': 0.8147232205798981, 'subsample': 0.85, 'min_child_weight': 5, 'gamma': 0.16280391709890213}
0.8765662376664448 and parameters: {'max_depth': 10, 'learning_rate': 0.025929426661789656, 'n_estimators': 6600, 'colsample_bytree': 0.7155456762839627, 'colsample_bylevel': 0.6123744502366096, 'colsample_bynode': 0.8016499020606728, 'reg_lambda': 0.0935399629775155, 'reg_alpha': 0.018771877229402035, 'subsample': 0.8, 'min_child_weight': 4, 'gamma': 0.6677845409584143}
'''

# Seeded TPE sampler, so the hyperparameter proposals are driven by a fixed seed.
sampler = TPESampler(seed=42)

# The sampler must be handed to the study explicitly; without it create_study
# falls back to its own default sampler and the seed above has no effect.
optim = optuna.create_study(direction="maximize", sampler=sampler)

# Run 20 trials of the cross-validated objective and report the best mean AUC.
optim.optimize(objective, n_trials=20)
print("Best auc:", optim.best_value)

##### For more information on XGBoost parameters, see: https://xgboost.readthedocs.io/en/stable/parameter.html
##### Best-performance archive:

##### 0.8558211379019417 and parameters: {'max_depth': 10, 'learning_rate': 0.11423799062437275, 'n_estimators': 6900, 'colsample_bytree': 0.7390924842332742, 'colsample_bylevel': 0.6940109759294105, 'colsample_bynode': 0.5496585400738032, 'reg_lambda': 0.9713492867033704, 'reg_alpha': 0.8147232205798981, 'subsample': 0.85, 'min_child_weight': 5, 'gamma': 0.16280391709890213}
##### 0.8765662376664448 and parameters: {'max_depth': 10, 'learning_rate': 0.025929426661789656, 'n_estimators': 6600, 'colsample_bytree': 0.7155456762839627, 'colsample_bylevel': 0.6123744502366096, 'colsample_bynode': 0.8016499020606728, 'reg_lambda': 0.0935399629775155, 'reg_alpha': 0.018771877229402035, 'subsample': 0.8, 'min_child_weight': 4, 'gamma': 0.6677845409584143}