In [1]:
%%time
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import os
import random
from optuna.samplers import TPESampler
import multiprocessing
import catboost as cat
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
import pickle
from sklearn.utils import resample
from catboost import Pool
import sklearn.metrics

CPU times: user 671 ms, sys: 142 ms, total: 812 ms
Wall time: 1.74 s


# Initial conditions

In [2]:
%%time
n_trials = int(1)
SEED = 123

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.11 µs


In [3]:
%%time
# Function to seed everything
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
seed_everything(SEED)

CPU times: user 114 µs, sys: 0 ns, total: 114 µs
Wall time: 117 µs


# Preprocess

In [4]:
%%time
X_train = pd.read_pickle('../input/research-with-customized-sharp-weighted/X_train.pickle')
y_train = pd.read_pickle('../input/research-with-customized-sharp-weighted/y_train.pickle')
X = np.array(X_train)
y = np.array(y_train)

del X_train, y_train

CPU times: user 1.56 s, sys: 4.35 s, total: 5.91 s
Wall time: 24.5 s


# Functions

In [5]:
%%time
def objective(trial):
    train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.33, random_state=int(SEED), shuffle=True)
    train_pool = Pool(train_x, train_y)
    test_pool = Pool(test_x, test_y)
    
    # Parameters
    params = {
        'iterations' : trial.suggest_int('iterations', 50, 300),                         
        'depth' : trial.suggest_int('depth', 4, 10),                                       
        'learning_rate' : trial.suggest_loguniform('learning_rate', 0.01, 0.3),               
        'random_strength' :trial.suggest_int('random_strength', 0, 100),                       
        'bagging_temperature' :trial.suggest_loguniform('bagging_temperature', 0.01, 100.00),
        'learning_rate' :trial.suggest_loguniform('learning_rate', 1e-3, 1e-1),
        'od_type': trial.suggest_categorical('od_type', ['IncToDec', 'Iter'])
    }
    # Learning
    model = cat.CatBoostClassifier(
        loss_function="Logloss",
        eval_metric="AUC",
        task_type="GPU",
        l2_leaf_reg=50,
        random_seed=SEED,
        border_count=64,
        **params
    )        
    model.fit(train_pool)
    # Predict
    preds = model.predict(test_pool)
    pred_labels = np.rint(preds)
    y_pred_boot = resample(pred_labels, n_samples = len(train_y))
    # Evaluation
    ROC_AUC_Score = roc_auc_score(train_y, y_pred_boot)
    print('ROC AUC Score of CatBoost =', ROC_AUC_Score)
    return ROC_AUC_Score

CPU times: user 6 µs, sys: 0 ns, total: 6 µs
Wall time: 10.5 µs


# Optimisation

In [6]:
%%time
study = optuna.create_study(direction = "maximize", sampler = TPESampler(seed=int(SEED)))
study.optimize(objective, n_trials = n_trials, n_jobs = multiprocessing.cpu_count())

[32m[I 2021-01-10 04:48:12,667][0m A new study created in memory with name: no-name-ac4565c1-1d08-4560-b0b8-1a6971a88369[0m


0:	learn: 0.5203668	total: 39.6ms	remaining: 4.24s
1:	learn: 0.5281540	total: 90.3ms	remaining: 4.79s
2:	learn: 0.5304306	total: 135ms	remaining: 4.73s
3:	learn: 0.5292732	total: 160ms	remaining: 4.17s
4:	learn: 0.5295894	total: 190ms	remaining: 3.92s
5:	learn: 0.5299858	total: 230ms	remaining: 3.91s
6:	learn: 0.5294706	total: 285ms	remaining: 4.11s
7:	learn: 0.5303692	total: 330ms	remaining: 4.13s
8:	learn: 0.5303210	total: 355ms	remaining: 3.91s
9:	learn: 0.5298588	total: 378ms	remaining: 3.71s
10:	learn: 0.5295048	total: 398ms	remaining: 3.51s
11:	learn: 0.5289159	total: 414ms	remaining: 3.31s
12:	learn: 0.5293042	total: 446ms	remaining: 3.26s
13:	learn: 0.5293765	total: 475ms	remaining: 3.19s
14:	learn: 0.5290437	total: 503ms	remaining: 3.12s
15:	learn: 0.5291359	total: 529ms	remaining: 3.04s
16:	learn: 0.5299930	total: 574ms	remaining: 3.08s
17:	learn: 0.5296179	total: 591ms	remaining: 2.95s
18:	learn: 0.5295179	total: 614ms	remaining: 2.88s
19:	learn: 0.5297691	total: 654ms	remai

[32m[I 2021-01-10 04:50:44,289][0m Trial 0 finished with value: 0.5001952015525651 and parameters: {'iterations': 108, 'depth': 10, 'learning_rate': 0.017870608749031732, 'random_strength': 90, 'bagging_temperature': 0.22061030692011427, 'od_type': 'Iter'}. Best is trial 0 with value: 0.5001952015525651.[0m


ROC AUC Score of CatBoost = 0.5001952015525651
CPU times: user 2min 33s, sys: 3.8 s, total: 2min 37s
Wall time: 2min 31s


In [7]:
%%time
# Save
pickle.dump(study.best_trial.params, open('CatBoost_Hyperparameter.pickle', 'wb'))
print('CatBoost Hyperparameter:', study.best_trial.params)

CatBoost Hyperparameter: {'iterations': 108, 'depth': 10, 'learning_rate': 0.017870608749031732, 'random_strength': 90, 'bagging_temperature': 0.22061030692011427, 'od_type': 'Iter'}
CPU times: user 1.29 ms, sys: 16 µs, total: 1.31 ms
Wall time: 1.08 ms


# References
- [CatBoost HyperParameter Tuning with Optuna!](https://www.kaggle.com/saurabhshahane/catboost-hyperparameter-tuning-with-optuna)
- [Research with Customized Sharp Weighted](https://www.kaggle.com/satorushibata/research-with-customized-sharp-weighted)
- [catboost.predict](https://catboost.ai/docs/concepts/r-reference_catboost-predict.html)
- [CatBoost GPU](https://www.kaggle.com/zxspectrum/catboost-gpu)
- [catboostとOptunaでハイパーパラメータ自動最適化](https://qiita.com/shin_mura/items/8f1aa1ec90fa4ad6253e)
- [Python: CatBoost を GPU で学習させる](https://blog.amedama.jp/entry/catboost-gpu)