In [1]:
import sys
sys.path.append('..')

import joblib
import lightgbm
import optuna
from optuna.samplers import TPESampler

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedShuffleSplit

import src

In [2]:
# Only surface warnings and errors from Optuna, so the per-trial INFO
# messages do not flood the notebook output during optimization.
optuna.logging.set_verbosity(optuna.logging.WARNING)

In [3]:
# Load the serialized training table and key its rows by 'customer_ID'.
# NOTE(review): joblib.load unpickles the file, so TRAIN_DATA_PATH must point
# to a trusted artifact.
train_data = joblib.load(src.constants.TRAIN_DATA_PATH).set_index('customer_ID')

In [4]:
# Separate the label from the model inputs. 'S_2' is removed together with
# the label, so it is not used as a feature.
y_train = train_data['target']
X_train = train_data.drop(['target', 'S_2'], axis=1)

In [5]:
def objective(trial):
    """Optuna objective: mean cross-validated ROC-AUC of one sampled model.

    Hyperparameters are sampled from ``trial`` through
    ``get_hyperparams(trial, mode='fit')``. The resulting LightGBM classifier
    is scored on ``X_train`` / ``y_train`` with a seeded 5-split
    ``StratifiedShuffleSplit``, so every trial is evaluated on the same splits.

    Parameters
    ----------
    trial
        Trial object handed in by ``study.optimize``.

    Returns
    -------
    float
        Mean of the per-split ``'roc_auc'`` scores.
    """
    sampled_params = get_hyperparams(trial, mode='fit')
    classifier = lightgbm.LGBMClassifier(**sampled_params)

    # Same seed on every call -> identical splits for every trial.
    splitter = StratifiedShuffleSplit(
        n_splits=5,
        random_state=src.constants.RANDOM_STATE,
    )

    split_aucs = cross_val_score(
        estimator=classifier,
        X=X_train,
        y=y_train,
        scoring='roc_auc',
        cv=splitter,
    )
    return split_aucs.mean()


def get_hyperparams(trial, mode):
    """Build the keyword arguments for ``lightgbm.LGBMClassifier``.

    Parameters
    ----------
    trial
        Optuna trial. In ``'fit'`` mode it must provide ``suggest_int``; in
        ``'refit'`` mode it must expose a ``params`` mapping that contains
        ``'max_depth'`` and ``'num_leaves'``.
    mode : str
        ``'fit'`` samples new values from the search space; ``'refit'`` reuses
        the values already stored on ``trial``.

    Returns
    -------
    dict
        Fixed settings (binary objective, ``is_unbalance=True``, GBDT boosting,
        project random seed, silent logging) plus the tuned ``max_depth`` and
        ``num_leaves``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'fit'`` nor ``'refit'``.
    """
    if mode == 'fit':
        max_depth = trial.suggest_int('max_depth', 1, 5)
        # Upper bound 32 == 2**5: the most leaves a depth-5 tree can hold.
        num_leaves = trial.suggest_int('num_leaves', 2, 32)
    elif mode == 'refit':
        max_depth = trial.params['max_depth']
        num_leaves = trial.params['num_leaves']
    else:
        # Without this branch an unknown mode surfaced later as an opaque
        # UnboundLocalError on `max_depth`.
        raise ValueError(f"mode must be 'fit' or 'refit', got {mode!r}")

    return dict(
        objective='binary',
        is_unbalance=True,
        boosting_type='gbdt',
        max_depth=max_depth,
        num_leaves=num_leaves,
        random_state=src.constants.RANDOM_STATE,
        verbose=-1,
    )

In [6]:
# Single-objective study that maximizes the value returned by the objective.
# The TPE sampler is seeded so the sequence of suggestions is repeatable.
study = optuna.create_study(
    study_name='optuna',
    direction='maximize',
    sampler=TPESampler(seed=src.constants.RANDOM_STATE),
)

In [7]:
%%time
# Run 25 trials of `objective`. Every trial is a full 5-split
# cross-validation, so this cell is slow (the recorded run took ~29 min wall time).
study.optimize(objective, n_trials=25, show_progress_bar=True)

  0%|          | 0/25 [00:00<?, ?it/s]

CPU times: total: 1h 17min 39s
Wall time: 28min 44s


In [16]:
# Report the best trial's hyperparameters and its objective value
# (the mean cross-validated ROC-AUC).
print('Лучшие гиперпараметры:')
for param_name, param_value in study.best_params.items():
    print(f'* {param_name}: {param_value}')
print(f'Лучший AUC: {study.best_value}')

Лучшие гиперпараметры:
* max_depth: 5
* num_leaves: 26
Лучший AUC: 0.9583847287838119


In [10]:
# Best objective values noted by hand - presumably from successive runs of
# this study (TODO confirm what changed between them):
# 0.9582783195297522
# 0.9583847287838119
study.best_trial.value

0.9583847287838119