In [1]:
# !pip install optuna

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score

from lightgbm import LGBMClassifier

import optuna, pickle, os

# 데이터 로드

In [3]:
import os

# Environment probes: a ./sample_data directory is treated as the sign of a
# Colab runtime (presumably because Colab ships one in its working dir - confirm),
# and /content/drive existing is treated as "Drive already mounted".
colab = os.path.isdir('./sample_data')
mount = os.path.isdir('/content/drive')

if not colab:
    # Local run: project files sit next to the notebook, data one level up.
    base_path, data_path = '', '../data/'
else:
    if not mount:
        # Mount Google Drive only when it is not there yet.
        from google.colab import drive
        drive.mount('/content/drive')
    base_path = '/content/drive/MyDrive/Colab Notebooks/5_ML_Project/dulee/'
    data_path = '/content/drive/MyDrive/Colab Notebooks/5_ML_Project/data/'

In [4]:
# Every CSV read here uses its 'id' column as the DataFrame index.
csv_options = {'index_col': 'id'}

# The sample submission is read from the shared data directory.
submission_df = pd.read_csv(data_path + 'sample_submission.csv', **csv_options)

# Training-set variants under base_path (swap the file name to switch):
#   'train.csv'         - alternative kept from the original notebook
#   'train_down.csv'    - down-sampled
#   'train_scaling.csv' - up-sampled (the one currently loaded)
train = pd.read_csv(base_path + 'train_scaling.csv', **csv_options)
test = pd.read_csv(base_path + 'test.csv', **csv_options)

##### 변수 설정

In [5]:
# Number of folds handed to StratifiedKFold in the tuning objective.
K = 15

# Label column, and the remaining columns as the feature matrix.
y = train['defects']
X = train.drop('defects', axis=1)

# The test frame is used directly (same object, no copy).
X_test = test

# 모델 학습

### Light GBM

In [6]:
# Show how many logical CPUs the OS reports; the tuning objective passes this same value as n_jobs.
os.cpu_count()

8

##### Hyper-parameter Tuning
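1. min_child_samples: 하나의 leaf에 포함되어야 하는 최소 샘플 수 (sklearn 트리 모델의 min_samples_leaf와 비슷하며, 값이 클수록 과적합을 억제)
2. colsample_bytree: 각 트리를 학습할 때 무작위로 선택해 사용할 feature(column)의 비율. 한 트리 안의 모든 node split은 동일한 feature 부분집합을 사용하므로, 노드 단위가 아닌 트리 단위의 global randomization을 제공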

In [7]:
from sklearn.metrics import accuracy_score, precision_score
from sklearn.metrics import f1_score, recall_score
def metrics(y_true, y_pred, y_proba):
    """Print a one-line classification report for one set of predictions.

    Parameters
    ----------
    y_true : ground-truth labels.
    y_pred : predicted labels, scored with accuracy, precision, recall and F1.
    y_proba : prediction scores, scored with ROC-AUC.

    Returns
    -------
    None. The report is only written to stdout.
    """
    # Label-based scores, evaluated in the order accuracy -> precision -> recall -> F1.
    accuracy, precision, recall, f1 = (
        score_fn(y_true, y_pred)
        for score_fn in (accuracy_score, precision_score, recall_score, f1_score)
    )
    # ROC-AUC is the only score computed from y_proba rather than y_pred.
    roc_score = roc_auc_score(y_true, y_proba)

    report_parts = (
        f'acc : {accuracy:.2f}, precision : {precision:.2f}',
        f'recall : {recall:.2f}, f1-score : {f1:.2f}, auc : {roc_score:.5f}',
    )
    print(', '.join(report_parts))

In [8]:
def optimizer(trial):
    """Optuna objective: mean validation ROC-AUC of a LightGBM classifier.

    Four hyper-parameters are drawn from ``trial``; a classifier built from
    them is fit on each of the K stratified folds of the module-level
    ``X`` / ``y``, and a metrics line is printed per fold.

    Parameters
    ----------
    trial : the Optuna trial used to suggest hyper-parameter values.

    Returns
    -------
    The mean of the per-fold ``roc_auc_score`` values.
    """
    # Search space. The suggest calls keep their original order.
    # max_depth and num_leaves were searched in earlier experiments;
    # num_leaves is now pinned to 4 and max_depth is left unset.
    searched_params = {
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.3),
        'n_estimators': trial.suggest_int('n_estimators', 500, 1000),
    }
    # Settings shared by every trial (class_weight is left at its default).
    fixed_params = {
        'num_leaves': 4,
        'force_col_wise': True,
        'verbose': -100,
        'random_state': 61,
        'n_jobs': os.cpu_count(),
    }
    model = LGBMClassifier(**fixed_params, **searched_params)

    # K is set explicitly; sklearn's own default for n_splits would be 5.
    folds = StratifiedKFold(n_splits=K, shuffle=True, random_state=61)

    scores = []
    for i, (train_idx, val_idx) in enumerate(folds.split(X, y)):
        # The single estimator instance is fit again on every fold.
        model.fit(X.iloc[train_idx], y.iloc[train_idx])

        X_val = X.iloc[val_idx]
        y_true = y.iloc[val_idx]
        y_pred = model.predict(X_val)
        # Column 1 of predict_proba is used as the score for the AUC.
        y_proba = model.predict_proba(X_val)[:, 1]

        print(f'-----------------\t\tfold {i}\t\t-----------------')
        metrics(y_true, y_pred, y_proba)
        scores.append(roc_auc_score(y_true, y_proba))

    return np.mean(scores)

In [None]:
# Seed the sampler so the sequence of suggested hyper-parameters is repeatable
# on a fresh kernel. TPESampler is the sampler create_study uses by default for
# a single-objective study, so only the seeding changes; 61 matches the
# random_state used for the model and the CV splitter in the objective.
# NOTE(review): exact score reproducibility also needs a deterministic
# objective - confirm LightGBM is deterministic with the n_jobs used here.
sampler = optuna.samplers.TPESampler(seed=61)
study = optuna.create_study(direction="maximize", sampler=sampler)

# Run 50 trials of the objective defined above, sequentially.
study.optimize(optimizer, n_trials=50)

[I 2023-10-18 13:29:56,222] A new study created in memory with name: no-name-dc272198-aa4d-47b4-8720-854819fb568f


-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79724
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79266
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79692
-----------------		fold 3		-----------------
acc : 0.72, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.78993
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80237
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.79964
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79505
-----------------		fold 7		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.80102
-----------------		fold 8		-----

[I 2023-10-18 13:30:25,591] Trial 0 finished with value: 0.7980350872903046 and parameters: {'min_child_samples': 44, 'colsample_bytree': 0.9431182692784608, 'learning_rate': 0.13035223116058353, 'n_estimators': 610}. Best is trial 0 with value: 0.7980350872903046.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.79765
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79998
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79654
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80059
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.68, auc : 0.79289
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80632
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80357
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79895
-----------------		fold 7		----

[I 2023-10-18 13:31:01,380] Trial 1 finished with value: 0.8015562676511296 and parameters: {'min_child_samples': 8, 'colsample_bytree': 0.9930899211836357, 'learning_rate': 0.23522522285374198, 'n_estimators': 686}. Best is trial 1 with value: 0.8015562676511296.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80220
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79790
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79368
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79734
-----------------		fold 3		-----------------
acc : 0.72, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.79092
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80284
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80009
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79644
-----------------		fold 7		----

[I 2023-10-18 13:31:31,254] Trial 2 finished with value: 0.798700457821968 and parameters: {'min_child_samples': 59, 'colsample_bytree': 0.878199792425139, 'learning_rate': 0.1471585008269974, 'n_estimators': 632}. Best is trial 1 with value: 0.8015562676511296.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.79770
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80329
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79948
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80297
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79473
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80869
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80497
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80181
-----------------		fold 7		----

[I 2023-10-18 13:32:14,242] Trial 3 finished with value: 0.8037382636817739 and parameters: {'min_child_samples': 45, 'colsample_bytree': 0.9110404964627821, 'learning_rate': 0.25720834971302375, 'n_estimators': 903}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80394
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80102
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79797
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80210
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79486
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80735
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80517
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80070
-----------------		fold 7		----

[I 2023-10-18 13:32:51,942] Trial 4 finished with value: 0.8027523515056817 and parameters: {'min_child_samples': 76, 'colsample_bytree': 0.969564888580908, 'learning_rate': 0.28225794227710643, 'n_estimators': 780}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80155
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80248
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79912
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80233
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79536
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80786
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80470
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80095
-----------------		fold 7		----

[I 2023-10-18 13:33:31,053] Trial 5 finished with value: 0.8032058458342665 and parameters: {'min_child_samples': 16, 'colsample_bytree': 0.947615087087528, 'learning_rate': 0.273957581686451, 'n_estimators': 800}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80142
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79944
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79515
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79860
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.79203
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80479
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80165
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79729
-----------------		fold 7		----

[I 2023-10-18 13:34:02,777] Trial 6 finished with value: 0.8000444307749277 and parameters: {'min_child_samples': 65, 'colsample_bytree': 0.9240072457115966, 'learning_rate': 0.19478856057308744, 'n_estimators': 647}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.80008
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79735
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79263
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79686
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.78987
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80230
-----------------		fold 5		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.79881
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79519
-----------------		fold 7		----

[I 2023-10-18 13:34:28,581] Trial 7 finished with value: 0.797775472027995 and parameters: {'min_child_samples': 56, 'colsample_bytree': 0.7906566614272591, 'learning_rate': 0.13280782227925134, 'n_estimators': 579}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79705
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.79979
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79586
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80007
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.79282
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80595
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80205
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79819
-----------------		fold 7		----

[I 2023-10-18 13:34:59,734] Trial 8 finished with value: 0.8007967700344886 and parameters: {'min_child_samples': 99, 'colsample_bytree': 0.8573164868755867, 'learning_rate': 0.21427563165084784, 'n_estimators': 694}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80003
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80095
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79766
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80200
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.63, f1-score : 0.69, auc : 0.79363
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80708
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80339
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.79936
-----------------		fold 7		----

[I 2023-10-18 13:35:42,682] Trial 9 finished with value: 0.8020603875581604 and parameters: {'min_child_samples': 36, 'colsample_bytree': 0.9710357039699753, 'learning_rate': 0.20174150780419203, 'n_estimators': 868}. Best is trial 3 with value: 0.8037382636817739.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80163
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80416
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79984
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80463
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79640
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81008
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80695
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80314
-----------------		fold 7		----

[I 2023-10-18 13:36:22,790] Trial 10 finished with value: 0.8052285296424129 and parameters: {'min_child_samples': 28, 'colsample_bytree': 0.721690263471372, 'learning_rate': 0.29458468657247655, 'n_estimators': 974}. Best is trial 10 with value: 0.8052285296424129.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80490
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.71, auc : 0.80403
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80072
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80413
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79564
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80967
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80662
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80340
-----------------		fold 7		----

[I 2023-10-18 13:37:04,084] Trial 11 finished with value: 0.8051738256192351 and parameters: {'min_child_samples': 28, 'colsample_bytree': 0.7111330549358229, 'learning_rate': 0.29613316899034253, 'n_estimators': 997}. Best is trial 10 with value: 0.8052285296424129.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80468
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80443
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80133
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80470
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79632
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80938
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80726
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80308
-----------------		fold 7		----

[I 2023-10-18 13:37:46,282] Trial 12 finished with value: 0.8049319238541279 and parameters: {'min_child_samples': 25, 'colsample_bytree': 0.7023216069815389, 'learning_rate': 0.29509079187545967, 'n_estimators': 998}. Best is trial 10 with value: 0.8052285296424129.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80257
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80488
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80125
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80613
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.79625
-----------------		fold 4		-----------------
acc : 0.75, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.81100
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80786
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80458
-----------------		fold 7		----

[I 2023-10-18 13:38:27,616] Trial 13 finished with value: 0.8059573226432913 and parameters: {'min_child_samples': 1, 'colsample_bytree': 0.7013031230861149, 'learning_rate': 0.2956406271280229, 'n_estimators': 982}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80552
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80300
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80005
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80370
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79445
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.81004
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80557
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80064
-----------------		fold 7		----

[I 2023-10-18 13:39:04,991] Trial 14 finished with value: 0.8040520322886326 and parameters: {'min_child_samples': 2, 'colsample_bytree': 0.7570903317646767, 'learning_rate': 0.2560165610392945, 'n_estimators': 906}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80427
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80024
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79597
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.79925
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79226
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80664
-----------------		fold 5		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80265
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79875
-----------------		fold 7		----

[I 2023-10-18 13:39:28,119] Trial 15 finished with value: 0.8007152559512192 and parameters: {'min_child_samples': 17, 'colsample_bytree': 0.7945499729826897, 'learning_rate': 0.2998217451684683, 'n_estimators': 500}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80035
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.71, auc : 0.80441
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80026
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80341
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79664
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80833
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80664
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80190
-----------------		fold 7		----

[I 2023-10-18 13:40:07,635] Trial 16 finished with value: 0.8048256151317434 and parameters: {'min_child_samples': 1, 'colsample_bytree': 0.7468796406838064, 'learning_rate': 0.2695077987191026, 'n_estimators': 956}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80505
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80155
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79789
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80180
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79481
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80750
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80474
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79961
-----------------		fold 7		----

[I 2023-10-18 13:40:43,451] Trial 17 finished with value: 0.8025302197750632 and parameters: {'min_child_samples': 31, 'colsample_bytree': 0.814798700577785, 'learning_rate': 0.2387951262166001, 'n_estimators': 847}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80267
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80308
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79882
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80335
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79454
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80937
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80603
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.80151
-----------------		fold 7		----

[I 2023-10-18 13:41:23,079] Trial 18 finished with value: 0.8041293937681593 and parameters: {'min_child_samples': 15, 'colsample_bytree': 0.7361685310519397, 'learning_rate': 0.2706366131128532, 'n_estimators': 932}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80378
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80205
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.79750
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80096
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79330
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80699
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80436
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79938
-----------------		fold 7		----

[I 2023-10-18 13:41:58,371] Trial 19 finished with value: 0.802118857731714 and parameters: {'min_child_samples': 75, 'colsample_bytree': 0.7080742890417082, 'learning_rate': 0.24464330616595978, 'n_estimators': 833}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80186
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80171
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.79778
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80280
-----------------		fold 3		-----------------
acc : 0.72, precision : 0.74, recall : 0.64, f1-score : 0.68, auc : 0.79360
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80776
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80456
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80071
-----------------		fold 7		----

[I 2023-10-18 13:42:29,879] Trial 20 finished with value: 0.802611831002524 and parameters: {'min_child_samples': 40, 'colsample_bytree': 0.766130498096935, 'learning_rate': 0.282986607766808, 'n_estimators': 741}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80089
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80385
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80009
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80462
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79531
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80968
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80655
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80275
-----------------		fold 7		----

[I 2023-10-18 13:43:11,226] Trial 21 finished with value: 0.8049542296024128 and parameters: {'min_child_samples': 25, 'colsample_bytree': 0.7201979484079521, 'learning_rate': 0.2939323134906572, 'n_estimators': 1000}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80440
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80371
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79959
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80410
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.63, f1-score : 0.68, auc : 0.79609
-----------------		fold 4		-----------------
acc : 0.75, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.81036
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80628
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80274
-----------------		fold 7		----

[I 2023-10-18 13:43:51,233] Trial 22 finished with value: 0.8048280839317307 and parameters: {'min_child_samples': 27, 'colsample_bytree': 0.7295779654093713, 'learning_rate': 0.2961236595584845, 'n_estimators': 955}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80365
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80358
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80078
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80412
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79498
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81006
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80762
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80286
-----------------		fold 7		----

[I 2023-10-18 13:44:31,173] Trial 23 finished with value: 0.8049171280527264 and parameters: {'min_child_samples': 10, 'colsample_bytree': 0.7004821330576786, 'learning_rate': 0.2805276100707245, 'n_estimators': 969}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80458
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80295
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79957
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80428
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79608
-----------------		fold 4		-----------------
acc : 0.75, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.81017
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80583
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80267
-----------------		fold 7		----

[I 2023-10-18 13:45:08,767] Trial 24 finished with value: 0.8046405134588397 and parameters: {'min_child_samples': 21, 'colsample_bytree': 0.7304539033359408, 'learning_rate': 0.29976729582915485, 'n_estimators': 901}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80379
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80242
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.79990
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80335
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79428
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80888
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.71, auc : 0.80683
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80266
-----------------		fold 7		----

[I 2023-10-18 13:45:51,891] Trial 25 finished with value: 0.804207269162852 and parameters: {'min_child_samples': 31, 'colsample_bytree': 0.7641287163344752, 'learning_rate': 0.2608852368419267, 'n_estimators': 995}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80224
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80385
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79995
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80390
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79565
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80977
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80666
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80228
-----------------		fold 7		----

[I 2023-10-18 13:46:30,701] Trial 26 finished with value: 0.8043819545719991 and parameters: {'min_child_samples': 51, 'colsample_bytree': 0.7203565389724178, 'learning_rate': 0.2808280318573597, 'n_estimators': 929}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80355
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80299
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79892
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80333
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.68, auc : 0.79511
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80848
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80608
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80111
-----------------		fold 7		----

[I 2023-10-18 13:47:07,999] Trial 27 finished with value: 0.8039616081209844 and parameters: {'min_child_samples': 10, 'colsample_bytree': 0.7421431471229686, 'learning_rate': 0.2626194187105712, 'n_estimators': 881}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80361
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80300
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80026
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80402
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79600
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80917
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80650
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80189
-----------------		fold 7		----

[I 2023-10-18 13:47:47,990] Trial 28 finished with value: 0.804253273430138 and parameters: {'min_child_samples': 39, 'colsample_bytree': 0.7047921147194156, 'learning_rate': 0.27954155756513194, 'n_estimators': 954}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80426
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80281
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79829
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80358
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79382
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80923
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80588
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80138
-----------------		fold 7		----

[I 2023-10-18 13:48:27,203] Trial 29 finished with value: 0.8035669347932931 and parameters: {'min_child_samples': 44, 'colsample_bytree': 0.7699936843778872, 'learning_rate': 0.24976968906255434, 'n_estimators': 926}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80255
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80098
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.79727
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80112
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79346
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80622
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80468
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79968
-----------------		fold 7		----

[I 2023-10-18 13:49:01,601] Trial 30 finished with value: 0.8018695841277349 and parameters: {'min_child_samples': 31, 'colsample_bytree': 0.7429101766230033, 'learning_rate': 0.2309467426906313, 'n_estimators': 818}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80117
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80432
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80116
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80423
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79608
-----------------		fold 4		-----------------
acc : 0.75, precision : 0.76, recall : 0.68, f1-score : 0.72, auc : 0.81072
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80756
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80281
-----------------		fold 7		----

[I 2023-10-18 13:49:43,068] Trial 31 finished with value: 0.8052264988783188 and parameters: {'min_child_samples': 24, 'colsample_bytree': 0.7180471597611658, 'learning_rate': 0.2893990820797321, 'n_estimators': 987}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80427
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80414
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80104
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80381
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79615
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81041
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80727
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.80230
-----------------		fold 7		----

[I 2023-10-18 13:50:23,450] Trial 32 finished with value: 0.8051175419605131 and parameters: {'min_child_samples': 21, 'colsample_bytree': 0.7193553302155956, 'learning_rate': 0.28908563144792015, 'n_estimators': 972}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80469
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80496
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80082
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80423
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79686
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81020
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.71, auc : 0.80665
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80311
-----------------		fold 7		----

[I 2023-10-18 13:51:04,244] Trial 33 finished with value: 0.8053133185698386 and parameters: {'min_child_samples': 9, 'colsample_bytree': 0.7000649570701558, 'learning_rate': 0.28613647579551077, 'n_estimators': 978}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80389
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80249
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79950
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80287
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79422
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80923
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80477
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80096
-----------------		fold 7		----

[I 2023-10-18 13:51:40,898] Trial 34 finished with value: 0.8038267616517714 and parameters: {'min_child_samples': 6, 'colsample_bytree': 0.7285403014267324, 'learning_rate': 0.26814465486223776, 'n_estimators': 865}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80383
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80364
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79968
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80423
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79549
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80882
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80625
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80214
-----------------		fold 7		----

[I 2023-10-18 13:52:19,128] Trial 35 finished with value: 0.8046808537563852 and parameters: {'min_child_samples': 7, 'colsample_bytree': 0.7016964632059635, 'learning_rate': 0.28601748544852096, 'n_estimators': 932}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80501
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.71, auc : 0.80419
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.79917
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80379
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79657
-----------------		fold 4		-----------------
acc : 0.75, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80996
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80681
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80202
-----------------		fold 7		----

[I 2023-10-18 13:52:56,157] Trial 36 finished with value: 0.8044098421117014 and parameters: {'min_child_samples': 12, 'colsample_bytree': 0.7436031666506459, 'learning_rate': 0.2725340500741113, 'n_estimators': 894}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80394
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80131
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.69, auc : 0.79666
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80166
-----------------		fold 3		-----------------
acc : 0.72, precision : 0.74, recall : 0.64, f1-score : 0.68, auc : 0.79306
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80751
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80450
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.64, f1-score : 0.69, auc : 0.79959
-----------------		fold 7		----

[I 2023-10-18 13:53:28,648] Trial 37 finished with value: 0.8020992732802913 and parameters: {'min_child_samples': 19, 'colsample_bytree': 0.7231090995776962, 'learning_rate': 0.2573557879911583, 'n_estimators': 749}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80110
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80499
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80086
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80479
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79577
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81014
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80636
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80321
-----------------		fold 7		----

[I 2023-10-18 13:54:08,877] Trial 38 finished with value: 0.8055234814803636 and parameters: {'min_child_samples': 7, 'colsample_bytree': 0.7516832546655631, 'learning_rate': 0.28805230651046293, 'n_estimators': 971}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80576
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80349
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80042
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80383
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79486
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.80960
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80679
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80283
-----------------		fold 7		----

[I 2023-10-18 13:54:48,000] Trial 39 finished with value: 0.8049658675172197 and parameters: {'min_child_samples': 1, 'colsample_bytree': 0.7520804038149584, 'learning_rate': 0.27584978575143354, 'n_estimators': 949}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80477
-----------------		fold 0		-----------------
acc : 0.73, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80346
-----------------		fold 1		-----------------
acc : 0.74, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80092
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.65, f1-score : 0.70, auc : 0.80259
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79532
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81025
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80689
-----------------		fold 6		-----------------
acc : 0.74, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80141
-----------------		fold 7		----

[I 2023-10-18 13:55:29,691] Trial 40 finished with value: 0.8046188880209403 and parameters: {'min_child_samples': 14, 'colsample_bytree': 0.7794836355209831, 'learning_rate': 0.2874842334416395, 'n_estimators': 913}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.70, auc : 0.80486
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80591
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80052
-----------------		fold 2		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80373
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79536
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.81001
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80735
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.70, auc : 0.80328
-----------------		fold 7		----

[I 2023-10-18 13:56:13,807] Trial 41 finished with value: 0.8054514071192848 and parameters: {'min_child_samples': 5, 'colsample_bytree': 0.7203834695326645, 'learning_rate': 0.2880911670810712, 'n_estimators': 978}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80508
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80402
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.79943
-----------------		fold 2		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80449
-----------------		fold 3		-----------------
acc : 0.73, precision : 0.74, recall : 0.64, f1-score : 0.69, auc : 0.79580
-----------------		fold 4		-----------------
acc : 0.74, precision : 0.75, recall : 0.68, f1-score : 0.71, auc : 0.81029
-----------------		fold 5		-----------------
acc : 0.74, precision : 0.75, recall : 0.66, f1-score : 0.70, auc : 0.80737
-----------------		fold 6		-----------------
acc : 0.73, precision : 0.75, recall : 0.65, f1-score : 0.69, auc : 0.80151
-----------------		fold 7		----

[I 2023-10-18 13:56:59,015] Trial 42 finished with value: 0.8048036382375509 and parameters: {'min_child_samples': 6, 'colsample_bytree': 0.7349942320973194, 'learning_rate': 0.26538283365701776, 'n_estimators': 975}. Best is trial 13 with value: 0.8059573226432913.


-----------------		fold 14		-----------------
acc : 0.74, precision : 0.75, recall : 0.67, f1-score : 0.71, auc : 0.80443
-----------------		fold 0		-----------------
acc : 0.74, precision : 0.74, recall : 0.67, f1-score : 0.70, auc : 0.80336
-----------------		fold 1		-----------------
acc : 0.73, precision : 0.74, recall : 0.66, f1-score : 0.70, auc : 0.80159


In [None]:
# Report the outcome of the Optuna study: the best objective value, the
# parameters of the best trial, and the summed 'duration' column of all trials.
best_score = study.best_value
best_params = study.best_trial.params
total_duration = study.trials_dataframe()['duration'].sum()

print(f"Best roc_auc_score: {best_score:.5f}")
print("Best params: ", best_params)
print(total_duration)

# Previously recorded results (presumably from earlier runs):
# Best roc_auc_score: 0.78978 (K=15)
# Best roc_auc_score: 0.79246 (K=5)

##### best model

In [None]:
def oof_predict(best_model):
    """Average test-set probabilities over K stratified cross-validation fits.

    For every fold, a fresh copy of ``best_model`` is trained on the fold's
    training rows, evaluated on the held-out rows (the scores are printed via
    ``metrics``), and then used to score ``X_test``. The per-fold test
    probabilities are averaged element-wise.

    Relies on the notebook-level names ``X``, ``y``, ``X_test``, ``K`` and
    ``metrics``.

    Parameters
    ----------
    best_model : estimator
        scikit-learn compatible classifier exposing ``fit``, ``predict`` and
        ``predict_proba``. It is cloned for each fold, so the object passed in
        is not refit or otherwise modified.

    Returns
    -------
    numpy.ndarray
        Mean over the K folds of column 1 of ``predict_proba(X_test)``
        (presumably the positive "defects" class), one value per test row.
    """
    # Local import: `clone` is not part of the notebook's import cell.
    from sklearn.base import clone

    folds = StratifiedKFold(n_splits=K, random_state=61, shuffle=True)
    predicted_probas = []

    for i, (train_idx, val_idx) in enumerate(folds.split(X, y)):
        # Train an unfitted copy so the caller's (possibly already fitted)
        # model is not overwritten by a single-fold fit.
        fold_model = clone(best_model)
        fold_model.fit(X.iloc[train_idx], y.iloc[train_idx])

        X_val = X.iloc[val_idx]
        y_true = y.iloc[val_idx]
        y_pred = fold_model.predict(X_val)
        # Fold metrics must be computed from validation-set probabilities;
        # test-set probabilities have no matching labels.
        y_val_proba = fold_model.predict_proba(X_val)[:, 1]
        print(f'-----------------\t\tfold {i}\t\t-----------------')
        metrics(y_true, y_pred, y_val_proba)

        # Keep this fold's test-set probabilities for the final average.
        predicted_probas.append(fold_model.predict_proba(X_test)[:, 1])
    return np.mean(predicted_probas, axis=0)

In [None]:
# Final model: LightGBM configured with the best trial's hyper-parameters and
# fitted on the complete training set (fit returns the estimator itself).
model_best = LGBMClassifier(
    verbose=-100,
    force_row_wise=True,
    n_jobs=-1,
    random_state=61,
    **study.best_trial.params
).fit(X, y)
# Column 1 of predict_proba on the test set is what gets submitted.
y_proba = model_best.predict_proba(X_test)[:, 1]
# y_proba_oof = oof_predict(model_best)

In [None]:
# save model
# A context manager guarantees the file handle is flushed and closed once the
# dump finishes (or fails), instead of leaving it open until garbage collection.
with open(base_path + "lgbm_best.pickle", "wb") as model_file:
    pickle.dump(model_best, model_file)

In [None]:
# Data for every trial Optuna attempted, sorted with the best score first
param_analysis = study.trials_dataframe().sort_values(by=['value'], ascending=False)
param_analysis.to_csv(base_path + 'LGBM_param_analysis.csv')

# Select the hyper-parameter columns by their 'params_' prefix rather than by
# position, so extra columns (e.g. user/system attributes) are never plotted.
param_cols = [col for col in param_analysis.columns if col.startswith('params_')]

# Three plots per row; the number of rows follows the number of parameters.
n_cols = 3
n_rows = max(1, -(-len(param_cols) // n_cols))  # ceiling division
fig, axes = plt.subplots(n_rows, n_cols, figsize=(12, 4 * n_rows), squeeze=False)
flat_axes = axes.ravel()

# One scatter plot per hyper-parameter: objective value vs. parameter value
for col, ax in zip(param_cols, flat_axes):
    sns.scatterplot(data=param_analysis, x='value', y=col, ax=ax)
# Hide grid cells that have no parameter to show
for ax in flat_axes[len(param_cols):]:
    ax.set_visible(False)

plt.tight_layout()
# Save before showing so the written file never depends on backend state after show()
fig.savefig(base_path + 'LGBM_param_analysis.png')
plt.show()

In [None]:
# Visualize the experiment (optimization) history of the study
optuna.visualization.plot_optimization_history(study)

In [None]:
# Importance of each hyper-parameter
optuna.visualization.plot_param_importances(study)

# 제출

In [None]:
# Fill the sample submission's 'defects' column with the full-fit model's test
# probabilities, write it to CSV, and display the resulting frame.
submission_path = base_path + 'submission_lgbm.csv'
submission_df['defects'] = y_proba
submission_df.to_csv(submission_path)
# Alternative submission built from the fold-averaged probabilities:
# submission_df['defects'] = y_proba_oof
# submission_df.to_csv(base_path + 'submission_lgbm_oof.csv')
submission_df