In [1]:
# !pip install lightgbm optuna

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 데이터 로드

In [3]:
data_path = '../data/'
train_df = pd.read_csv(data_path+'train.csv', index_col='id')
test_df = pd.read_csv(data_path+'test.csv', index_col='id')

# 전처리

In [4]:
X = train_df.drop(columns=['defects'])
y = train_df['defects']

# 학습

### 1. Light GBM

In [5]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=61, train_size=0.8)

from lightgbm.sklearn import LGBMClassifier
lgbm = LGBMClassifier()
lgbm.fit(X_train, y_train)
lgbm.score(X_train, y_train), lgbm.score(X_val, y_val) # accuracy

[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007789 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


(0.8244687384842156, 0.8122635483712475)

### 2. Light GBM + optuna

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=61, train_size=0.8)

from lightgbm.sklearn import LGBMClassifier
def optimizer(trial):
    # 변경할 parameter는 찾아야 함.
    num_leaves = trial.suggest_categorical('num_leaves', [4, 8, 16, 32])
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3)
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)

    lgbm = LGBMClassifier(num_leaves=num_leaves,
                          learning_rate=learning_rate,
                          n_estimators=n_estimators,
                          random_state=61)
    lgbm.fit(X_train, y_train)
    return lgbm.score(X_val, y_val) # accuracy  -->  area under the ROC curve로 바꾸어야 함.

In [7]:
import optuna
study = optuna.create_study(direction="maximize")
study.optimize(optimizer, n_trials=10) # 50번 정도를 추천

[I 2023-10-11 02:17:34,650] A new study created in memory with name: no-name-14b47cbd-1998-48d4-9930-f253138eb068


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007845 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:36,061] Trial 0 finished with value: 0.8088733847590036 and parameters: {'num_leaves': 32, 'learning_rate': 0.0988087181756155, 'n_estimators': 361}. Best is trial 0 with value: 0.8088733847590036.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008211 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:39,022] Trial 1 finished with value: 0.7945757382204097 and parameters: {'num_leaves': 32, 'learning_rate': 0.2823978704996192, 'n_estimators': 864}. Best is trial 0 with value: 0.8088733847590036.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007959 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:41,658] Trial 2 finished with value: 0.8116248218935783 and parameters: {'num_leaves': 8, 'learning_rate': 0.08002518216335827, 'n_estimators': 931}. Best is trial 2 with value: 0.8116248218935783.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007367 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:43,718] Trial 3 finished with value: 0.805778017982607 and parameters: {'num_leaves': 16, 'learning_rate': 0.20086700642086644, 'n_estimators': 696}. Best is trial 2 with value: 0.8116248218935783.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007686 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:44,305] Trial 4 finished with value: 0.811673954699553 and parameters: {'num_leaves': 4, 'learning_rate': 0.2163743641976117, 'n_estimators': 188}. Best is trial 4 with value: 0.811673954699553.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008082 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:46,848] Trial 5 finished with value: 0.8092173144008254 and parameters: {'num_leaves': 16, 'learning_rate': 0.08815475800716352, 'n_estimators': 841}. Best is trial 4 with value: 0.811673954699553.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008264 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:49,351] Trial 6 finished with value: 0.7996855500417629 and parameters: {'num_leaves': 32, 'learning_rate': 0.2542212916285083, 'n_estimators': 727}. Best is trial 4 with value: 0.811673954699553.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008141 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:51,255] Trial 7 finished with value: 0.8090699159829018 and parameters: {'num_leaves': 16, 'learning_rate': 0.12940891180630873, 'n_estimators': 622}. Best is trial 4 with value: 0.811673954699553.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008241 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:53,769] Trial 8 finished with value: 0.8099543064904436 and parameters: {'num_leaves': 8, 'learning_rate': 0.2032880746499721, 'n_estimators': 955}. Best is trial 4 with value: 0.811673954699553.


[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005504 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


[I 2023-10-11 02:17:55,343] Trial 9 finished with value: 0.8114774234756547 and parameters: {'num_leaves': 16, 'learning_rate': 0.06284464896573895, 'n_estimators': 416}. Best is trial 4 with value: 0.811673954699553.


In [8]:
print("Best Accuracy: %.4f" % study.best_value)
print("Best params: ", study.best_trial.params) # best score일 때의 하이퍼파라미터들

Best Accuracy: 0.8117
Best params:  {'num_leaves': 4, 'learning_rate': 0.2163743641976117, 'n_estimators': 188}


##### best model

In [9]:
lgbm_best = LGBMClassifier(**study.best_trial.params,
                           random_state=61)
lgbm_best.fit(X_train, y_train)
preds = lgbm_best.predict(test_df)

[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007551 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153


### 3. Light GBM + optuna + KFold

In [10]:
from sklearn.model_selection import StratifiedKFold
from lightgbm.sklearn import LGBMClassifier
def optimizer(trial):
    num_leaves = trial.suggest_categorical('num_leaves', [4, 8, 16, 32])
    learning_rate = trial.suggest_float('learning_rate', 0.01, 0.3)
    n_estimators = trial.suggest_int('n_estimators', 100, 300)
    
    # 매번 for loop 마다 객체를 생성할 필요 없음
    lgbm = LGBMClassifier(num_leaves=num_leaves,
                      learning_rate=learning_rate,
                      n_estimators=n_estimators,
                      random_state=61)
    
    folds = StratifiedKFold(n_splits=5) # n_splits=5 (default값)
    scores = []
    for train_idx, val_idx in folds.split(X, y):
        lgbm.fit(X.iloc[train_idx], y.iloc[train_idx])
        scores.append(lgbm.score(X.iloc[val_idx], y.iloc[val_idx]))
    return np.mean(scores)

In [11]:
import optuna
study = optuna.create_study(direction="maximize")
study.optimize(optimizer, n_trials=10)

[I 2023-10-11 02:17:56,021] A new study created in memory with name: no-name-c900b969-2079-479f-a1da-ba7b014c3b53


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007719 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007333 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:17:59,451] Trial 0 finished with value: 0.8142939770287138 and parameters: {'num_leaves': 8, 'learning_rate': 0.11576601855035158, 'n_estimators': 195}. Best is trial 0 with value: 0.8142939770287138.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008066 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007958 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:05,154] Trial 1 finished with value: 0.8127905020607967 and parameters: {'num_leaves': 32, 'learning_rate': 0.09546001311009013, 'n_estimators': 274}. Best is trial 0 with value: 0.8142939770287138.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007883 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007389 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:08,270] Trial 2 finished with value: 0.8144905193577076 and parameters: {'num_leaves': 16, 'learning_rate': 0.07898528006269555, 'n_estimators': 138}. Best is trial 2 with value: 0.8144905193577076.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007263 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007980 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:10,776] Trial 3 finished with value: 0.8148541050189007 and parameters: {'num_leaves': 4, 'learning_rate': 0.07759817369299836, 'n_estimators': 148}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008228 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007957 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:14,297] Trial 4 finished with value: 0.8102158633607395 and parameters: {'num_leaves': 32, 'learning_rate': 0.25601517092637543, 'n_estimators': 161}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.007965 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008368 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:19,111] Trial 5 finished with value: 0.8109823752088531 and parameters: {'num_leaves': 16, 'learning_rate': 0.24198624623625434, 'n_estimators': 290}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008829 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008951 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:22,747] Trial 6 finished with value: 0.8130558110049451 and parameters: {'num_leaves': 16, 'learning_rate': 0.16569906485516292, 'n_estimators': 173}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008743 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008486 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:27,228] Trial 7 finished with value: 0.8120829790324977 and parameters: {'num_leaves': 16, 'learning_rate': 0.2759813815999637, 'n_estimators': 168}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.004894 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009369 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[L

[I 2023-10-11 02:18:31,549] Trial 8 finished with value: 0.8146084380920465 and parameters: {'num_leaves': 16, 'learning_rate': 0.13190733390229817, 'n_estimators': 124}. Best is trial 3 with value: 0.8148541050189007.


[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008009 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3550
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [Info] Number of positive: 18451, number of negative: 62959
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.008527 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3546
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.226643 -> initscore=-1.227365
[LightGBM] [Info] Start training from score -1.227365
[LightGBM] [

[I 2023-10-11 02:18:35,828] Trial 9 finished with value: 0.814185891132363 and parameters: {'num_leaves': 8, 'learning_rate': 0.1525230826982767, 'n_estimators': 232}. Best is trial 3 with value: 0.8148541050189007.


In [12]:
print("Best Accuracy: %.4f" % study.best_value)
print("Best params: ", study.best_trial.params) # best score일 때의 하이퍼파라미터들

Best Accuracy: 0.8149
Best params:  {'num_leaves': 4, 'learning_rate': 0.07759817369299836, 'n_estimators': 148}


##### best model

In [13]:
lgbm_best = LGBMClassifier(**study.best_trial.params,
                           random_state=61)
lgbm_best.fit(X_train, y_train)
preds = lgbm_best.predict(test_df)

[LightGBM] [Info] Number of positive: 18397, number of negative: 63013
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.009187 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3544
[LightGBM] [Info] Number of data points in the train set: 81410, number of used features: 21
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.225980 -> initscore=-1.231153
[LightGBM] [Info] Start training from score -1.231153
