<a href="https://colab.research.google.com/github/sohyunwriter/ai-project/blob/main/(update)_%E1%84%8F%E1%85%A9%E1%84%90%E1%85%A6%E1%84%8B%E1%85%A7%E1%86%AB%E1%84%89%E1%85%B3%E1%86%B8_telco_customer_churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 코테연습 1차

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy
from scipy import stats
import json
import sklearn
import re

#sklearn library
# 1.model_selection
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score, KFold

# 2.preprocessing
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, StandardScaler

# 3.base setting
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use 300 DPI both for displayed figures and for saved files.
plt.rcParams['figure.dpi'] = 300
plt.rcParams['savefig.dpi'] = 300
# Default matplotlib font: normal weight, size 12.
font = {'weight':'normal',
        'size':12}
plt.rc('font', **font)
# Seaborn theme with the same DPI settings, 'notebook' context and 'ticks' style.
# NOTE(review): sns.set() may reset some of the rcParams configured above — confirm the font settings survive.
sns.set(rc={"figure.dpi": 300, 'savefig.dpi': 300})
sns.set_context('notebook')
sns.set_style("ticks")
# Shared font styling (bold, grey) for figure text; not referenced in the visible part of the notebook.
FIG_FONT = dict(weight="bold", color="#7f7f7f")
sns.set_palette('Spectral')

# 4.metrics
# from sklearn.metrics import accuracy_score, roc_auc_score, recall_score, precision_score, f1_score, roc_curve, classification_report, confusion_matrix
from sklearn import metrics

# 5.model
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

import warnings
warnings.filterwarnings('ignore')

## 1.Data 확인

### Load Data

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Load the Telco customer churn CSV (path is relative to the working directory)
# and preview the first rows.
df = pd.read_csv("WA_Fn-UseC_-Telco-Customer-Churn.csv")
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


### 데이터 구조 확인

In [None]:
df.shape
# 7043 rows, 21 columns

(7043, 21)

In [None]:
# Inspect the data structure
# object: string (categorical) columns
# int/float: numeric columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [None]:
# Summarize the string (categorical) columns
df.describe(include=['object']).T

Unnamed: 0,count,unique,top,freq
customerID,7043,7043,7590-VHVEG,1
gender,7043,2,Male,3555
Partner,7043,2,No,3641
Dependents,7043,2,No,4933
PhoneService,7043,2,Yes,6361
MultipleLines,7043,3,No,3390
InternetService,7043,3,Fiber optic,3096
OnlineSecurity,7043,3,No,3498
OnlineBackup,7043,3,No,3088
DeviceProtection,7043,3,No,3095


In [None]:
# Summarize the numeric columns
df.describe()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges
count,7043.0,7043.0,7043.0
mean,0.162147,32.371149,64.761692
std,0.368612,24.559481,30.090047
min,0.0,0.0,18.25
25%,0.0,9.0,35.5
50%,0.0,29.0,70.35
75%,0.0,55.0,89.85
max,1.0,72.0,118.75


### 데이터 형태 변경
데이터 구조상 숫자형이지만 문자로 구성된 데이터를 변형

In [None]:
# Convert the string column to numeric (errors='coerce' turns values that cannot be parsed into NaN)
df['TotalCharges'] = pd.to_numeric(df.TotalCharges, errors='coerce')


## 2.데이터 EDA




### EDA 기본 내용
  1. (결측치) 각 피처(=컬럼)의 결측값 비율
  2. (이상치) 각 피처별 이상치 확인
  3. (unique) 각 피처의 unique값과 unique값별 빈도
  4. (기술통계) 각 피처의 기술통계 (평균값, 최대값, 최솟값, 최빈값, 분위수 등)
  5. (상관관계) 피처-피처, 피처-타겟 간 상관관계 매트릭스
  6. 히스토그램, bar graph, box plot 등 (인사이트 도출)
  7. 각 피처별 Information Value 등 확인

In [None]:
## 코드 채워주세요

## 3.데이터 전처리

### 결측치 처리

In [None]:
# Count missing values per column
df.isnull().sum()

Unnamed: 0,0
customerID,0
gender,0
SeniorCitizen,0
Partner,0
Dependents,0
tenure,0
PhoneService,0
MultipleLines,0
InternetService,0
OnlineSecurity,0


In [None]:
# Show the rows whose TotalCharges is missing
df[np.isnan(df['TotalCharges'])]

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
488,4472-LVYGI,Female,0,Yes,Yes,0,No,No phone service,DSL,Yes,...,Yes,Yes,Yes,No,Two year,Yes,Bank transfer (automatic),52.55,,No
753,3115-CZMZD,Male,0,No,Yes,0,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,20.25,,No
936,5709-LVOEQ,Female,0,Yes,Yes,0,Yes,No,DSL,Yes,...,Yes,No,Yes,Yes,Two year,No,Mailed check,80.85,,No
1082,4367-NUYAO,Male,0,Yes,Yes,0,Yes,Yes,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,25.75,,No
1340,1371-DWPAZ,Female,0,Yes,Yes,0,No,No phone service,DSL,Yes,...,Yes,Yes,Yes,No,Two year,No,Credit card (automatic),56.05,,No
3331,7644-OMVMY,Male,0,Yes,Yes,0,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,19.85,,No
3826,3213-VVOLG,Male,0,Yes,Yes,0,Yes,Yes,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,25.35,,No
4380,2520-SGTTA,Female,0,Yes,Yes,0,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,Two year,No,Mailed check,20.0,,No
5218,2923-ARZLG,Male,0,Yes,Yes,0,Yes,No,No,No internet service,...,No internet service,No internet service,No internet service,No internet service,One year,Yes,Mailed check,19.7,,No
6670,4075-WKNIU,Female,0,Yes,Yes,0,Yes,Yes,DSL,No,...,Yes,Yes,Yes,No,Two year,No,Mailed check,73.35,,No


In [None]:
# Replace missing TotalCharges values with the column mean.
# Only TotalCharges is imputed: a frame-wide fillna would also write this
# mean into any other column that happened to contain NaN.
df['TotalCharges'] = df['TotalCharges'].fillna(df['TotalCharges'].mean())

# If you know another way to handle the missing values, add the code here

In [None]:
# Re-count missing values per column after the fill step
df.isnull().sum()

### 이상치 처리

In [None]:
## 코드 채워주세요

### 문자형 데이터 더미화

In [None]:
# Encode the target as 0/1. The result is assigned back to the column instead
# of calling replace(..., inplace=True) on df['Churn'], which operates on a
# temporary Series and does not update df under pandas copy-on-write.
# The mapping also accepts 0/1 so that re-running this cell is harmless;
# any unexpected label becomes NaN and makes astype(int) fail loudly.
df['Churn'] = df['Churn'].map({'Yes': 1, 'No': 0, 1: 1, 0: 0}).astype(int)

# Convert the categorical variables into dummy variables.
# customerID is a unique identifier (one distinct value per row), so it is
# dropped first: one-hot encoding it adds one column per customer and gives
# the models nothing they can generalize from.
df_dummies = pd.get_dummies(df.drop(columns=['customerID']))
df_dummies.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,Churn,customerID_0002-ORFBO,customerID_0003-MKNFE,customerID_0004-TLHLJ,customerID_0011-IGKFF,customerID_0013-EXCHZ,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,1,29.85,29.85,0,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False
1,0,34,56.95,1889.5,0,False,False,False,False,False,...,False,False,True,False,True,False,False,False,False,True
2,0,2,53.85,108.15,1,False,False,False,False,False,...,False,True,False,False,False,True,False,False,False,True
3,0,45,42.3,1840.75,0,False,False,False,False,False,...,False,False,True,False,True,False,True,False,False,False
4,0,2,70.7,151.65,1,False,False,False,False,False,...,False,True,False,False,False,True,False,False,True,False


### 연속형 데이터 Scale

In [None]:
# Separate the feature matrix from the target vector.
X = df_dummies.drop(columns = ['Churn'])
y = df_dummies['Churn'].values

# Rescale every feature to the [0, 1] range while keeping the column names.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so held-out rows influence the scaling — consider fitting on
# the training split only.
from sklearn.preprocessing import MinMaxScaler
features = X.columns.values
scaler = MinMaxScaler(feature_range = (0,1))
X = pd.DataFrame(scaler.fit_transform(X), columns=features)

In [None]:
# Preview the scaled feature matrix
X.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,customerID_0002-ORFBO,customerID_0003-MKNFE,customerID_0004-TLHLJ,customerID_0011-IGKFF,customerID_0013-EXCHZ,customerID_0013-MHZWF,...,StreamingMovies_Yes,Contract_Month-to-month,Contract_One year,Contract_Two year,PaperlessBilling_No,PaperlessBilling_Yes,PaymentMethod_Bank transfer (automatic),PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0.0,0.013889,0.115423,0.001275,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
1,0.0,0.472222,0.385075,0.215867,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.027778,0.354229,0.01031,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0
3,0.0,0.625,0.239303,0.210241,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0
4,0.0,0.027778,0.521891,0.01533,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0


In [None]:
# Display the target array
y

array([0, 0, 1, ..., 0, 1, 0])

In [None]:
# Hold out 10% as the test set (we know its labels, but treat them as unknown);
# the split is stratified on y and seeded for reproducibility.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y,
                                                    random_state=42)

In [None]:
# Split the training data further into train / validation sets (30% validation).
# NOTE: X_train / y_train are overwritten from themselves, so re-running this
# cell alone splits the already-reduced training set again.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.3, stratify=y_train,
                                                      random_state=42)

## 4.Baseline Model 학습 및 성능 비교 (8개)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

# Baseline candidates, keyed by display name. Each estimator uses its default
# hyperparameters, with a fixed seed wherever the estimator accepts one.
models = {
    'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Support Vector Classifier': SVC(random_state=42),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    # use_label_encoder is deprecated in XGBoost and ignored by recent
    # releases, so it is no longer passed.
    'XGBoost': XGBClassifier(eval_metric='logloss', random_state=42),
    # verbose=-1 silences LightGBM's per-fit [Info] log lines.
    'LightGBM': LGBMClassifier(random_state=42, verbose=-1)
}

In [None]:
# Fit every baseline on the training split and record its validation accuracy.
results = {}
for model_name in models:
    model = models[model_name]
    # fit() returns the estimator itself, so prediction can be chained.
    preds = model.fit(X_train, y_train).predict(X_valid)
    accuracy = accuracy_score(y_valid, preds)
    results.update({model_name: accuracy})
    print(f"{model_name} accuracy is: {accuracy:.4f}")

Logistic Regression accuracy is: 0.8013
K-Nearest Neighbors accuracy is: 0.7555
Support Vector Classifier accuracy is: 0.7981
Decision Tree accuracy is: 0.7624
Random Forest accuracy is: 0.7839
Gradient Boosting accuracy is: 0.8044
XGBoost accuracy is: 0.7755
[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000734 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452
LightGBM accuracy is: 0.7897


In [None]:
# Print the validation accuracy of every model, in insertion order.
print("\nModel Evaluation Summary:")
for entry in results.items():
    print("{}: {:.4f}".format(*entry))


Model Evaluation Summary:
Logistic Regression: 0.8013
K-Nearest Neighbors: 0.7555
Support Vector Classifier: 0.7981
Decision Tree: 0.7624
Random Forest: 0.7839
Gradient Boosting: 0.8044
XGBoost: 0.7755
LightGBM: 0.7897


## 5.하이퍼파라미터 튜닝

In [None]:
# LGBM 외 다른 모델에 대해서도 하이퍼파라미터 튜닝 해보세요.
# Grid Search, Random Search, Bayesian Optimization 모두 실험해보세요.

In [None]:
!pip install optuna



### Optuna 패키지 이용

In [None]:
import optuna
import lightgbm as lgb
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def objective(trial):
    """Train a LightGBM classifier with trial-sampled hyperparameters.

    Early stopping and scoring both use the validation split
    (X_valid, y_valid). The test split is deliberately not touched here so
    that it remains an unbiased final check.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Accuracy on the validation split (the study maximizes this value).
    """
    # Hyperparameter search space
    param = {
        'objective': 'binary',
        'metric': 'binary_logloss',
        'verbosity': -1,
        'boosting_type': 'gbdt',
        # Fixed seed so that bagging / feature sampling is repeatable.
        'random_state': 42,
        'num_leaves': trial.suggest_int('num_leaves', 20, 100),
        'max_depth': trial.suggest_int('max_depth', 3, 9),
        # suggest_float(..., log=True) replaces the deprecated suggest_loguniform.
        'learning_rate': trial.suggest_float('learning_rate', 1e-4, 1e-1, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 50, 300),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        # suggest_float replaces the deprecated suggest_uniform.
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 10.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 10.0, log=True)
    }

    # Train the model. Early stopping is configured through a callback: the
    # early_stopping_rounds / verbose keyword arguments of fit() were removed
    # in LightGBM 4.
    model = lgb.LGBMClassifier(**param)
    model.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        callbacks=[lgb.early_stopping(stopping_rounds=50, verbose=False)],
    )
    y_pred = model.predict(X_valid)

    # Evaluate the model
    accuracy = accuracy_score(y_valid, y_pred)
    return accuracy

# Seed the sampler so that the search itself is reproducible.
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=100)

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)


In [None]:
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score

# Objective function for Optuna
def objective(trial):
    """Fit a LightGBM classifier with trial-sampled hyperparameters.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the hyperparameters.

    Returns
    -------
    float
        Accuracy on the validation split (X_valid, y_valid).
    """
    # Define the hyperparameters to optimize
    param = {
        'num_leaves': trial.suggest_int('num_leaves', 31, 60),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 100, 300),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 20, 40),
        'random_state': 42,
        # Silence LightGBM's per-fit [Info] lines, which otherwise repeat for every trial.
        'verbose': -1
    }

    # Model
    model = LGBMClassifier(**param)
    model.fit(X_train, y_train)

    # Evaluate on validation set
    val_preds = model.predict(X_valid)
    val_accuracy = accuracy_score(y_valid, val_preds)

    return val_accuracy

# 1) Hyperparameter tuning with Optuna (seeded sampler so the search is reproducible)
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=50)
print("Best parameters found: ", study.best_params)
print("Best validation accuracy: ", study.best_value)

# 2) Train the best model
# best_params only holds the sampled values, so the fixed settings are passed again.
best_params = study.best_params
best_model = LGBMClassifier(**best_params, random_state=42, verbose=-1)
best_model.fit(X_train, y_train)

# 3) Check performance on the validation set
# NOTE: this is the same split the study optimized against, so the number is
# an optimistic estimate of generalization.
val_preds = best_model.predict(X_valid)
val_accuracy = accuracy_score(y_valid, val_preds)
print("Final validation accuracy with best model: ", val_accuracy)

[I 2024-12-23 08:35:39,713] A new study created in memory with name: no-name-69f91254-8eb7-4706-94a4-9d470cbf04b5


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001409 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:41,346] Trial 0 finished with value: 0.795478443743428 and parameters: {'num_leaves': 38, 'learning_rate': 0.02716745151871723, 'n_estimators': 148, 'min_data_in_leaf': 24}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001010 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:43,217] Trial 1 finished with value: 0.7770767613038907 and parameters: {'num_leaves': 45, 'learning_rate': 0.08684040539089545, 'n_estimators': 287, 'min_data_in_leaf': 20}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000767 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:44,254] Trial 2 finished with value: 0.7770767613038907 and parameters: {'num_leaves': 42, 'learning_rate': 0.062489504059567234, 'n_estimators': 208, 'min_data_in_leaf': 35}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000965 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:45,687] Trial 3 finished with value: 0.7928496319663512 and parameters: {'num_leaves': 50, 'learning_rate': 0.048918922905285954, 'n_estimators': 154, 'min_data_in_leaf': 35}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000941 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:47,925] Trial 4 finished with value: 0.7844374342797056 and parameters: {'num_leaves': 50, 'learning_rate': 0.0444883950893754, 'n_estimators': 240, 'min_data_in_leaf': 20}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000966 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:49,578] Trial 5 finished with value: 0.7833859095688749 and parameters: {'num_leaves': 54, 'learning_rate': 0.07553880940610769, 'n_estimators': 206, 'min_data_in_leaf': 30}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000928 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:51,298] Trial 6 finished with value: 0.7733964248159831 and parameters: {'num_leaves': 50, 'learning_rate': 0.07608592277577164, 'n_estimators': 295, 'min_data_in_leaf': 23}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000961 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:52,474] Trial 7 finished with value: 0.7912723449001051 and parameters: {'num_leaves': 48, 'learning_rate': 0.05074331681510972, 'n_estimators': 195, 'min_data_in_leaf': 31}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000933 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:54,033] Trial 8 finished with value: 0.7807570977917981 and parameters: {'num_leaves': 38, 'learning_rate': 0.05784106544042085, 'n_estimators': 252, 'min_data_in_leaf': 21}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000874 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:55,239] Trial 9 finished with value: 0.7818086225026288 and parameters: {'num_leaves': 31, 'learning_rate': 0.07145949776098295, 'n_estimators': 224, 'min_data_in_leaf': 37}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000752 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:55,984] Trial 10 finished with value: 0.7912723449001051 and parameters: {'num_leaves': 34, 'learning_rate': 0.015152955073842908, 'n_estimators': 103, 'min_data_in_leaf': 26}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000988 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:57,640] Trial 11 finished with value: 0.795478443743428 and parameters: {'num_leaves': 57, 'learning_rate': 0.028276295085449647, 'n_estimators': 142, 'min_data_in_leaf': 39}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000909 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:35:59,326] Trial 12 finished with value: 0.7923238696109358 and parameters: {'num_leaves': 60, 'learning_rate': 0.02461186584372433, 'n_estimators': 146, 'min_data_in_leaf': 28}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001025 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:01,463] Trial 13 finished with value: 0.7891692954784437 and parameters: {'num_leaves': 59, 'learning_rate': 0.03243122460549287, 'n_estimators': 157, 'min_data_in_leaf': 39}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000957 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:02,705] Trial 14 finished with value: 0.786540483701367 and parameters: {'num_leaves': 39, 'learning_rate': 0.010142326017625623, 'n_estimators': 112, 'min_data_in_leaf': 26}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000922 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:03,903] Trial 15 finished with value: 0.7902208201892744 and parameters: {'num_leaves': 55, 'learning_rate': 0.03531667426875146, 'n_estimators': 176, 'min_data_in_leaf': 40}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000919 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:05,179] Trial 16 finished with value: 0.7944269190325972 and parameters: {'num_leaves': 38, 'learning_rate': 0.023119908519354367, 'n_estimators': 129, 'min_data_in_leaf': 32}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000935 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:06,439] Trial 17 finished with value: 0.7907465825446898 and parameters: {'num_leaves': 44, 'learning_rate': 0.03856714903097575, 'n_estimators': 174, 'min_data_in_leaf': 24}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000710 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:07,665] Trial 18 finished with value: 0.7939011566771819 and parameters: {'num_leaves': 34, 'learning_rate': 0.024087222138025062, 'n_estimators': 130, 'min_data_in_leaf': 33}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000748 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:08,519] Trial 19 finished with value: 0.7781282860147214 and parameters: {'num_leaves': 56, 'learning_rate': 0.09716828196251845, 'n_estimators': 181, 'min_data_in_leaf': 28}. Best is trial 0 with value: 0.795478443743428.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000697 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:09,248] Trial 20 finished with value: 0.7975814931650894 and parameters: {'num_leaves': 40, 'learning_rate': 0.03033344998624441, 'n_estimators': 130, 'min_data_in_leaf': 23}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000705 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:10,067] Trial 21 finished with value: 0.7949526813880127 and parameters: {'num_leaves': 41, 'learning_rate': 0.02879544807290091, 'n_estimators': 134, 'min_data_in_leaf': 24}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:11,047] Trial 22 finished with value: 0.7917981072555205 and parameters: {'num_leaves': 35, 'learning_rate': 0.01730052007564924, 'n_estimators': 115, 'min_data_in_leaf': 22}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000773 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:11,994] Trial 23 finished with value: 0.7939011566771819 and parameters: {'num_leaves': 41, 'learning_rate': 0.041544830858137184, 'n_estimators': 162, 'min_data_in_leaf': 26}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000911 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:12,960] Trial 24 finished with value: 0.795478443743428 and parameters: {'num_leaves': 47, 'learning_rate': 0.030518486321136355, 'n_estimators': 140, 'min_data_in_leaf': 28}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000989 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:14,045] Trial 25 finished with value: 0.7944269190325972 and parameters: {'num_leaves': 36, 'learning_rate': 0.01880288019191182, 'n_estimators': 112, 'min_data_in_leaf': 24}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001006 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:15,930] Trial 26 finished with value: 0.7949526813880127 and parameters: {'num_leaves': 31, 'learning_rate': 0.03676418822649448, 'n_estimators': 123, 'min_data_in_leaf': 34}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000892 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:16,931] Trial 27 finished with value: 0.7944269190325972 and parameters: {'num_leaves': 43, 'learning_rate': 0.04747031729298114, 'n_estimators': 101, 'min_data_in_leaf': 37}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000739 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:17,901] Trial 28 finished with value: 0.7849631966351209 and parameters: {'num_leaves': 53, 'learning_rate': 0.05712823518834454, 'n_estimators': 168, 'min_data_in_leaf': 22}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001514 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:18,796] Trial 29 finished with value: 0.7912723449001051 and parameters: {'num_leaves': 45, 'learning_rate': 0.010898490844644029, 'n_estimators': 190, 'min_data_in_leaf': 20}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000733 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:19,594] Trial 30 finished with value: 0.7944269190325972 and parameters: {'num_leaves': 39, 'learning_rate': 0.02840012661765156, 'n_estimators': 149, 'min_data_in_leaf': 27}. Best is trial 20 with value: 0.7975814931650894.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:21,964] Trial 31 finished with value: 0.7986330178759201 and parameters: {'num_leaves': 46, 'learning_rate': 0.030535691295017174, 'n_estimators': 140, 'min_data_in_leaf': 29}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000720 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:23,046] Trial 32 finished with value: 0.7902208201892744 and parameters: {'num_leaves': 57, 'learning_rate': 0.020412443707333416, 'n_estimators': 140, 'min_data_in_leaf': 30}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000953 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:24,478] Trial 33 finished with value: 0.7944269190325972 and parameters: {'num_leaves': 52, 'learning_rate': 0.0399009265455227, 'n_estimators': 156, 'min_data_in_leaf': 25}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000906 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:25,641] Trial 34 finished with value: 0.7939011566771819 and parameters: {'num_leaves': 47, 'learning_rate': 0.033034558580418816, 'n_estimators': 121, 'min_data_in_leaf': 37}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000962 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:27,989] Trial 35 finished with value: 0.7818086225026288 and parameters: {'num_leaves': 42, 'learning_rate': 0.06483538860082028, 'n_estimators': 263, 'min_data_in_leaf': 22}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000921 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:29,857] Trial 36 finished with value: 0.7960042060988434 and parameters: {'num_leaves': 37, 'learning_rate': 0.04354928782895689, 'n_estimators': 219, 'min_data_in_leaf': 29}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000737 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:31,246] Trial 37 finished with value: 0.7917981072555205 and parameters: {'num_leaves': 37, 'learning_rate': 0.04376750041760796, 'n_estimators': 230, 'min_data_in_leaf': 29}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000717 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:32,110] Trial 38 finished with value: 0.7939011566771819 and parameters: {'num_leaves': 40, 'learning_rate': 0.045587463485065115, 'n_estimators': 204, 'min_data_in_leaf': 31}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000732 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:33,169] Trial 39 finished with value: 0.7912723449001051 and parameters: {'num_leaves': 33, 'learning_rate': 0.051843310722411426, 'n_estimators': 219, 'min_data_in_leaf': 30}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000981 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:34,748] Trial 40 finished with value: 0.7854889589905363 and parameters: {'num_leaves': 49, 'learning_rate': 0.05288568104903302, 'n_estimators': 242, 'min_data_in_leaf': 25}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000935 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:36,171] Trial 41 finished with value: 0.795478443743428 and parameters: {'num_leaves': 36, 'learning_rate': 0.02605197723479978, 'n_estimators': 268, 'min_data_in_leaf': 35}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002108 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:37,513] Trial 42 finished with value: 0.7970557308096741 and parameters: {'num_leaves': 43, 'learning_rate': 0.03417227576177659, 'n_estimators': 189, 'min_data_in_leaf': 32}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000708 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:38,524] Trial 43 finished with value: 0.7975814931650894 and parameters: {'num_leaves': 45, 'learning_rate': 0.03531642147316949, 'n_estimators': 192, 'min_data_in_leaf': 32}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000742 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:39,514] Trial 44 finished with value: 0.7960042060988434 and parameters: {'num_leaves': 46, 'learning_rate': 0.03495965790325587, 'n_estimators': 212, 'min_data_in_leaf': 32}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000762 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:40,357] Trial 45 finished with value: 0.7928496319663512 and parameters: {'num_leaves': 44, 'learning_rate': 0.04098779499840698, 'n_estimators': 190, 'min_data_in_leaf': 34}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000993 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:41,551] Trial 46 finished with value: 0.7870662460567823 and parameters: {'num_leaves': 43, 'learning_rate': 0.061149672575186366, 'n_estimators': 195, 'min_data_in_leaf': 31}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001104 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:42,923] Trial 47 finished with value: 0.786540483701367 and parameters: {'num_leaves': 51, 'learning_rate': 0.04953406735149238, 'n_estimators': 212, 'min_data_in_leaf': 29}. Best is trial 31 with value: 0.7986330178759201.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:44,196] Trial 48 finished with value: 0.8007360672975815 and parameters: {'num_leaves': 41, 'learning_rate': 0.03194131081742237, 'n_estimators': 226, 'min_data_in_leaf': 33}. Best is trial 48 with value: 0.8007360672975815.


[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000726 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452


[I 2024-12-23 08:36:45,115] Trial 49 finished with value: 0.7933753943217665 and parameters: {'num_leaves': 46, 'learning_rate': 0.014538961755092783, 'n_estimators': 230, 'min_data_in_leaf': 33}. Best is trial 48 with value: 0.8007360672975815.


Best parameters found:  {'num_leaves': 41, 'learning_rate': 0.03194131081742237, 'n_estimators': 226, 'min_data_in_leaf': 33}
Best validation accuracy:  0.8007360672975815
[LightGBM] [Info] Number of positive: 1177, number of negative: 3259
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000749 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 667
[LightGBM] [Info] Number of data points in the train set: 4436, number of used features: 45
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.265329 -> initscore=-1.018452
[LightGBM] [Info] Start training from score -1.018452
Final validation accuracy with best model:  0.8007360672975815


## 6.결과파일생성

In [None]:
# Run the tuned model on X_test; the submission preview further down shows 0/1 values,
# i.e. predict() is returning class labels here.
# NOTE(review): best_model and X_test are defined in earlier cells not visible from here —
# presumably the LightGBM model refit with the best Optuna parameters; confirm.
test_preds = best_model.predict(X_test)



In [None]:
# Wrap the predictions in a one-column frame named 'y', reusing X_test's row index
# so every prediction stays keyed to the row it was computed from.
submission = pd.DataFrame({'y': test_preds}, index=X_test.index)
# Bare last expression -> rich preview of the first rows.
submission.head()

Unnamed: 0,y
402,0
1292,0
1175,0
4600,0
3464,1


In [None]:
# Write the predictions to disk. index=True (the pandas default, spelled out here)
# keeps the X_test row index as the first CSV column alongside 'y'.
submission.to_csv(path_or_buf="submission.csv", index=True)