In [10]:
# 모델의 종류가 관건일 듯.
# 0823: 10:45 ~ 11:45, 17:40 ~ 18:30
# 0824: 10:20 ~ 11:30, 11:50 ~ 1:00, 2:00 ~ 2:30
# 0825: 2:10 ~ 4:00

# 현황
# 0824 현재 LGBM 모델 적용한 상태고, 제출한 파일은 HistGradientBoostingClassifier
# 0825 - 1. LGBM 모델 적용 O, 2. 그리드 서치 3. 랜덤 서치 사용 => 1번 모델이 가장 성능 좋게 나옴

import pandas as pd
import numpy as np
import random
import os

from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier
from sklearn.model_selection import cross_val_predict

from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn import svm

In [11]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

seed_everything(42) # Seed 고정

In [12]:
# 데이터를 읽어오는 코드
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')

# 데이터 확인
train.head(5)

Unnamed: 0,ID,나이,키(cm),몸무게(kg),BMI,시력,충치,공복 혈당,혈압,중성 지방,혈청 크레아티닌,콜레스테롤,고밀도지단백,저밀도지단백,헤모글로빈,요 단백,간 효소율,label
0,TRAIN_0000,35,170,70,24.22,1.1,1,98,40,80,1.3,211,75,120,15.9,1,1.53,1
1,TRAIN_0001,40,150,55,24.44,1.0,0,173,39,104,0.6,251,46,184,11.8,1,1.45,0
2,TRAIN_0002,60,170,50,17.3,0.75,0,96,40,61,0.8,144,43,89,15.3,1,1.04,0
3,TRAIN_0003,40,150,45,20.0,0.5,0,92,40,46,0.7,178,66,110,13.4,1,1.18,0
4,TRAIN_0004,55,155,65,27.06,1.0,0,87,42,95,0.9,232,62,151,13.8,1,1.32,0


In [13]:
test.head(5)

Unnamed: 0,ID,나이,키(cm),몸무게(kg),BMI,시력,충치,공복 혈당,혈압,중성 지방,혈청 크레아티닌,콜레스테롤,고밀도지단백,저밀도지단백,헤모글로빈,요 단백,간 효소율
0,TEST_0000,40,165,55,20.2,0.9,1,98,47,75,0.5,229,59,155,13.7,1,1.73
1,TEST_0001,65,145,50,23.78,0.5,0,99,59,98,0.6,200,65,115,12.2,1,1.0
2,TEST_0002,40,160,75,29.3,1.0,0,105,34,232,0.6,170,50,73,15.1,1,1.33
3,TEST_0003,30,180,90,27.78,1.35,0,78,45,218,0.9,197,55,98,15.2,1,0.89
4,TEST_0004,50,155,55,22.89,0.75,0,116,67,139,1.0,230,66,136,15.0,1,0.85


In [14]:
x_train = train.drop(['ID', 'label'], axis = 1)
y_train = train['label']

x_test = test.drop('ID', axis = 1)
# test에 label 없음

In [15]:
# model = XGBClassifier(tree_method='hist', random_state=42)
model = LGBMClassifier(random_state=42)

In [16]:
from sklearn.model_selection import cross_val_score

# 교차 검증을 통한 정확도 평가
accuracy = cross_val_score(model, x_train, y_train, cv=5)
print(accuracy)

[LightGBM] [Info] Number of positive: 2056, number of negative: 3544
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1410
[LightGBM] [Info] Number of data points in the train set: 5600, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367143 -> initscore=-0.544494
[LightGBM] [Info] Start training from score -0.544494
[LightGBM] [Info] Number of positive: 2057, number of negative: 3543
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1404
[LightGBM] [Info] Number of data points in the train set: 5600, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367321 -> initscore=-0.543725
[LightGBM] [Info] Start training from score -0.543725
[LightGBM] [Info] Number of positive: 2057, number of negative: 3543
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1411
[LightGBM] [Info] Number of data points in the train set: 5600, n

In [17]:
from sklearn.model_selection import RandomizedSearchCV

# 하이퍼파라미터 범위 설정
param_dist = {
    'num_leaves': np.arange(20, 200, 10),
    'learning_rate': [0.01, 0.1, 0.2, 0.3],
    'n_estimators': [50, 100, 200, 300],
    'subsample': [0.5, 0.8, 1.0],
    'colsample_bytree': [0.5, 0.8, 1.0]
}

model = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=100, scoring='accuracy', cv=3, random_state=42)

In [18]:
# 훈련
model.fit(x_train, y_train)

[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you ca

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[



[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992




[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992




[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[



[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992




[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992




[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992




[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you ca



[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, n

[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you ca

[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992


[LightGBM] [Info] Number of positive: 1714, number of negative: 2952
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1370
[LightGBM] [Info] Number of data points in the train set: 4666, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367338 -> initscore=-0.543653
[LightGBM] [Info] Start training from score -0.543653
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1375
[LightGBM] [Info] Number of data points in the train set: 4667, number of used features: 16
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.367259 -> initscore=-0.543992
[LightGBM] [Info] Start training from score -0.543992
[LightGBM] [Info] Number of positive: 1714, number of negative: 2953
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1384
[

In [19]:
# 최적의 파라미터와 최고 성능 출력
print("Best Parameters:", model.best_params_)
print("Best Accuracy:", model.best_score_)

Best Parameters: {'subsample': 0.8, 'num_leaves': 30, 'n_estimators': 50, 'learning_rate': 0.1, 'colsample_bytree': 0.8}
Best Accuracy: 0.7318575196138314


In [20]:
final_pred = model.predict(x_test)

In [21]:
# read_csv
submit = pd.read_csv('sample_submission.csv')

In [22]:
# 최종 target 값에 할당
submit['label'] = final_pred
submit.head()

Unnamed: 0,ID,label
0,TEST_0000,0
1,TEST_0001,0
2,TEST_0002,1
3,TEST_0003,0
4,TEST_0004,0


In [23]:
# result 추출
submit.to_csv('submission.csv', index = False)