In [1]:

from lightgbm import LGBMClassifier

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split


In [2]:

dataset = load_breast_cancer()
ftr = dataset.data
target = dataset.target


# 전체 데이터 중 80%는 학습용 데이터, 20% 테스트용 데이터 추출

X_train, X_test, y_train, y_test = train_test_split(ftr, target, test_size = 0.2, random_state = 156)


# 앞서 XGBoost 와 동일하게 n_estimators 는 400 설정

lgbm_wrapper = LGBMClassifier(n_estimators = 400)



# LightGBM 도 XGBoost 와 동일하게 조기 중단 수행 가능

evals = [(X_test, y_test)]
lgbm_wrapper.fit(X_train, y_train, eval_metric = "logloss",
                eval_set = evals, verbose = True)
preds = lgbm_wrapper.predict(X_test)
pred_proba = lgbm_wrapper.predict_proba(X_test)[:, 1]



[1]	valid_0's binary_logloss: 0.565079
[2]	valid_0's binary_logloss: 0.507451
[3]	valid_0's binary_logloss: 0.458489
[4]	valid_0's binary_logloss: 0.417481
[5]	valid_0's binary_logloss: 0.385507
[6]	valid_0's binary_logloss: 0.355773
[7]	valid_0's binary_logloss: 0.329587
[8]	valid_0's binary_logloss: 0.308478
[9]	valid_0's binary_logloss: 0.285395
[10]	valid_0's binary_logloss: 0.267055
[11]	valid_0's binary_logloss: 0.252013
[12]	valid_0's binary_logloss: 0.237018
[13]	valid_0's binary_logloss: 0.224756
[14]	valid_0's binary_logloss: 0.213383
[15]	valid_0's binary_logloss: 0.203058
[16]	valid_0's binary_logloss: 0.194015
[17]	valid_0's binary_logloss: 0.186412
[18]	valid_0's binary_logloss: 0.179108
[19]	valid_0's binary_logloss: 0.174004
[20]	valid_0's binary_logloss: 0.167155
[21]	valid_0's binary_logloss: 0.162494
[22]	valid_0's binary_logloss: 0.156886
[23]	valid_0's binary_logloss: 0.152855
[24]	valid_0's binary_logloss: 0.151113
[25]	valid_0's binary_logloss: 0.148395
[26]	vali

In [3]:
import lightgbm
print(lightgbm.__version__)

3.3.5


In [9]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2-cp310-cp310-win_amd64.whl (101.0 MB)
     -------------------------------------- 101.0/101.0 MB 8.7 MB/s eta 0:00:00
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     ---------------------------------------- 47.0/47.0 kB 2.3 MB/s eta 0:00:00
Collecting plotly
  Downloading plotly-5.15.0-py2.py3-none-any.whl (15.5 MB)
     --------------------------------------- 15.5/15.5 MB 14.5 MB/s eta 0:00:00
Collecting tenacity>=6.2.0
  Downloading tenacity-8.2.2-py3-none-any.whl (24 kB)
Installing collected packages: tenacity, graphviz, plotly, catboost
Successfully installed catboost-1.2 graphviz-0.20.1 plotly-5.15.0 tenacity-8.2.2
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from catboost import CatBoostClassifier, Pool

# 예제 데이터 로드
data = load_iris()
X, y = data.data, data.target

# DataFrame으로 변환
feature_names = data.feature_names
target_names = data.target_names
df = pd.DataFrame(X, columns=feature_names)
df['target'] = y

# 데이터셋을 훈련용과 테스트용으로 나눔
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CatBoost 데이터 형식으로 변환
train_pool = Pool(data=X_train, label=y_train)
test_pool = Pool(data=X_test, label=y_test)

# CatBoost 모델 생성 및 학습
cat_model = CatBoostClassifier(iterations=1000, learning_rate=0.1, depth=6, loss_function='MultiClass')
cat_model.fit(train_pool, eval_set=test_pool, verbose=100)

# 모델 평가
accuracy = cat_model.score(X_test, y_test)
print(f'Accuracy: {accuracy:.4f}')

# 특성 중요도 출력
feature_importance = cat_model.feature_importances_
for i, importance in enumerate(feature_importance):
    print(f'{feature_names[i]} importance: {importance:.4f}')


0:	learn: 0.9813365	test: 0.9803429	best: 0.9803429 (0)	total: 157ms	remaining: 2m 36s
100:	learn: 0.0482433	test: 0.0336882	best: 0.0336882 (100)	total: 446ms	remaining: 3.96s
200:	learn: 0.0208260	test: 0.0163628	best: 0.0163628 (200)	total: 745ms	remaining: 2.96s
300:	learn: 0.0129658	test: 0.0109436	best: 0.0109436 (300)	total: 1.23s	remaining: 2.85s
400:	learn: 0.0092262	test: 0.0081960	best: 0.0081960 (400)	total: 1.48s	remaining: 2.21s
500:	learn: 0.0071433	test: 0.0065558	best: 0.0065558 (500)	total: 1.68s	remaining: 1.67s
600:	learn: 0.0058393	test: 0.0055275	best: 0.0055275 (600)	total: 1.89s	remaining: 1.26s
700:	learn: 0.0049524	test: 0.0047934	best: 0.0047934 (700)	total: 2.1s	remaining: 898ms
800:	learn: 0.0043034	test: 0.0042338	best: 0.0042338 (800)	total: 2.37s	remaining: 588ms
900:	learn: 0.0037637	test: 0.0037667	best: 0.0037667 (900)	total: 2.73s	remaining: 300ms
999:	learn: 0.0033542	test: 0.0033947	best: 0.0033947 (999)	total: 3.15s	remaining: 0us

bestTest = 0.00