In [2]:
from catboost import Pool
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
import pandas as pd

# Install a process-wide "ignore" filter with no category/module restriction,
# so every warning raised after this cell runs is silenced.
# NOTE(review): this also hides deprecation/convergence warnings from the
# libraries used below — consider narrowing it with `category=` once the
# specific noisy warning is known.
warnings.filterwarnings("ignore")

In [3]:
# Read the training table from disk, using its 'ID' column as the row index.
target_column = '대출등급'
train = pd.read_csv('../Database/train_tried.csv', index_col='ID')

# Separate the label Series from the feature frame (all remaining columns).
y = train[target_column]
X = train.drop(columns=[target_column])


In [4]:
# Show the feature frame as this cell's rich output (pandas elides the middle rows).
X

Unnamed: 0_level_0,대출금액,연간소득,부채_대비_소득_비율,총계좌수,최근_2년간_연체_횟수,총상환원금,총상환이자,총연체금액,연체계좌수,대출기간,근로기간,주택소유상태,대출목적
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
TRAIN_00000,-0.563848,-0.220218,-0.014287,-0.852449,-0.376102,-0.800303,-0.972784,-0.038438,-0.072595,36 months,6 years,RENT,부채 통합
TRAIN_00001,-0.377964,0.370332,0.087890,-0.356109,-0.376102,-0.436814,-0.441082,-0.038438,-0.072595,60 months,10+ years,MORTGAGE,주택 개선
TRAIN_00002,-0.610319,0.020823,-0.321114,-0.935172,-0.376102,0.103276,-0.627621,-0.038438,-0.072595,36 months,5 years,MORTGAGE,부채 통합
TRAIN_00003,-0.377964,0.382384,-0.127783,-0.852449,-0.376102,-0.483274,-0.624977,-0.038438,-0.072595,36 months,8 years,MORTGAGE,부채 통합
TRAIN_00004,-0.029431,-0.222870,0.179044,-0.521556,-0.376102,-0.577932,-0.634409,-0.038438,-0.072595,60 months,Unknown,RENT,주요 구매
...,...,...,...,...,...,...,...,...,...,...,...,...,...
TRAIN_96289,-0.377964,1.165767,-0.299368,0.636570,-0.376102,0.147972,0.145249,-0.038438,-0.072595,36 months,10+ years,MORTGAGE,신용 카드
TRAIN_96290,1.016166,0.382384,-0.423588,-0.025216,-0.376102,-0.232331,0.969667,-0.038438,-0.072595,60 months,10+ years,MORTGAGE,주택 개선
TRAIN_96291,-0.377964,-0.099698,-0.242471,-0.273386,-0.376102,0.648632,-0.424781,-0.038438,-0.072595,36 months,1 year,MORTGAGE,신용 카드
TRAIN_96292,-0.261786,-0.277164,-0.061949,-0.356109,1.799906,0.540861,0.885597,-0.038438,-0.072595,36 months,5 years,MORTGAGE,부채 통합


In [5]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
# NOTE(review): the split is not stratified on y — confirm that is intended for
# this classification target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Column positions 9, 10, 11 and 12 of X are handed to CatBoost as categorical.
# NOTE(review): these positions are hard-coded — confirm they still match the
# column order of X whenever the input file changes.
cat_features = list(range(9, 13))

# Wrap both splits in CatBoost Pools that share the same categorical spec.
pool_kwargs = {'cat_features': cat_features}
train_pool = Pool(data=X_train, label=y_train, **pool_kwargs)
val_pool = Pool(data=X_test, label=y_test, **pool_kwargs)

In [6]:
# Hyperparameters for the CPU model: one-vs-all multiclass loss, ordered boosting.
cat_params = {
    'iterations': 1000,
    'learning_rate': 0.1,
    'depth': 10,
    'l2_leaf_reg': 2,
    'task_type': 'CPU',
    'loss_function': 'MultiClassOneVsAll',
    'boosting_type': 'Ordered',
}
cat_model = CatBoostClassifier(**cat_params)

# Train on the training pool while evaluating on the validation pool;
# verbose=2 emits a progress line every second iteration.
cat_model.fit(train_pool, eval_set=val_pool, verbose=2)

0:	learn: 0.6382198	test: 0.6382447	best: 0.6382447 (0)	total: 6.38s	remaining: 1h 46m 16s
2:	learn: 0.5521194	test: 0.5521258	best: 0.5521258 (2)	total: 20.8s	remaining: 1h 55m 16s
4:	learn: 0.4852361	test: 0.4852266	best: 0.4852266 (4)	total: 38.4s	remaining: 2h 7m 24s
6:	learn: 0.4446338	test: 0.4446620	best: 0.4446620 (6)	total: 39.9s	remaining: 1h 34m 23s



KeyboardInterrupt



In [62]:
# Predict labels for the held-out rows, then report plain accuracy against y_test.
val_predictions = cat_model.predict(X_test)
val_accuracy = accuracy_score(y_test, val_predictions)
print("CatBoost Accuracy:", val_accuracy)

CatBoost Accuracy: 0.8583519393530298


In [63]:
# Persist the current `cat_model` to disk so it can be reloaded later.
cat_model.save_model(fname='../Files/cat_model.bin')

In [64]:
# Restore the model that was written to disk into a fresh classifier object.
model_path = '../Files/cat_model.bin'
loaded_model = CatBoostClassifier()
loaded_model.load_model(model_path)

# NOTE(review): the training log for this call restarts at iteration 0 with a
# loss of about 0.64, so fit() appears to train from scratch rather than resume
# from the loaded trees. If continuing training was the goal, check CatBoost's
# `init_model` argument of fit() — confirm the intent.
loaded_model.fit(train_pool, eval_set=val_pool, verbose=2)

0:	learn: 0.6366766	test: 0.6366857	best: 0.6366857 (0)	total: 5.5s	remaining: 1h 31m 38s
2:	learn: 0.5483135	test: 0.5483425	best: 0.5483425 (2)	total: 7.65s	remaining: 42m 22s
4:	learn: 0.4873472	test: 0.4873972	best: 0.4873972 (4)	total: 9.88s	remaining: 32m 46s
6:	learn: 0.4358618	test: 0.4359404	best: 0.4359404 (6)	total: 13.4s	remaining: 31m 36s
8:	learn: 0.3951242	test: 0.3952551	best: 0.3952551 (8)	total: 16.9s	remaining: 30m 58s
10:	learn: 0.3655918	test: 0.3658047	best: 0.3658047 (10)	total: 20.2s	remaining: 30m 15s
12:	learn: 0.3404294	test: 0.3407176	best: 0.3407176 (12)	total: 23.6s	remaining: 29m 55s
14:	learn: 0.3191448	test: 0.3195833	best: 0.3195833 (14)	total: 27.3s	remaining: 29m 52s
16:	learn: 0.2999630	test: 0.3004349	best: 0.3004349 (16)	total: 30.9s	remaining: 29m 43s
18:	learn: 0.2857266	test: 0.2863691	best: 0.2863691 (18)	total: 34.3s	remaining: 29m 30s


KeyboardInterrupt: 

In [ ]:
# Settings for the GPU run; note that this rebinds the name `cat_model`.
gpu_params = dict(
    random_state=2024,
    n_estimators=1000,
    learning_rate=0.01,
    depth=15,
    l2_leaf_reg=3,
    metric_period=1000,
    task_type='GPU',
)
cat_model = CatBoostClassifier(**gpu_params)

# Fit directly on the DataFrame split (no eval set), passing the categorical columns.
cat_model.fit(X_train, y_train, cat_features=cat_features)

# Report accuracy on the held-out split.
gpu_predictions = cat_model.predict(X_test)
print("CatBoost Accuracy:", accuracy_score(y_test, gpu_predictions))