In [1]:
from catboost import Pool
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import warnings
import pandas as pd

warnings.filterwarnings("ignore")

In [2]:
train = pd.read_csv('../Database/train_modified.csv', index_col='ID')
X = train.drop(columns=['대출등급'])
y = train['대출등급']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
cat_features=[i for i in range(9,27)]

train_pool = Pool(data=X_train, label=y_train, cat_features=cat_features)
val_pool = Pool(data=X_test, label=y_test, cat_features=cat_features)

In [61]:
cat_model = CatBoostClassifier(
    iterations=1000,
    learning_rate=0.1,
    depth = 10,
    l2_leaf_reg=2,
    task_type = 'CPU',
    loss_function = 'MultiClassOneVsAll',
    boosting_type = 'Ordered'
)
cat_model.fit(train_pool, eval_set=val_pool, verbose=2)

0:	learn: 0.6366766	test: 0.6366857	best: 0.6366857 (0)	total: 5.68s	remaining: 1h 34m 39s
2:	learn: 0.5483135	test: 0.5483425	best: 0.5483425 (2)	total: 7.87s	remaining: 43m 34s
4:	learn: 0.4873472	test: 0.4873972	best: 0.4873972 (4)	total: 10.2s	remaining: 33m 54s
6:	learn: 0.4358618	test: 0.4359404	best: 0.4359404 (6)	total: 13.7s	remaining: 32m 22s
8:	learn: 0.3951242	test: 0.3952551	best: 0.3952551 (8)	total: 17.3s	remaining: 31m 47s
10:	learn: 0.3655918	test: 0.3658047	best: 0.3658047 (10)	total: 21s	remaining: 31m 30s
12:	learn: 0.3404294	test: 0.3407176	best: 0.3407176 (12)	total: 24.5s	remaining: 30m 56s
14:	learn: 0.3191448	test: 0.3195833	best: 0.3195833 (14)	total: 27.9s	remaining: 30m 35s
16:	learn: 0.2999630	test: 0.3004349	best: 0.3004349 (16)	total: 31.3s	remaining: 30m 12s
18:	learn: 0.2857266	test: 0.2863691	best: 0.2863691 (18)	total: 34.9s	remaining: 30m 1s
20:	learn: 0.2743932	test: 0.2750937	best: 0.2750937 (20)	total: 38.2s	remaining: 29m 40s
22:	learn: 0.2641139

<catboost.core.CatBoostClassifier at 0x1bf2b4ff690>

In [62]:
print("CatBoost Accuracy:", accuracy_score(y_test, cat_model.predict(X_test)))

CatBoost Accuracy: 0.8583519393530298


In [63]:
cat_model.save_model('../Files/cat_model.bin')

In [64]:
loaded_model = CatBoostClassifier()
loaded_model.load_model('../Files/cat_model.bin')
loaded_model.fit(train_pool, eval_set=val_pool, verbose=2)

0:	learn: 0.6366766	test: 0.6366857	best: 0.6366857 (0)	total: 5.5s	remaining: 1h 31m 38s
2:	learn: 0.5483135	test: 0.5483425	best: 0.5483425 (2)	total: 7.65s	remaining: 42m 22s
4:	learn: 0.4873472	test: 0.4873972	best: 0.4873972 (4)	total: 9.88s	remaining: 32m 46s
6:	learn: 0.4358618	test: 0.4359404	best: 0.4359404 (6)	total: 13.4s	remaining: 31m 36s
8:	learn: 0.3951242	test: 0.3952551	best: 0.3952551 (8)	total: 16.9s	remaining: 30m 58s
10:	learn: 0.3655918	test: 0.3658047	best: 0.3658047 (10)	total: 20.2s	remaining: 30m 15s
12:	learn: 0.3404294	test: 0.3407176	best: 0.3407176 (12)	total: 23.6s	remaining: 29m 55s
14:	learn: 0.3191448	test: 0.3195833	best: 0.3195833 (14)	total: 27.3s	remaining: 29m 52s
16:	learn: 0.2999630	test: 0.3004349	best: 0.3004349 (16)	total: 30.9s	remaining: 29m 43s
18:	learn: 0.2857266	test: 0.2863691	best: 0.2863691 (18)	total: 34.3s	remaining: 29m 30s


KeyboardInterrupt: 

In [ ]:
cat_model = CatBoostClassifier(random_state=2024,
                               n_estimators=1000,
                               learning_rate=0.01,
                               depth=15,
                               l2_leaf_reg=3,
                               metric_period=1000,
                               task_type='GPU')



cat_model.fit(X_train, y_train, cat_features=cat_features)
print("CatBoost Accuracy:", accuracy_score(y_test, cat_model.predict(X_test)))