In [1]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import joblib
import warnings
import numpy as np
import pandas as pd
import xgboost as xgb

In [2]:
# Suppress every warning category for the rest of the kernel session.
warnings.filterwarnings("ignore")

# Raw training table; its '대출등급' column is used later to fit the label encoder.
original = pd.read_csv('../Database/train.csv')

# Preprocessed train/test tables, indexed by their 'ID' column.
train = pd.read_csv('../Database/train_preprocessed.csv', index_col='ID')
test = pd.read_csv('../Database/test_preprocessed.csv', index_col='ID')

# Split the preprocessed training table into target (y) and features (X).
y = train['대출등급']
X = train.drop('대출등급', axis=1)

# Fixed-seed 80/20 hold-out split.
# NOTE(review): presumably the same split the saved models were trained with;
# confirm against the training notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# DMatrix wrappers that are passed to xgb_model.predict in later cells.
test_XG = xgb.DMatrix(test)
X_test_XG = xgb.DMatrix(X_test)

In [4]:
# Load the previously trained models from disk.
# joblib.load unpickles arbitrary Python objects, so only load files from a
# trusted source.
gbm_model = joblib.load('../Files/gbm_model.pkl')
xgb_model = joblib.load('../Files/xgb_model.pkl')
# NOTE: the CatBoost model (../Files/cat_model.pkl) is not loaded or evaluated here.

# Per-sample, per-class scores on the hold-out split (one row per sample,
# one column per class). LightGBM predicts at its recorded best iteration.
proba1 = gbm_model.predict(X_test, num_iteration=gbm_model.best_iteration)
proba2 = xgb_model.predict(X_test_XG)

# Predicted class = index of the highest-scoring column in each row.
# Both models use the same vectorised argmax so the results are comparable.
gbm_result = np.argmax(proba1, axis=1)
xgb_result = np.argmax(proba2, axis=1)

print("lightGBM Accuracy:", accuracy_score(y_test, gbm_result))
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_result))

# Soft voting: average the two score matrices element-wise, then take the
# per-row argmax of the averaged scores.
average_proba = (proba1 + proba2) / 2
soft_voting_result = np.argmax(average_proba, axis=1)
print("Soft Voting Accuracy:", accuracy_score(y_test, soft_voting_result))

lightGBM Accuracy: 0.8538345708499923
XGBoost Accuracy: 0.8504076016407913
Soft Voting Accuracy: 0.8560153694376655


In [ ]:
# Score the competition test set with the two models that are actually loaded
# in this notebook (LightGBM and XGBoost). An earlier version of this cell also
# averaged in `cat_model.predict_proba(test)`, but `cat_model` is never loaded
# in this notebook, so the cell raised NameError on a fresh Restart & Run All.
# To bring CatBoost back, load it explicitly here and add it to the average.
proba1 = gbm_model.predict(test, num_iteration=gbm_model.best_iteration)
proba2 = xgb_model.predict(test_XG)

# Soft voting: element-wise mean of the per-class scores, then per-row argmax.
average_proba = (proba1 + proba2) / 2
soft_voting_result = np.argmax(average_proba, axis=1)

# Map the predicted class indices back to the original grade labels.
# NOTE(review): assumes the integer targets in train_preprocessed.csv were
# produced by a LabelEncoder fitted on this same raw column, so that the index
# order matches — TODO confirm against the preprocessing step.
label_encoder = LabelEncoder()
label_encoder.fit(original['대출등급'])
decoded_data = label_encoder.inverse_transform(soft_voting_result)

# Fill the submission template and write it out without the pandas row index.
# NOTE(review): values are assigned by position, so the rows of
# sample_submission.csv are assumed to be in the same order as
# test_preprocessed.csv — verify.
answer = pd.read_csv('../Database/sample_submission.csv')
answer['대출등급'] = decoded_data
answer.to_csv('../Files/answer.csv', index=False)