In [94]:
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import joblib
import warnings
import numpy as np
import pandas as pd
import xgboost as xgb

warnings.filterwarnings("ignore")

In [106]:
# Stage switch read by the `if abcd:` / `if not abcd:` cells below.
# True  -> run the A-D stage (it writes answer_abcd.csv and test_defg.csv).
# False -> run the D-G stage (it reads those two files back).
abcd = False

# Raw training table; only its 대출등급 column is used below, to fit the
# label encoders that turn predicted class indices back into grade letters.
original = pd.read_csv('../Database/train.csv')

# Subset of the raw table whose grade is one of D, E, F or G.
original_defg = original[original['대출등급'].isin(['D', 'E', 'F', 'G'])]

In [107]:
# A-D stage setup: runs only when the `abcd` flag is set.
if abcd:
    train = pd.read_csv('../Database/train_abcd.csv', index_col='ID')
    test = pd.read_csv('../Database/test_abcd.csv', index_col='ID')

    # Target is the 대출등급 column; every other column is a feature.
    y = train['대출등급']
    X = train.drop(columns='대출등급')

    # 80/20 hold-out split with a fixed seed so the split is repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # DMatrix wrappers: these are what the prediction cell hands to xgb_model.
    test_XG = xgb.DMatrix(test)
    X_test_XG = xgb.DMatrix(X_test)

    # Previously saved models for this stage.
    # NOTE(review): joblib.load unpickles the file -- only load trusted artifacts.
    lgb_model, xgb_model, cat_model = [
        joblib.load(model_path)
        for model_path in (
            '../Files/lgb_model_abcd.pkl',
            '../Files/xgb_model_abcd.pkl',
            '../Files/cat_model_abcd.pkl',
        )
    ]

In [108]:
# D-G stage setup: runs only when the `abcd` flag is cleared.
# test_defg.csv is produced by the `if abcd:` post-processing cell, so that
# stage has to have been run (with abcd = True) before this one.
if not abcd:
    train = pd.read_csv('../Database/train_defg.csv', index_col='ID')
    test = pd.read_csv('../Database/test_defg.csv', index_col='ID')

    # Target is the 대출등급 column; every other column is a feature.
    y = train['대출등급']
    X = train.drop(columns='대출등급')

    # 80/20 hold-out split with a fixed seed so the split is repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # DMatrix wrappers: these are what the prediction cell hands to xgb_model.
    test_XG = xgb.DMatrix(test)
    X_test_XG = xgb.DMatrix(X_test)

    # Previously saved models for this stage.
    # NOTE(review): joblib.load unpickles the file -- only load trusted artifacts.
    lgb_model, xgb_model, cat_model = [
        joblib.load(model_path)
        for model_path in (
            '../Files/lgb_model_defg.pkl',
            '../Files/xgb_model_defg.pkl',
            '../Files/cat_model_defg.pkl',
        )
    ]

In [109]:
def _soft_vote(lgb_proba, xgb_proba, cat_proba):
    """Blend the three models' class-probability arrays.

    XGBoost and CatBoost are averaged first, and that mean is then averaged
    with LightGBM, so the effective weights are 0.5 (LightGBM), 0.25
    (XGBoost) and 0.25 (CatBoost).
    NOTE(review): confirm the unequal weighting is intentional.
    """
    xgb_cat_mean = (xgb_proba + cat_proba) / 2
    return (lgb_proba + xgb_cat_mean) / 2


# ---- Hold-out evaluation ------------------------------------------------
proba1 = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
proba2 = xgb_model.predict(X_test_XG)
proba3 = cat_model.predict_proba(X_test)

# Predicted class = column with the highest probability in each row.
lgb_result = np.argmax(proba1, axis=1).tolist()
xgb_result = np.argmax(proba2, axis=1)
cat_result = np.argmax(proba3, axis=1)

print("lightGBM Accuracy:", accuracy_score(y_test, lgb_result))
print("XGBoost Accuracy:", accuracy_score(y_test, xgb_result))
print("CatBoost Accuracy:", accuracy_score(y_test, cat_result))

average_proba = _soft_vote(proba1, proba2, proba3)
soft_voting_result = np.argmax(average_proba, axis=1)
print("Soft Voting Accuracy:", accuracy_score(y_test, soft_voting_result))

# ---- Test-set prediction ------------------------------------------------
# The names are deliberately rebound: after this cell `soft_voting_result`
# holds the class indices for `test`, which the post-processing cells decode.
proba1 = lgb_model.predict(test, num_iteration=lgb_model.best_iteration)
proba2 = xgb_model.predict(test_XG)
proba3 = cat_model.predict_proba(test)

average_proba = _soft_vote(proba1, proba2, proba3)
soft_voting_result = np.argmax(average_proba, axis=1)

lightGBM Accuracy: 0.863114576564869
XGBoost Accuracy: 0.8531513970110461
CatBoost Accuracy: 0.8538011695906432
Soft Voting Accuracy: 0.8654970760233918


In [111]:
# A-D stage post-processing: runs only when the `abcd` flag is set.
# Rows predicted A/B/C are final answers; rows predicted D are written out as
# the test set for the D-G stage (the `if not abcd:` cells read that file).
if abcd:
    # The encoder is fitted only to recover the index -> letter mapping.
    # LabelEncoder keeps its classes sorted, so indices 0..3 decode to
    # A, B, C, D -- assuming the raw column holds the grades A-G and the
    # stage model was trained with that same ordering (TODO confirm).
    label_encoder = LabelEncoder().fit(original['대출등급'])
    decoded_data = label_encoder.inverse_transform(soft_voting_result)

    # Keep the predictions in their own Series instead of writing a label
    # column into `test`: mutating `test` would make the prediction cell fail
    # on a re-run, because the models would then receive an extra column.
    predicted_grade = pd.Series(decoded_data, index=test.index, name='대출등급')

    answer_abcd = predicted_grade[predicted_grade.isin(['A', 'B', 'C'])].to_frame()
    answer_abcd.to_csv('../Files/answer_abcd.csv')

    # Feature rows routed to the D-G stage. errors='ignore' only matters when
    # a stale kernel still has the label column attached to `test`.
    test_defg = test.loc[predicted_grade == 'D'].drop(columns='대출등급', errors='ignore')
    test_defg.to_csv('../Database/test_defg.csv')

In [113]:
# D-G stage post-processing: runs only when the `abcd` flag is cleared.
# Decodes the D-G predictions, merges them with the A-C answers saved by the
# A-D stage, and writes the combined submission file.
if not abcd:
    # The encoder is fitted only to recover the index -> letter mapping.
    # LabelEncoder keeps its classes sorted, so indices 0..3 decode to
    # D, E, F, G -- assuming the stage model was trained with that same
    # ordering (TODO confirm).
    label_encoder = LabelEncoder().fit(original_defg['대출등급'])
    decoded_data = label_encoder.inverse_transform(soft_voting_result)

    answer_abcd = pd.read_csv('../Files/answer_abcd.csv', index_col='ID')

    # `soft_voting_result` was computed row-by-row from `test`, so the
    # predictions must carry test's ID index and the same column name as
    # answer_abcd. Building the frame from the bare array (default RangeIndex,
    # column `0`) would leave these rows with no ID and the grades in a stray
    # column after the concat.
    answer_defg = pd.DataFrame({'대출등급': decoded_data}, index=test.index)

    answer = pd.concat([answer_defg, answer_abcd], axis=0).sort_index()
    answer.to_csv('../Files/answer.csv')