In [2]:
import pandas as pd
import numpy as np
import random
import os

def seed_everything(seed: int = 42):
    """Seed the global RNGs used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator, and
    writes the seed into the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value passed to every seeding call. Defaults to 42.
    """
    # Environment variables are strings, hence the str() conversion.
    # NOTE(review): hash randomisation is normally fixed at interpreter
    # start-up, so this presumably only affects processes launched later --
    # confirm if hash stability inside this kernel matters.
    os.environ["PYTHONHASHSEED"] = str(seed)

    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

# Seed the RNGs once, before any estimator is constructed further down.
seed_everything(seed=42)

from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.feature_selection import RFECV

import warnings

# Silences every warning category for the rest of the session.
warnings.filterwarnings(action='ignore')

# Raw competition files, read from the current working directory.
train_raw = pd.read_csv(filepath_or_buffer='train.csv')
test_raw = pd.read_csv(filepath_or_buffer='test.csv')

def _add_loan_features(df):
    """Apply the preprocessing steps shared by the train and test frames.

    Drops ``ID``, converts ``대출기간`` to an integer, and appends the two
    derived ratio columns.

    Args:
        df: Raw frame containing at least ``ID``, ``대출기간``, ``총상환원금``,
            ``총상환이자`` and ``대출금액``.

    Returns:
        A new DataFrame; ``df`` itself is left unmodified because ``drop``
        returns a copy.
    """
    out = df.drop(columns=['ID'])

    # Strip every non-digit character from the term text, then integer-divide
    # by 12 (presumably months -> years; confirm against the data dictionary).
    term_digits = out['대출기간'].str.replace('[^0-9]', '', regex=True)
    out['대출기간'] = term_digits.astype(int) // 12

    # Derived features.
    # The +1 keeps the denominator non-zero when 총상환이자 is 0.
    out['총상환원금/총상환이자'] = out['총상환원금'] / (out['총상환이자'] + 1)
    out['상환비율'] = out['총상환원금'] / out['대출금액']
    return out


def preprocess(train, test):
    """Preprocess the raw train and test frames with identical feature steps.

    Both frames go through ``_add_loan_features``; in addition the train
    target column ``대출등급`` is replaced by integer codes from a
    ``LabelEncoder`` fitted on that column.

    Args:
        train: Raw training frame, including the ``대출등급`` target column.
        test: Raw test frame with the same feature columns.

    Returns:
        Tuple ``(train, test)`` of new, preprocessed DataFrames.

    Raises:
        KeyError: If a required column is missing from either frame.
    """
    train = _add_loan_features(train)
    test = _add_loan_features(test)

    # Categorical encoding of the target (train only).
    train['대출등급'] = LabelEncoder().fit_transform(train['대출등급'])

    return train, test

# Build the model-ready frames from the raw CSV frames.
train, test = preprocess(train=train_raw, test=test_raw)


In [3]:
# Re-reads the same two CSV files that the first cell already loaded.
train_raw, test_raw = pd.read_csv('train.csv'), pd.read_csv('test.csv')

In [4]:
# Recompute the preprocessed frames from the freshly loaded raw frames.
train, test = preprocess(train=train_raw, test=test_raw)

In [5]:
# One shared column list, so the train and test matrices always use the same
# features in the same order.
feature_cols = ['대출기간', '총상환원금/총상환이자', '상환비율']

X = train[feature_cols]
y = train['대출등급']  # integer-encoded target produced by preprocess()
test = test[feature_cols]

In [6]:
# Base learners for the stacking ensemble. Every model uses random_state=42.

# Random forest (gini split criterion).
rf = RandomForestClassifier(
    n_estimators=305,
    criterion='gini',
    max_depth=62,
    min_samples_split=7,
    min_samples_leaf=1,
    random_state=42,
)

# Single decision tree (entropy split criterion).
dt = DecisionTreeClassifier(
    criterion='entropy',
    max_depth=25,
    min_samples_split=2,
    min_samples_leaf=1,
    random_state=42,
)

# Extremely randomised trees (entropy split criterion).
et = ExtraTreesClassifier(
    n_estimators=930,
    criterion='entropy',
    max_depth=65,
    min_samples_split=6,
    min_samples_leaf=1,
    random_state=42,
)

# Gradient-boosted trees.
# NOTE(review): the long-decimal values look like the output of an automated
# hyperparameter search -- confirm their origin before editing them by hand.
xgb = XGBClassifier(
    n_estimators=665,
    learning_rate=0.4046062291148477,
    max_depth=64,
    min_child_weight=2,
    subsample=0.6579847353498132,
    colsample_bytree=0.7664006730032823,
    reg_alpha=0.8831857977740336,
    reg_lambda=0.04614513317156364,
    tree_method="exact",
    random_state=42,
)

In [7]:
# Fit the scaler on the training features only, then apply the same fitted
# transform to both matrices. This cell rebinds X and test to the scaled
# arrays, so running it twice scales already-scaled data.
scale = StandardScaler().fit(X)
X = scale.transform(X)
test = scale.transform(test)

In [8]:
# (name, estimator) pairs used as the first level of the stack.
estimators = [
    ('et', et),
    ('xgb', xgb),
    ('dt', dt),
    ('rf', rf),
]

# A logistic regression combines the base learners' outputs.
stack = StackingClassifier(
    estimators=estimators,
    final_estimator=LogisticRegression(),
    verbose=1,
)

# fit() returns the fitted estimator, so prediction can be chained onto it.
pred = stack.fit(X, y).predict(test)

In [9]:
# Submission template; its 대출등급 column is overwritten with the predictions.
sub = pd.read_csv('sample_submission.csv')

# LabelEncoder numbers classes by their sorted order, so rebuild the
# code -> grade lookup from the raw training labels instead of hard-coding
# A..G. The lookup then stays correct even if a grade is absent from the
# training data.
grade_by_code = dict(enumerate(sorted(train_raw['대출등급'].unique())))

# Codes without an entry in the lookup become NaN, as with the previous
# literal mapping.
sub['대출등급'] = pd.Series(pred, index=sub.index).map(grade_by_code)
sub.head()

Unnamed: 0,ID,대출등급
0,TEST_00000,B
1,TEST_00001,B
2,TEST_00002,A
3,TEST_00003,C
4,TEST_00004,C
