# Stacking Test-Sklearn, XGBoost, CatBoost, LightGBM
* 참고 Notebook : https://www.kaggle.com/eliotbarr/stacking-test-sklearn-xgboost-catboost-lightgbm

In [9]:
import pandas as pd
import numpy as np
import gc
import xgboost as xgb

from sklearn.model_selection import KFold
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier

from math import sqrt
from scipy.stats import skew

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


# Global Variables

In [11]:
# Notebook-wide constants.
# NOTE(review): the meanings below are inferred from the names only -- confirm
# against the cells that actually consume them.

# Presumably the number of cross-validation folds (KFold is imported above).
NFOLDS = 3
# Presumably the random seed used for reproducibility.
SEED = 0
# Presumably an optional row cap when loading data; None would mean "no cap".
NROWS = None

# Data Load

In [24]:
# Read the three CSV files in a fixed order and bind them to
# train / test / prev respectively.
train, test, prev = (
    pd.read_csv(csv_path)
    for csv_path in (
        'home_credit_data/application_train.csv',
        'home_credit_data/application_test.csv',
        'home_credit_data/previous_application.csv',
    )
)

# Print each frame's (rows, columns) shape on its own line.
print(train.shape, test.shape, prev.shape, sep='\n')

(307511, 122)
(48744, 121)
(1670214, 37)


In [25]:
# Collect the names of all object-dtype columns of `train`, in column order.
categorical_features = [
    name for name, dtype in train.dtypes.items() if dtype == 'object'
]
# Bare expression so the notebook displays the resulting list.
categorical_features

['NAME_CONTRACT_TYPE',
 'CODE_GENDER',
 'FLAG_OWN_CAR',
 'FLAG_OWN_REALTY',
 'NAME_TYPE_SUITE',
 'NAME_INCOME_TYPE',
 'NAME_EDUCATION_TYPE',
 'NAME_FAMILY_STATUS',
 'NAME_HOUSING_TYPE',
 'OCCUPATION_TYPE',
 'WEEKDAY_APPR_PROCESS_START',
 'ORGANIZATION_TYPE',
 'FONDKAPREMONT_MODE',
 'HOUSETYPE_MODE',
 'WALLSMATERIAL_MODE',
 'EMERGENCYSTATE_MODE']

In [26]:
# Show the first five raw values of one object-dtype column.
train['NAME_CONTRACT_TYPE'].head(n=5)

0         Cash loans
1         Cash loans
2    Revolving loans
3         Cash loans
4         Cash loans
Name: NAME_CONTRACT_TYPE, dtype: object

In [33]:
# Convert the categorical columns to integer codes.
# The code table is built from `train` and then reused for `test`, so both
# frames share one mapping. pandas assigns -1 to missing values in
# factorize() and to values not found in the index in get_indexer().
for x in categorical_features:
    codes, indexer = pd.factorize(train[x])
    train[x] = codes
    test[x] = indexer.get_indexer(test[x])

# NOTE(review): gc.enable() only switches automatic garbage collection on; it
# does not trigger a collection. If the intent was to free memory here,
# gc.collect() would be needed -- confirm.
gc.enable()

# Display the same column again, now holding integer codes.
train['NAME_CONTRACT_TYPE'].head()

0    0
1    0
2    1
3    0
4    0
Name: NAME_CONTRACT_TYPE, dtype: int64

In [34]:
# Split the label from the features: pop() returns the TARGET column and
# removes it from `train` in a single step.
y_train = train.pop('TARGET')

# Shape of the remaining feature frame.
print(train.shape)

(307511, 121)


In [35]:
# Collect the names of all object-dtype columns of `prev`, in column order.
prev_cat_features = [
    name for name, dtype in prev.dtypes.items() if dtype == 'object'
]
# Bare expression so the notebook displays the resulting list.
prev_cat_features

['NAME_CONTRACT_TYPE',
 'WEEKDAY_APPR_PROCESS_START',
 'FLAG_LAST_APPL_PER_CONTRACT',
 'NAME_CASH_LOAN_PURPOSE',
 'NAME_CONTRACT_STATUS',
 'NAME_PAYMENT_TYPE',
 'CODE_REJECT_REASON',
 'NAME_TYPE_SUITE',
 'NAME_CLIENT_TYPE',
 'NAME_GOODS_CATEGORY',
 'NAME_PORTFOLIO',
 'NAME_PRODUCT_TYPE',
 'CHANNEL_TYPE',
 'NAME_SELLER_INDUSTRY',
 'NAME_YIELD_GROUP',
 'PRODUCT_COMBINATION']

In [40]:
# Replace each object-dtype column of `prev` with its integer codes.
# The table of unique values is discarded: unlike train/test, no second
# frame is encoded with it in this cell.
for x in prev_cat_features:
    encoded, _ = pd.factorize(prev[x])
    prev[x] = encoded

# Display one encoded column as a check.
prev['NAME_CONTRACT_TYPE'].head()

0    0
1    1
2    1
3    1
4    1
Name: NAME_CONTRACT_TYPE, dtype: int64