In [1]:
import os
import time
from pathlib import Path

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import early_stopping, log_evaluation
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

## <b>1.  Importing Data to Python</b>
---

In [3]:
# Load outlier_application_train.csv, the table every later cell adds columns to.
# The directory defaults to the location originally hard-coded in this notebook;
# set the APPLICATION_DATA_DIR environment variable to run on another machine
# without editing this cell.
DATA_DIR = Path(os.environ.get('APPLICATION_DATA_DIR', 'D:/Code Skripsi - RL and Ensemble/data/raw'))
feature_application_train = pd.read_csv(DATA_DIR / 'outlier_application_train.csv')

In [4]:
# Show the (rows, columns) of the frame right after loading.
feature_application_train.shape

(307511, 45)

In [5]:
# Preview the first rows to inspect column names and value scales.
feature_application_train.head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,...,ACTIVE_LOANS,OVERDUE_MONTHS,NUM_PREV_LOANS,SUM_PREV_CREDIT,AVG_PREV_CREDIT,RECENT_LOAN_DAYS,MONTHS_BALANCE_min,MONTHS_BALANCE_max,AMT_PAYMENT,DAYS_ENTRY_PAYMENT
0,100002,1,Cash loans,N,Y,0,41947875.0,84226670.0,5116708.575,72709650.0,...,2.0,65.0,1.0,37091240.0,37091240.0,-606.0,-19.0,-1.0,2394498.0,0
1,100003,0,Cash loans,N,N,0,55930500.0,267949000.0,7394944.275,233975925.0,...,1.0,0.0,3.0,300900500.0,100300200.0,-2341.0,-77.0,-18.0,16273390.0,0
2,100004,0,Revolving loans,Y,Y,0,13982625.0,27965250.0,1398262.5,27965250.0,...,0.0,0.0,1.0,4164958.0,4164958.0,-815.0,-27.0,-24.0,1469969.0,0
3,100006,0,Cash loans,N,Y,0,27965250.0,64772180.0,6149558.475,61523550.0,...,0.713354,0.28985,9.0,543822500.0,60424720.0,-617.0,-20.0,-1.0,36806040.0,0
4,100007,0,Cash loans,N,Y,0,25168725.0,106268000.0,4529438.325,106267950.0,...,0.0,0.0,6.0,207115300.0,34519220.0,-2357.0,-77.0,-1.0,2417760.0,0


## <b>2.  Financial Ratios</b>
---

In [6]:
# Financial ratios
#
# Each new column is the element-wise quotient numerator / denominator of two
# existing amount columns. The mapping is ordered, so the columns are appended
# to the frame in the order listed here.
ratio_definitions = {
    'CREDIT_INCOME_RATIO': ('AMT_CREDIT', 'AMT_INCOME_TOTAL'),
    'ANNUITY_INCOME_RATIO': ('AMT_ANNUITY', 'AMT_INCOME_TOTAL'),
    'CREDIT_ANNUITY_RATIO': ('AMT_CREDIT', 'AMT_ANNUITY'),
    'CREDIT_GOODS_RATIO': ('AMT_CREDIT', 'AMT_GOODS_PRICE'),
    'ANNUITY_GOODS_RATIO': ('AMT_ANNUITY', 'AMT_GOODS_PRICE'),
    'PAYMENT_CREDIT_RATIO': ('AMT_PAYMENT', 'AMT_CREDIT'),
}

for ratio_name, (numerator, denominator) in ratio_definitions.items():
    feature_application_train[ratio_name] = feature_application_train[numerator].div(
        feature_application_train[denominator]
    )

## <b>3.  Age & Employment Features</b>
---

In [7]:
# Age & employment features
#
# Convert the DAYS_* columns to years. Dividing by -365 both rescales and flips
# the sign (presumably the DAYS_* values are negative offsets — confirm against
# the data dictionary).
for days_column, years_column in (
    ('DAYS_EMPLOYED', 'YEARS_EMPLOYED'),
    ('DAYS_BIRTH', 'YEARS_BIRTH'),
):
    feature_application_train[years_column] = feature_application_train[days_column].div(-365)

# Share of the applicant's age spent in the current employment.
feature_application_train['EMPLOYMENT_TO_AGE_RATIO'] = feature_application_train['YEARS_EMPLOYED'].div(
    feature_application_train['YEARS_BIRTH']
)

# Credit amount relative to tenure; the denominator is YEARS_EMPLOYED + 1.
feature_application_train['CREDIT_TO_EMPLOYMENT_RATIO'] = feature_application_train['AMT_CREDIT'].div(
    feature_application_train['YEARS_EMPLOYED'].add(1)
)

# Credit amount relative to age in years.
feature_application_train['CREDIT_TO_AGE_RATIO'] = feature_application_train['AMT_CREDIT'].div(
    feature_application_train['YEARS_BIRTH']
)

## <b>4.  Family Responsibility Indicators</b>
---

In [8]:
# Family responsibility indicators
#
# Both ratios share the denominator CNT_FAM_MEMBERS + 1.
# NOTE(review): because of the + 1, INCOME_PER_FAMILY_MEMBER is income per
# (members + 1), not per member — confirm this offset is intended.
family_size_plus_one = feature_application_train['CNT_FAM_MEMBERS'].add(1)

feature_application_train['INCOME_PER_FAMILY_MEMBER'] = feature_application_train['AMT_INCOME_TOTAL'].div(
    family_size_plus_one
)
feature_application_train['CHILD_DEPENDENT_RATIO'] = feature_application_train['CNT_CHILDREN'].div(
    family_size_plus_one
)

# 1 when the applicant has at least one child, otherwise 0.
feature_application_train['HAS_CHILDREN'] = feature_application_train['CNT_CHILDREN'].gt(0).astype(int)

## <b>5.  Housing & Ownership Stability</b>
---


In [11]:
# # Housing & Ownership Stability
#
# NOTE(review): this whole cell is commented out, so none of the three columns
# below are created. As written it would not work against the frame previewed
# after loading: FLAG_OWN_CAR / FLAG_OWN_REALTY are displayed there as 'Y'/'N'
# strings, so `&` and `== 0` do not express "owns" / "does not own", and no
# visible cell creates YEARS_REGISTRATION — confirm before re-enabling.
#
# feature_application_train['HAS_CAR_AND_REALTY'] = (feature_application_train['FLAG_OWN_CAR'] & feature_application_train['FLAG_OWN_REALTY']).astype(int)
# feature_application_train['NO_PROPERTY_OR_CAR'] = ((feature_application_train['FLAG_OWN_CAR'] == 0) & (feature_application_train['FLAG_OWN_REALTY'] == 0)).astype(int)
# feature_application_train['HOUSING_STABILITY'] = feature_application_train['YEARS_REGISTRATION'] / feature_application_train['YEARS_BIRTH']

In [16]:
# Adding LOAN TENURE gave (307511, 60) -> became (307511, 48) after manual column selection.
# NOTE(review): the output recorded for this cell is (307511, 59), so the shapes
# quoted in the line above look stale — confirm.
feature_application_train.shape

(307511, 59)

In [15]:
# Export of the engineered feature table is currently disabled; uncomment the
# line below to write it to CSV (index=False keeps the row index out of the file).
# feature_application_train.to_csv("feature_application_train.csv", index=False)