In [39]:
import numpy as np
import pandas as pd
import gc
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve, average_precision_score
import warnings

In [2]:
import lightgbm as lgb
from lightgbm import LGBMClassifier

In [4]:
bankdata  = pd.read_csv("cleanbankdata.csv")
testdata = pd.read_csv("cleantestdata.csv")
data = pd.concat([bankdata,testdata]).reset_index(drop=True)

In [19]:
data = data.iloc[:,1:]

In [20]:
data

Unnamed: 0,loan_id,user_id,total_loan,year_of_loan,interest,monthly_payment,class,employer_type,industry,work_year,...,f3,f4,early_return,early_return_amount,early_return_amount_3mon,isDefault,issue_date_month,issue_date_dayofweek,earliesCreditMon,earliesCreditYear
0,1040418,240418,31818.181820,3,11.466,1174.91,3,3,13,3.0,...,5.0,4.0,3,9927,0.000000,0.0,10,5,12,2001
1,1025197,225197,28000.000000,5,16.841,670.69,3,3,13,10.0,...,45.0,22.0,0,0,0.000000,0.0,6,5,4,1990
2,1009360,209360,17272.727270,3,8.900,603.32,1,3,3,10.0,...,28.0,19.0,0,0,0.000000,0.0,1,2,10,1991
3,1039708,239708,20000.000000,3,4.788,602.30,1,1,10,6.0,...,15.0,9.0,0,0,0.000000,0.0,7,2,6,2001
4,1027483,227483,15272.727270,3,12.790,470.31,3,3,2,0.0,...,15.0,4.0,0,0,0.000000,0.0,7,4,5,2001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14993,1008856,208856,9454.545455,5,12.015,183.47,3,3,2,0.0,...,14.0,8.0,0,0,0.000000,,1,5,7,2001
14994,1016651,216651,5500.000000,3,7.970,172.28,1,3,8,5.0,...,5.0,3.0,3,1564,0.000000,,5,0,4,2001
14995,1024140,224140,30545.454550,3,8.900,889.09,1,0,8,10.0,...,20.0,14.0,2,5456,1510.892308,,12,6,10,1986
14996,1014316,214316,4090.909091,3,6.030,152.18,1,3,10,10.0,...,10.0,10.0,3,223,41.169231,,9,5,3,1999


In [30]:
train = data[data["isDefault"].notna()]
test = data[data["isDefault"].isna()]
y = train["isDefault"]

In [31]:
features = [f for f in train.columns if f not in ['loan_id', 'user_id', 'isDefault']]
features

['total_loan',
 'year_of_loan',
 'interest',
 'monthly_payment',
 'class',
 'employer_type',
 'industry',
 'work_year',
 'house_exist',
 'censor_status',
 'use',
 'post_code',
 'region',
 'debt_loan_ratio',
 'del_in_18month',
 'scoring_low',
 'scoring_high',
 'pub_dero_bankrup',
 'recircle_b',
 'recircle_u',
 'initial_list_status',
 'title',
 'policy_code',
 'f0',
 'f1',
 'f2',
 'f3',
 'f4',
 'early_return',
 'early_return_amount',
 'early_return_amount_3mon',
 'issue_date_month',
 'issue_date_dayofweek',
 'earliesCreditMon',
 'earliesCreditYear']

In [35]:
ordata_preds = np.zeros(train.shape[0])
preds = np.zeros(test.shape[0])

In [36]:
kfold = StratifiedKFold(n_splits = 5,shuffle=True ,random_state=0)

In [60]:
for k, (train_index, valuation_index) in enumerate(kfold.split(train,y)):
    train_x, train_y = train[features].iloc[train_index], y.iloc[train_index]
    valuation_x, valuation_y = train[features].iloc[valuation_index], y.iloc[valuation_index]
    lgbmodel = LGBMClassifier(n_estimators=2500,
                              learning_rate=0.08,
                              num_leaves= 20,
                              colsample_bytree=0.65,
                              subsample=0.9,
                              max_depth=10,
                              reg_alpha=0.3,
                              reg_lambda=0.3,           
                              min_split_gain=0.01,
                              min_child_weight=2,
                              silent=-1,
                              verbose=-1)
    lgbmodel.fit(train_x,train_y)
    ordata_preds[valuation_index]  = lgbmodel.predict(valuation_x)
    print("Fold %2d AUC : %.6f" % (k + 1, roc_auc_score(valuation_y, ordata_preds[valuation_index])))
    preds  += lgbmodel.predict(test[features]) / kfold.n_splits
    del lgbmodel, train_x, train_y, valuation_x, valuation_y 
    gc.collect()
print(" Full AUC score %.6f" % roc_auc_score(y, ordata_preds)) 



Fold  1 AUC : 0.637463




Fold  2 AUC : 0.637491




Fold  3 AUC : 0.656655




Fold  4 AUC : 0.669864




Fold  5 AUC : 0.674359
 Full AUC score 0.655162
