In [1]:
import gc
import time
from datetime import date
import pandas as pd
import pandas_profiling as pdp
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.metrics import roc_auc_score, precision_recall_curve, roc_curve, average_precision_score
from lightgbm import LGBMClassifier

## データ読込

In [3]:
# Read the train and test splits, remember how many leading rows belong to the
# training set, and stack both splits so they share one preprocessing pass.
df_train = pd.read_csv("./data/train.csv")
df_test = pd.read_csv("./data/test.csv")
len_train = len(df_train)
df_all = pd.concat([df_train, df_test], sort=False, ignore_index=True)

## データ探索

In [4]:
# Build a pandas-profiling report for the training split (correlation checks
# are switched off via check_correlation=False).
profile = pdp.ProfileReport(df_train, check_correlation=False)
# Save the report as an HTML file next to the notebook.
profile.to_file(outputfile="output_df_train.html")

In [5]:
profile

0,1
Number of variables,18
Number of observations,27128
Total Missing (%),0.0%
Total size in memory,3.7 MiB
Average record size in memory,144.0 B

0,1
Numeric,8
Categorical,9
Boolean,1
Date,0
Text (Unique),0
Rejected,0
Unsupported,0

0,1
Distinct count,77
Unique (%),0.3%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,40.951
Minimum,18
Maximum,95
Zeros (%),0.0%

0,1
Minimum,18
5-th percentile,27
Q1,33
Median,39
Q3,48
95-th percentile,59
Maximum,95
Range,77
Interquartile range,15

0,1
Standard deviation,10.609
Coef of variation,0.25905
Kurtosis,0.28431
Mean,40.951
MAD,8.7421
Skewness,0.67179
Sum,1110919
Variance,112.54
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
32,1232,4.5%,
33,1211,4.5%,
31,1205,4.4%,
35,1175,4.3%,
34,1129,4.2%,
36,1081,4.0%,
30,1047,3.9%,
37,988,3.6%,
38,886,3.3%,
39,882,3.3%,

Value,Count,Frequency (%),Unnamed: 3
18,7,0.0%,
19,25,0.1%,
20,28,0.1%,
21,40,0.1%,
22,84,0.3%,

Value,Count,Frequency (%),Unnamed: 3
90,1,0.0%,
92,2,0.0%,
93,1,0.0%,
94,1,0.0%,
95,1,0.0%,

0,1
Distinct count,5890
Unique (%),21.7%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,1355.8
Minimum,-6847
Maximum,102127
Zeros (%),7.7%

0,1
Minimum,-6847
5-th percentile,-176
Q1,72
Median,449
Q3,1428
95-th percentile,5704
Maximum,102127
Range,108974
Interquartile range,1356

0,1
Standard deviation,3003.3
Coef of variation,2.2152
Kurtosis,121.22
Mean,1355.8
MAD,1542.1
Skewness,7.7954
Sum,36780166
Variance,9019800
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0,2081,7.7%,
1,125,0.5%,
3,97,0.4%,
2,83,0.3%,
4,79,0.3%,
5,64,0.2%,
6,56,0.2%,
23,48,0.2%,
7,48,0.2%,
8,48,0.2%,

Value,Count,Frequency (%),Unnamed: 3
-6847,1,0.0%,
-3313,1,0.0%,
-3058,1,0.0%,
-2712,1,0.0%,
-2604,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
58932,1,0.0%,
59649,1,0.0%,
64343,1,0.0%,
66721,1,0.0%,
102127,1,0.0%,

0,1
Distinct count,48
Unique (%),0.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,2.7518
Minimum,1
Maximum,63
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,1
Q1,1
Median,2
Q3,3
95-th percentile,8
Maximum,63
Range,62
Interquartile range,2

0,1
Standard deviation,3.1266
Coef of variation,1.1362
Kurtosis,44.137
Mean,2.7518
MAD,1.7843
Skewness,5.1532
Sum,74650
Variance,9.7756
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
1,10555,38.9%,
2,7599,28.0%,
3,3295,12.1%,
4,2070,7.6%,
5,1021,3.8%,
6,762,2.8%,
7,445,1.6%,
8,319,1.2%,
9,195,0.7%,
10,161,0.6%,

Value,Count,Frequency (%),Unnamed: 3
1,10555,38.9%,
2,7599,28.0%,
3,3295,12.1%,
4,2070,7.6%,
5,1021,3.8%,

Value,Count,Frequency (%),Unnamed: 3
50,1,0.0%,
51,1,0.0%,
55,1,0.0%,
58,1,0.0%,
63,1,0.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
cellular,17580
unknown,7861
telephone,1687

Value,Count,Frequency (%),Unnamed: 3
cellular,17580,64.8%,
unknown,7861,29.0%,
telephone,1687,6.2%,

0,1
Distinct count,31
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,15.806
Minimum,1
Maximum,31
Zeros (%),0.0%

0,1
Minimum,1
5-th percentile,3
Q1,8
Median,16
Q3,21
95-th percentile,29
Maximum,31
Range,30
Interquartile range,13

0,1
Standard deviation,8.3379
Coef of variation,0.52751
Kurtosis,-1.0656
Mean,15.806
MAD,7.0739
Skewness,0.09429
Sum,428791
Variance,69.521
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
20,1612,5.9%,
18,1345,5.0%,
21,1233,4.5%,
17,1192,4.4%,
6,1145,4.2%,
5,1123,4.1%,
8,1099,4.1%,
14,1095,4.0%,
28,1091,4.0%,
7,1091,4.0%,

Value,Count,Frequency (%),Unnamed: 3
1,188,0.7%,
2,781,2.9%,
3,672,2.5%,
4,878,3.2%,
5,1123,4.1%,

Value,Count,Frequency (%),Unnamed: 3
27,689,2.5%,
28,1091,4.0%,
29,1060,3.9%,
30,956,3.5%,
31,378,1.4%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
no,26644
yes,484

Value,Count,Frequency (%),Unnamed: 3
no,26644,98.2%,
yes,484,1.8%,

0,1
Distinct count,1424
Unique (%),5.2%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,260.71
Minimum,0
Maximum,4918
Zeros (%),0.0%

0,1
Minimum,0
5-th percentile,35
Q1,104
Median,182
Q3,323
95-th percentile,757
Maximum,4918
Range,4918
Interquartile range,219

0,1
Standard deviation,260.09
Coef of variation,0.99762
Kurtosis,17.967
Mean,260.71
MAD,172.83
Skewness,3.1261
Sum,7072576
Variance,67648
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
104,114,0.4%,
124,113,0.4%,
119,112,0.4%,
114,111,0.4%,
136,111,0.4%,
122,110,0.4%,
81,109,0.4%,
102,107,0.4%,
88,106,0.4%,
139,105,0.4%,

Value,Count,Frequency (%),Unnamed: 3
0,2,0.0%,
2,2,0.0%,
3,2,0.0%,
4,7,0.0%,
5,21,0.1%,

Value,Count,Frequency (%),Unnamed: 3
3102,1,0.0%,
3284,1,0.0%,
3322,1,0.0%,
3422,1,0.0%,
4918,1,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
secondary,13882
tertiary,7959
primary,4150

Value,Count,Frequency (%),Unnamed: 3
secondary,13882,51.2%,
tertiary,7959,29.3%,
primary,4150,15.3%,
unknown,1137,4.2%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
yes,15125
no,12003

Value,Count,Frequency (%),Unnamed: 3
yes,15125,55.8%,
no,12003,44.2%,

0,1
Distinct count,27128
Unique (%),100.0%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,13564
Minimum,1
Maximum,27128
Zeros (%),0.0%

0,1
Minimum,1.0
5-th percentile,1357.4
Q1,6782.8
Median,13564.0
Q3,20346.0
95-th percentile,25772.0
Maximum,27128.0
Range,27127.0
Interquartile range,13564.0

0,1
Standard deviation,7831.3
Coef of variation,0.57734
Kurtosis,-1.2
Mean,13564
MAD,6782
Skewness,0
Sum,367977756
Variance,61330000
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
2047,1,0.0%,
5480,1,0.0%,
1322,1,0.0%,
3371,1,0.0%,
13612,1,0.0%,
15661,1,0.0%,
9518,1,0.0%,
11567,1,0.0%,
21824,1,0.0%,
23873,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
1,1,0.0%,
2,1,0.0%,
3,1,0.0%,
4,1,0.0%,
5,1,0.0%,

Value,Count,Frequency (%),Unnamed: 3
27124,1,0.0%,
27125,1,0.0%,
27126,1,0.0%,
27127,1,0.0%,
27128,1,0.0%,

0,1
Distinct count,12
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
blue-collar,5886
management,5620
technician,4491
Other values (9),11131

Value,Count,Frequency (%),Unnamed: 3
blue-collar,5886,21.7%,
management,5620,20.7%,
technician,4491,16.6%,
admin.,3085,11.4%,
services,2506,9.2%,
retired,1391,5.1%,
self-employed,945,3.5%,
entrepreneur,914,3.4%,
unemployed,790,2.9%,
housemaid,765,2.8%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
no,22788
yes,4340

Value,Count,Frequency (%),Unnamed: 3
no,22788,84.0%,
yes,4340,16.0%,

0,1
Distinct count,3
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
married,16411
single,7662
divorced,3055

Value,Count,Frequency (%),Unnamed: 3
married,16411,60.5%,
single,7662,28.2%,
divorced,3055,11.3%,

0,1
Distinct count,12
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
may,8317
jul,4136
aug,3718
Other values (9),10957

Value,Count,Frequency (%),Unnamed: 3
may,8317,30.7%,
jul,4136,15.2%,
aug,3718,13.7%,
jun,3204,11.8%,
nov,2342,8.6%,
apr,1755,6.5%,
feb,1586,5.8%,
jan,846,3.1%,
oct,439,1.6%,
sep,356,1.3%,

0,1
Distinct count,491
Unique (%),1.8%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,40.528
Minimum,-1
Maximum,871
Zeros (%),0.0%

0,1
Minimum,-1
5-th percentile,-1
Q1,-1
Median,-1
Q3,-1
95-th percentile,318
Maximum,871
Range,872
Interquartile range,0

0,1
Standard deviation,100.38
Coef of variation,2.4769
Kurtosis,6.5687
Mean,40.528
MAD,68.074
Skewness,2.5752
Sum,1099445
Variance,10077
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
-1,22146,81.6%,
182,98,0.4%,
92,89,0.3%,
91,73,0.3%,
183,72,0.3%,
181,66,0.2%,
370,60,0.2%,
184,50,0.2%,
95,47,0.2%,
364,47,0.2%,

Value,Count,Frequency (%),Unnamed: 3
-1,22146,81.6%,
1,12,0.0%,
2,23,0.1%,
3,1,0.0%,
4,2,0.0%,

Value,Count,Frequency (%),Unnamed: 3
808,1,0.0%,
826,1,0.0%,
831,1,0.0%,
838,1,0.0%,
871,1,0.0%,

0,1
Distinct count,4
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
unknown,22150
failure,2969
other,1123

Value,Count,Frequency (%),Unnamed: 3
unknown,22150,81.6%,
failure,2969,10.9%,
other,1123,4.1%,
success,886,3.3%,

0,1
Distinct count,35
Unique (%),0.1%
Missing (%),0.0%
Missing (n),0
Infinite (%),0.0%
Infinite (n),0

0,1
Mean,0.57973
Minimum,0
Maximum,275
Zeros (%),81.6%

0,1
Minimum,0
5-th percentile,0
Q1,0
Median,0
Q3,0
95-th percentile,3
Maximum,275
Range,275
Interquartile range,0

0,1
Standard deviation,2.5037
Coef of variation,4.3186
Kurtosis,5353.7
Mean,0.57973
MAD,0.94653
Skewness,51.74
Sum,15727
Variance,6.2683
Memory size,212.0 KiB

Value,Count,Frequency (%),Unnamed: 3
0,22146,81.6%,
1,1684,6.2%,
2,1273,4.7%,
3,689,2.5%,
4,445,1.6%,
5,264,1.0%,
6,166,0.6%,
7,117,0.4%,
8,82,0.3%,
9,50,0.2%,

Value,Count,Frequency (%),Unnamed: 3
0,22146,81.6%,
1,1684,6.2%,
2,1273,4.7%,
3,689,2.5%,
4,445,1.6%,

Value,Count,Frequency (%),Unnamed: 3
40,1,0.0%,
41,1,0.0%,
51,1,0.0%,
55,1,0.0%,
275,1,0.0%,

0,1
Distinct count,2
Unique (%),0.0%
Missing (%),0.0%
Missing (n),0

0,1
Mean,0.117

0,1
0,23954
1,3174

Value,Count,Frequency (%),Unnamed: 3
0,23954,88.3%,
1,3174,11.7%,

Unnamed: 0,id,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,1,39,blue-collar,married,secondary,no,1756,yes,no,cellular,3,apr,939,1,-1,0,unknown,1
1,2,51,entrepreneur,married,primary,no,1443,no,no,cellular,18,feb,172,10,-1,0,unknown,1
2,3,36,management,single,tertiary,no,436,no,no,cellular,13,apr,567,1,595,2,failure,1
3,4,63,retired,married,secondary,no,474,no,no,cellular,25,jan,423,1,-1,0,unknown,1
4,5,31,management,single,tertiary,no,354,no,no,cellular,30,apr,502,1,9,2,success,1


## データ前処理

In [6]:
# Log transform
## Applied to the columns whose histograms show a skewed distribution.
## balance and pdays are shifted by their minimum so the log argument starts
## at 1; duration and campaign only get the +1 offset. The raw columns are
## dropped once their log versions exist.
df_all = df_all.assign(
    log_balance=np.log(df_all["balance"] - df_all["balance"].min() + 1),
    log_duration=np.log(df_all["duration"] + 1),
    log_campaign=np.log(df_all["campaign"] + 1),
    log_pdays=np.log(df_all["pdays"] - df_all["pdays"].min() + 1),
).drop(columns=["balance", "duration", "campaign", "pdays"])

In [7]:
# Convert month from its three-letter abbreviation to a number.
month_dict = dict(
    jan=1, feb=2, mar=3, apr=4, may=5, jun=6,
    jul=7, aug=8, sep=9, oct=10, nov=11, dec=12,
)
df_all["month_int"] = df_all["month"].map(month_dict)

# Combine month and day into a "2014-<month>-<day>" string and parse it as a
# datetime (the year is a fixed placeholder).
ymd_str = "2014-" + df_all["month_int"].astype(str) + "-" + df_all["day"].astype(str)
data_datetime = pd.to_datetime(ymd_str).values

# Convert the datetime to its int64 representation and take the log of it.
index = pd.DatetimeIndex(data_datetime)
df_all["datetime_int"] = np.log(index.astype(np.int64))

# Drop the columns that are no longer needed, then the temporaries.
df_all = df_all.drop(columns=["month", "day", "month_int"])
del ymd_str, data_datetime, index

In [8]:
# One-hot encode the categorical columns into indicator columns.
cat_cols = [
    "job",
    "marital",
    "education",
    "default",
    "housing",
    "loan",
    "contact",
    "poutcome",
]
df_dummy = pd.get_dummies(df_all.loc[:, cat_cols])

In [9]:
# Join the columns needed for modelling: the numeric features and the target
# on the left, the one-hot encoded categoricals on the right.
df_tmp = df_all.loc[
    :,
    ["age", "datetime_int", "log_balance", "log_duration", "log_campaign", "log_pdays", "y"],
]
df = pd.concat((df_tmp, df_dummy), axis="columns")

## モデリング

LightGBM を試す  
Kaggle の [Home Credit Default Risk](https://www.kaggle.com/ogrellier/good-fun-with-ligthgbm/code) の kernel を参考にして実装

In [10]:
# Make sure automatic garbage collection is switched on before training.
gc.enable()

In [11]:
# Create Folds: 5-fold cross-validation with shuffling and a fixed seed.
folds = KFold(n_splits=5, shuffle=True, random_state=546789)

In [12]:
# The first len_train rows of df are the training rows; the rest are the test rows.
# The target column is split off and removed from both feature frames.
y = df["y"].iloc[:len_train]
data = df.iloc[:len_train].drop(columns="y")
test = df.iloc[len_train:].drop(columns="y")

In [13]:
def train_model(data_, test_, y_, folds_):
    """Train one LightGBM classifier per fold and collect its predictions.

    Parameters
    ----------
    data_ : pandas.DataFrame
        Training features; every column is used as a feature.
    test_ : pandas.DataFrame
        Test features; must contain all columns of ``data_``.
    y_ : pandas.Series
        Target values, positionally aligned with the rows of ``data_``.
    folds_ : cross-validation splitter
        Object exposing ``split(X)`` and ``n_splits`` (e.g. ``KFold``).

    Returns
    -------
    oof_preds : numpy.ndarray
        Out-of-fold positive-class probability for every training row.
    sub_preds : numpy.ndarray
        Positive-class probability for every test row, averaged over folds.
    feature_importance_df : pandas.DataFrame
        Long-format table with columns ``feature``, ``importance`` and ``fold``.
    """
    oof_preds = np.zeros(data_.shape[0])
    sub_preds = np.zeros(test_.shape[0])

    # Per-fold importance frames are collected and concatenated once at the end
    # rather than growing a DataFrame inside the loop.
    fold_importances = []

    feats = list(data_.columns)

    for n_fold, (trn_idx, val_idx) in enumerate(folds_.split(data_)):
        trn_x, trn_y = data_[feats].iloc[trn_idx], y_.iloc[trn_idx]
        val_x, val_y = data_[feats].iloc[val_idx], y_.iloc[val_idx]

        clf = LGBMClassifier(
            n_estimators=4000,
            learning_rate=0.03,
            num_leaves=30,
            colsample_bytree=.8,
            subsample=.9,
            max_depth=7,
            reg_alpha=.1,
            reg_lambda=.1,
            min_split_gain=.01,
            min_child_weight=2,
            silent=-1,
            verbose=-1,
        )

        # NOTE(review): the fit-time `verbose` / `early_stopping_rounds` arguments
        # depend on the installed LightGBM version -- confirm against the pinned one.
        clf.fit(trn_x, trn_y,
                eval_set=[(trn_x, trn_y), (val_x, val_y)],
                eval_metric='auc', verbose=100, early_stopping_rounds=100
               )

        # Predict with the early-stopped iteration; test predictions are averaged
        # across folds by adding 1/n_splits of each fold's prediction.
        oof_preds[val_idx] = clf.predict_proba(val_x, num_iteration=clf.best_iteration_)[:, 1]
        sub_preds += clf.predict_proba(test_[feats], num_iteration=clf.best_iteration_)[:, 1] / folds_.n_splits

        fold_importance_df = pd.DataFrame()
        fold_importance_df["feature"] = feats
        fold_importance_df["importance"] = clf.feature_importances_
        fold_importance_df["fold"] = n_fold + 1
        fold_importances.append(fold_importance_df)

        print('Fold %2d AUC : %.6f' % (n_fold + 1, roc_auc_score(val_y, oof_preds[val_idx])))
        del clf, trn_x, trn_y, val_x, val_y
        gc.collect()

    # Score against the target that was passed in, not a module-level variable.
    print('Full AUC score %.6f' % roc_auc_score(y_, oof_preds))

    # pd.concat rejects an empty list, so fall back to an empty frame if the
    # splitter produced no folds.
    if fold_importances:
        feature_importance_df = pd.concat(fold_importances, axis=0)
    else:
        feature_importance_df = pd.DataFrame()

    return oof_preds, sub_preds, feature_importance_df

In [14]:
def display_importances(feature_importance_df_):
    """Plot the most important LightGBM features and save the figure.

    Keeps the 50 features with the highest mean importance, draws them as a
    horizontal seaborn bar plot and writes the figure to
    ``lgbm_importances.png``.

    Parameters
    ----------
    feature_importance_df_ : pandas.DataFrame
        Table with at least the columns ``feature`` and ``importance``.
    """
    # Rank the features by their mean importance and keep the first 50 names.
    mean_by_feature = feature_importance_df_[["feature", "importance"]].groupby("feature").mean()
    ranked = mean_by_feature.sort_values(by="importance", ascending=False)
    top_features = ranked.index[:50]

    # Keep every row that belongs to one of the selected features.
    is_top = feature_importance_df_.feature.isin(top_features)
    best_features = feature_importance_df_.loc[is_top]
    plot_rows = best_features.sort_values(by="importance", ascending=False)

    plt.figure(figsize=(8,10))
    sns.barplot(x="importance", y="feature", data=plot_rows)
    plt.title('LightGBM Features (avg over folds)')
    plt.tight_layout()
    plt.savefig('lgbm_importances.png')

In [15]:
def display_roc_curve(y_, oof_preds_, folds_idx_):
    """Plot per-fold and overall ROC curves and save them to ``roc_curve.png``.

    Parameters
    ----------
    y_ : pandas.Series
        True labels; indexed positionally with ``.iloc``.
    oof_preds_ : numpy.ndarray
        Out-of-fold scores, positionally aligned with ``y_``.
    folds_idx_ : iterable of (train_idx, val_idx)
        Fold index pairs; only the validation indices are used.
    """
    plt.figure(figsize=(6,6))
    scores = []
    for n_fold, (_, val_idx) in enumerate(folds_idx_):
        # One faint ROC curve per validation fold.
        fold_y, fold_preds = y_.iloc[val_idx], oof_preds_[val_idx]
        fpr, tpr, _thresholds = roc_curve(fold_y, fold_preds)
        score = roc_auc_score(fold_y, fold_preds)
        scores.append(score)
        plt.plot(fpr, tpr, lw=1, alpha=0.3, label='ROC fold %d (AUC = %0.4f)' % (n_fold + 1, score))

    # Diagonal reference line for a random classifier.
    plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Luck', alpha=.8)

    # ROC over all out-of-fold predictions; the +/- term is the std of the fold AUCs.
    fpr, tpr, _thresholds = roc_curve(y_, oof_preds_)
    score = roc_auc_score(y_, oof_preds_)
    # Raw string: "\p" is not a valid escape sequence in a normal string literal.
    plt.plot(fpr, tpr, color='b',
             label=r'Avg ROC (AUC = %0.4f $\pm$ %0.4f)' % (score, np.std(scores)),
             lw=2, alpha=.8)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('LightGBM ROC Curve')
    plt.legend(loc="lower right")
    plt.tight_layout()

    plt.savefig('roc_curve.png')

In [16]:
def display_precision_recall(y_, oof_preds_, folds_idx_):
    """Plot per-fold and overall precision-recall curves and save the figure.

    Recall is drawn on the x axis and precision on the y axis, matching the
    axis labels. The figure is written to ``recall_precision_curve.png``.

    Parameters
    ----------
    y_ : pandas.Series
        True labels; indexed positionally with ``.iloc``.
    oof_preds_ : numpy.ndarray
        Out-of-fold scores, positionally aligned with ``y_``.
    folds_idx_ : iterable of (train_idx, val_idx)
        Fold index pairs; only the validation indices are used.
    """
    plt.figure(figsize=(6,6))

    scores = []
    for n_fold, (_, val_idx) in enumerate(folds_idx_):
        # One faint precision-recall curve per validation fold.
        fold_y, fold_preds = y_.iloc[val_idx], oof_preds_[val_idx]
        precision, recall, _thresholds = precision_recall_curve(fold_y, fold_preds)
        score = average_precision_score(fold_y, fold_preds)
        scores.append(score)
        plt.plot(recall, precision, lw=1, alpha=0.3,
                 label='PR fold %d (AP = %0.4f)' % (n_fold + 1, score))

    # Curve over all out-of-fold predictions; the +/- term is the std of the fold APs.
    precision, recall, _thresholds = precision_recall_curve(y_, oof_preds_)
    score = average_precision_score(y_, oof_preds_)
    # Raw string: "\p" is not a valid escape sequence in a normal string literal.
    plt.plot(recall, precision, color='b',
             label=r'Avg PR (AP = %0.4f $\pm$ %0.4f)' % (score, np.std(scores)),
             lw=2, alpha=.8)

    plt.xlim([-0.05, 1.05])
    plt.ylim([-0.05, 1.05])
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('LightGBM Recall / Precision')
    plt.legend(loc="best")
    plt.tight_layout()

    plt.savefig('recall_precision_curve.png')

In [17]:
# Run the cross-validated training: returns the out-of-fold predictions, the
# fold-averaged test predictions and the per-fold feature importances.
oof_preds, test_preds, importances = train_model(data, test, y, folds)

Training until validation scores don't improve for 100 rounds.
[100]	training's auc: 0.944782	valid_1's auc: 0.921366
[200]	training's auc: 0.956276	valid_1's auc: 0.928553
[300]	training's auc: 0.963963	valid_1's auc: 0.931106
[400]	training's auc: 0.969925	valid_1's auc: 0.931787
[500]	training's auc: 0.974875	valid_1's auc: 0.932272
[600]	training's auc: 0.978679	valid_1's auc: 0.932179
Early stopping, best iteration is:
[521]	training's auc: 0.975704	valid_1's auc: 0.932404
Fold  1 AUC : 0.932404
Training until validation scores don't improve for 100 rounds.
[100]	training's auc: 0.943206	valid_1's auc: 0.934895
[200]	training's auc: 0.954996	valid_1's auc: 0.939831
[300]	training's auc: 0.96305	valid_1's auc: 0.94135
[400]	training's auc: 0.968867	valid_1's auc: 0.941393
Early stopping, best iteration is:
[377]	training's auc: 0.967675	valid_1's auc: 0.941529
Fold  2 AUC : 0.941529
Training until validation scores don't improve for 100 rounds.
[100]	training's auc: 0.941692	valid_

In [18]:
# Pair each test id with its predicted probability and write the result to
# ./submission.csv.
df_submission = df_test[["id"]].copy()
df_submission["y"] = test_preds
df_submission.to_csv("./submission.csv", index=False, header=None, encoding="utf-8")

In [19]:
# Display a few graphs.
# The fold indices are regenerated from the same seeded splitter so that the
# per-fold curves can be drawn from the out-of-fold predictions.
folds_idx = list(folds.split(data))
display_importances(importances)
display_roc_curve(y, oof_preds, folds_idx)
display_precision_recall(y, oof_preds, folds_idx)