In [1]:
from collections import Counter
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, roc_curve
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import lightgbm as lgb
import lazypredict
from lazypredict.Supervised import LazyClassifier

In [2]:
home = 'C:\\Users\\iksri\\Documents\\Data Science\\Projects\\Credit Risk\\Home Loan Level Data - Freddie Mac\\Datasets'

In [3]:
y2010 = pd.DataFrame()
y2011 = pd.DataFrame()
y2012 = pd.DataFrame()
y2013 = pd.DataFrame()
y2014 = pd.DataFrame()
y2015 = pd.DataFrame()
y2016 = pd.DataFrame()
y2017 = pd.DataFrame()

data = [y2010,
        y2011,
        y2012,
        y2013,
        y2014,
        y2015,
        y2016,
        y2017]

In [4]:
i = 0
for yr in range(2010, 2018):
    data[i] = pd.read_csv(home + f"\\sample_{yr}.csv", index_col = 'Loan Sequence Number')
    i += 1

In [5]:
df = pd.concat(data[:-1]).sample(200000)

## Model Training

In [6]:
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df.drop('Delinquent', axis = 1))

In [7]:
X = scaled_features
y = df['Delinquent'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [8]:
clf = LazyClassifier(verbose = 2,ignore_warnings = False, custom_metric = None)

In [9]:
# used smaller subsets of data for quick overview results
models, predictions = clf.fit(X_train[:10000], X_test[:2000], y_train[:10000], y_test[:2000])

  3%|▎         | 1/29 [00:00<00:17,  1.57it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.9065, 'Balanced Accuracy': 0.5074268470245644, 'ROC AUC': 0.5074268470245643, 'F1 Score': 0.86441952171552, 'Time taken': 0.6380515098571777}


  7%|▋         | 2/29 [00:01<00:18,  1.48it/s]

{'Model': 'BaggingClassifier', 'Accuracy': 0.905, 'Balanced Accuracy': 0.5161335775679865, 'ROC AUC': 0.5161335775679864, 'F1 Score': 0.8671032459348335, 'Time taken': 0.7005038261413574}
{'Model': 'BernoulliNB', 'Accuracy': 0.903, 'Balanced Accuracy': 0.5054952796956461, 'ROC AUC': 0.5054952796956461, 'F1 Score': 0.8625663157894737, 'Time taken': 0.03886079788208008}


 21%|██        | 6/29 [00:08<00:28,  1.26s/it]

{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.906, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.8613179433368309, 'Time taken': 6.560213804244995}
CategoricalNB model failed to execute
Negative values in data passed to CategoricalNB (input X)
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.8325, 'Balanced Accuracy': 0.5357134469963835, 'ROC AUC': 0.5357134469963835, 'F1 Score': 0.8367927733802711, 'Time taken': 0.12136983871459961}
{'Model': 'DummyClassifier', 'Accuracy': 0.906, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.8613179433368309, 'Time taken': 0.02700495719909668}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.835, 'Balanced Accuracy': 0.522791320276173, 'ROC AUC': 0.522791320276173, 'F1 Score': 0.8361589955979266, 'Time taken': 0.035361528396606445}


 31%|███       | 9/29 [00:09<00:16,  1.22it/s]

{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.905, 'Balanced Accuracy': 0.5042153961767883, 'ROC AUC': 0.5042153961767883, 'F1 Score': 0.8627095578874375, 'Time taken': 1.092029094696045}
{'Model': 'GaussianNB', 'Accuracy': 0.5865, 'Balanced Accuracy': 0.581108214738622, 'ROC AUC': 0.581108214738622, 'F1 Score': 0.672081058288002, 'Time taken': 0.03718447685241699}


 38%|███▊      | 11/29 [00:09<00:10,  1.68it/s]

{'Model': 'KNeighborsClassifier', 'Accuracy': 0.9, 'Balanced Accuracy': 0.5086069231130524, 'ROC AUC': 0.5086069231130526, 'F1 Score': 0.8626661640909663, 'Time taken': 0.20762228965759277}


 41%|████▏     | 12/29 [00:16<00:31,  1.83s/it]

{'Model': 'LabelPropagation', 'Accuracy': 0.845, 'Balanced Accuracy': 0.5473791742989996, 'ROC AUC': 0.5473791742989996, 'F1 Score': 0.845367258873609, 'Time taken': 7.1649839878082275}


 48%|████▊     | 14/29 [00:25<00:37,  2.51s/it]

{'Model': 'LabelSpreading', 'Accuracy': 0.845, 'Balanced Accuracy': 0.5473791742989996, 'ROC AUC': 0.5473791742989996, 'F1 Score': 0.845367258873609, 'Time taken': 8.408612966537476}
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.9075, 'Balanced Accuracy': 0.5127459959607346, 'ROC AUC': 0.5127459959607347, 'F1 Score': 0.8667705785235352, 'Time taken': 0.16219258308410645}


 59%|█████▊    | 17/29 [00:27<00:17,  1.44s/it]

{'Model': 'LinearSVC', 'Accuracy': 0.9065, 'Balanced Accuracy': 0.5026595744680851, 'ROC AUC': 0.5026595744680851, 'F1 Score': 0.8625386607134179, 'Time taken': 2.0585544109344482}
{'Model': 'LogisticRegression', 'Accuracy': 0.9075, 'Balanced Accuracy': 0.510362359682495, 'ROC AUC': 0.510362359682495, 'F1 Score': 0.8658695803067977, 'Time taken': 0.07758140563964844}
{'Model': 'NearestCentroid', 'Accuracy': 0.6455, 'Balanced Accuracy': 0.651807101592222, 'ROC AUC': 0.651807101592222, 'F1 Score': 0.7192671135528342, 'Time taken': 0.09158086776733398}
NuSVC model failed to execute
specified nu is infeasible


 69%|██████▉   | 20/29 [00:27<00:07,  1.28it/s]

{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.862, 'Balanced Accuracy': 0.4971701657977549, 'ROC AUC': 0.4971701657977549, 'F1 Score': 0.844281815478485, 'Time taken': 0.057778120040893555}
{'Model': 'Perceptron', 'Accuracy': 0.8365, 'Balanced Accuracy': 0.5117009534545113, 'ROC AUC': 0.5117009534545113, 'F1 Score': 0.8351067882036574, 'Time taken': 0.04336285591125488}
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.7155, 'Balanced Accuracy': 0.571256634258607, 'ROC AUC': 0.5712566342586068, 'F1 Score': 0.768377786585814, 'Time taken': 0.08000349998474121}


 83%|████████▎ | 24/29 [00:29<00:02,  1.85it/s]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.905, 'Balanced Accuracy': 0.49944812362030905, 'ROC AUC': 0.49944812362030905, 'F1 Score': 0.8608188976377953, 'Time taken': 1.2373666763305664}
{'Model': 'RidgeClassifier', 'Accuracy': 0.906, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.8613179433368309, 'Time taken': 0.056729793548583984}
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.906, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.8613179433368309, 'Time taken': 0.11687898635864258}


 86%|████████▌ | 25/29 [00:29<00:01,  2.13it/s]

{'Model': 'SGDClassifier', 'Accuracy': 0.9035, 'Balanced Accuracy': 0.5010039453290123, 'ROC AUC': 0.5010039453290122, 'F1 Score': 0.8610093062434717, 'Time taken': 0.13434362411499023}


 90%|████████▉ | 26/29 [00:32<00:03,  1.00s/it]

{'Model': 'SVC', 'Accuracy': 0.906, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.8613179433368309, 'Time taken': 3.0809333324432373}
StackingClassifier model failed to execute
__init__() missing 1 required positional argument: 'estimators'


100%|██████████| 29/29 [00:33<00:00,  1.38it/s]

{'Model': 'XGBClassifier', 'Accuracy': 0.901, 'Balanced Accuracy': 0.5186933446057019, 'ROC AUC': 0.5186933446057019, 'F1 Score': 0.8664263565891474, 'Time taken': 1.2616851329803467}
{'Model': 'LGBMClassifier', 'Accuracy': 0.9045, 'Balanced Accuracy': 0.5087067305434221, 'ROC AUC': 0.5087067305434221, 'F1 Score': 0.8642524540619435, 'Time taken': 0.19086313247680664}


100%|██████████| 29/29 [00:33<00:00,  1.16s/it]


## Balancing Target Classes

It is clear from Balanced Accuracy scores of around 0.5 that none of these algorithms are able to overcome the class imbalance of the target features to make accurate predictions. Almost all models are overpredicting values of 0(no delinquency).

### Undersampling

In [10]:
delinquent = df[df['Delinquent'] == 1]
non_delinquent = df[df['Delinquent'] == 0].sample(n = len(delinquent))
undersample = pd.concat([delinquent, non_delinquent])

undersample_features = scaler.fit_transform(undersample.drop('Delinquent', axis = 1))
X = undersample_features
y = undersample['Delinquent'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [11]:
clf = LazyClassifier(verbose = 2,ignore_warnings = False, custom_metric = None)

In [12]:
# used smaller subsets of data for quick overview results
models, predictions = clf.fit(X_train[:10000], X_test[:2000], y_train[:10000], y_test[:2000])

  3%|▎         | 1/29 [00:00<00:16,  1.66it/s]

{'Model': 'AdaBoostClassifier', 'Accuracy': 0.6535, 'Balanced Accuracy': 0.6538985256939751, 'ROC AUC': 0.653898525693975, 'F1 Score': 0.653593654010743, 'Time taken': 0.6006007194519043}


  7%|▋         | 2/29 [00:01<00:16,  1.60it/s]

{'Model': 'BaggingClassifier', 'Accuracy': 0.591, 'Balanced Accuracy': 0.5887462141122876, 'ROC AUC': 0.5887462141122876, 'F1 Score': 0.5892490997456623, 'Time taken': 0.6453878879547119}
{'Model': 'BernoulliNB', 'Accuracy': 0.6125, 'Balanced Accuracy': 0.6134089259342694, 'ROC AUC': 0.6134089259342695, 'F1 Score': 0.6124311204801898, 'Time taken': 0.047092437744140625}


 21%|██        | 6/29 [00:07<00:26,  1.17s/it]

{'Model': 'CalibratedClassifierCV', 'Accuracy': 0.649, 'Balanced Accuracy': 0.6489549698380517, 'ROC AUC': 0.6489549698380517, 'F1 Score': 0.6490927174052253, 'Time taken': 6.074893951416016}
CategoricalNB model failed to execute
Negative values in data passed to CategoricalNB (input X)
{'Model': 'DecisionTreeClassifier', 'Accuracy': 0.5405, 'Balanced Accuracy': 0.5407949738429576, 'ROC AUC': 0.5407949738429576, 'F1 Score': 0.5406182194086898, 'Time taken': 0.10046720504760742}
{'Model': 'DummyClassifier', 'Accuracy': 0.4825, 'Balanced Accuracy': 0.5, 'ROC AUC': 0.5, 'F1 Score': 0.3140725126475548, 'Time taken': 0.03000640869140625}
{'Model': 'ExtraTreeClassifier', 'Accuracy': 0.549, 'Balanced Accuracy': 0.5492177917949488, 'ROC AUC': 0.5492177917949488, 'F1 Score': 0.5491290078023185, 'Time taken': 0.0394132137298584}


 38%|███▊      | 11/29 [00:09<00:10,  1.70it/s]

{'Model': 'ExtraTreesClassifier', 'Accuracy': 0.622, 'Balanced Accuracy': 0.6219218542714826, 'ROC AUC': 0.6219218542714825, 'F1 Score': 0.6220998495133196, 'Time taken': 1.3502740859985352}
{'Model': 'GaussianNB', 'Accuracy': 0.54, 'Balanced Accuracy': 0.5256639383244475, 'ROC AUC': 0.5256639383244475, 'F1 Score': 0.4452738438032556, 'Time taken': 0.031620025634765625}
{'Model': 'KNeighborsClassifier', 'Accuracy': 0.569, 'Balanced Accuracy': 0.5693124076994318, 'ROC AUC': 0.5693124076994318, 'F1 Score': 0.5691137977695301, 'Time taken': 0.17728328704833984}


 41%|████▏     | 12/29 [00:15<00:29,  1.72s/it]

{'Model': 'LabelPropagation', 'Accuracy': 0.53, 'Balanced Accuracy': 0.5297739731170684, 'ROC AUC': 0.5297739731170684, 'F1 Score': 0.5301175734834271, 'Time taken': 6.610023498535156}


 48%|████▊     | 14/29 [00:23<00:33,  2.24s/it]

{'Model': 'LabelSpreading', 'Accuracy': 0.528, 'Balanced Accuracy': 0.5277364771845511, 'ROC AUC': 0.5277364771845512, 'F1 Score': 0.5281105227133541, 'Time taken': 7.178976774215698}
{'Model': 'LinearDiscriminantAnalysis', 'Accuracy': 0.6485, 'Balanced Accuracy': 0.6482616204850942, 'ROC AUC': 0.6482616204850943, 'F1 Score': 0.648557080549348, 'Time taken': 0.1434943675994873}


 59%|█████▊    | 17/29 [00:25<00:15,  1.28s/it]

{'Model': 'LinearSVC', 'Accuracy': 0.6455, 'Balanced Accuracy': 0.6452579409776977, 'ROC AUC': 0.6452579409776976, 'F1 Score': 0.6455575677233112, 'Time taken': 1.8632218837738037}
{'Model': 'LogisticRegression', 'Accuracy': 0.6455, 'Balanced Accuracy': 0.6453981126880428, 'ROC AUC': 0.6453981126880428, 'F1 Score': 0.6455859335032605, 'Time taken': 0.0509493350982666}
{'Model': 'NearestCentroid', 'Accuracy': 0.6225, 'Balanced Accuracy': 0.6230707616830617, 'ROC AUC': 0.6230707616830619, 'F1 Score': 0.6225612512278497, 'Time taken': 0.07728457450866699}


 69%|██████▉   | 20/29 [00:34<00:17,  1.92s/it]

{'Model': 'NuSVC', 'Accuracy': 0.5825, 'Balanced Accuracy': 0.5835473454982354, 'ROC AUC': 0.5835473454982354, 'F1 Score': 0.5823234859446066, 'Time taken': 9.389828205108643}
{'Model': 'PassiveAggressiveClassifier', 'Accuracy': 0.545, 'Balanced Accuracy': 0.5439863833195665, 'ROC AUC': 0.5439863833195665, 'F1 Score': 0.544715424337674, 'Time taken': 0.05870318412780762}
{'Model': 'Perceptron', 'Accuracy': 0.5845, 'Balanced Accuracy': 0.5842181672548872, 'ROC AUC': 0.5842181672548873, 'F1 Score': 0.5845770339458225, 'Time taken': 0.042452335357666016}
{'Model': 'QuadraticDiscriminantAnalysis', 'Accuracy': 0.5175, 'Balanced Accuracy': 0.500245300493104, 'ROC AUC': 0.500245300493104, 'F1 Score': 0.3590945353921933, 'Time taken': 0.06578874588012695}


 83%|████████▎ | 24/29 [00:36<00:05,  1.01s/it]

{'Model': 'RandomForestClassifier', 'Accuracy': 0.6245, 'Balanced Accuracy': 0.6245826137017847, 'ROC AUC': 0.6245826137017847, 'F1 Score': 0.6246135380478427, 'Time taken': 1.4096121788024902}
{'Model': 'RidgeClassifier', 'Accuracy': 0.6485, 'Balanced Accuracy': 0.6482616204850942, 'ROC AUC': 0.6482616204850943, 'F1 Score': 0.648557080549348, 'Time taken': 0.045708656311035156}
{'Model': 'RidgeClassifierCV', 'Accuracy': 0.6485, 'Balanced Accuracy': 0.6482616204850942, 'ROC AUC': 0.6482616204850943, 'F1 Score': 0.648557080549348, 'Time taken': 0.07962393760681152}


 86%|████████▌ | 25/29 [00:36<00:03,  1.17it/s]

{'Model': 'SGDClassifier', 'Accuracy': 0.6035, 'Balanced Accuracy': 0.6031363420189733, 'ROC AUC': 0.6031363420189733, 'F1 Score': 0.6035437575434223, 'Time taken': 0.23702359199523926}


 90%|████████▉ | 26/29 [00:41<00:05,  1.86s/it]

{'Model': 'SVC', 'Accuracy': 0.634, 'Balanced Accuracy': 0.6336562288803784, 'ROC AUC': 0.6336562288803785, 'F1 Score': 0.6340351720161446, 'Time taken': 5.472633123397827}
StackingClassifier model failed to execute
__init__() missing 1 required positional argument: 'estimators'


100%|██████████| 29/29 [00:42<00:00,  1.02s/it]

{'Model': 'XGBClassifier', 'Accuracy': 0.6245, 'Balanced Accuracy': 0.6248980000500612, 'ROC AUC': 0.6248980000500614, 'F1 Score': 0.6245966080260348, 'Time taken': 0.5247461795806885}
{'Model': 'LGBMClassifier', 'Accuracy': 0.636, 'Balanced Accuracy': 0.6362193687266902, 'ROC AUC': 0.6362193687266902, 'F1 Score': 0.6361106843351897, 'Time taken': 0.19700932502746582}


100%|██████████| 29/29 [00:42<00:00,  1.47s/it]


We see from the drastic drops in various metric scores that models across the board are underfitting.

### Oversampling - SMOTE

In [13]:
# Oversampling Minority Class with SMOTE

X = scaled_features
y = df['Delinquent'].values
oversample = SMOTE()
print("BEFORE:", Counter(y))
X, y = oversample.fit_resample(X, y)
print("AFTER:", Counter(y))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

BEFORE: Counter({0: 180052, 1: 19948})
AFTER: Counter({0: 180052, 1: 180052})


#### Benchmark - Logistic Regression

In [14]:
reg = LogisticRegression(max_iter = 5000)
reg.fit(X_train, y_train)
y_pred = reg.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred_labels))
print(classification_report(y_test, y_pred_labels))

[[23834 12145]
 [12301 23741]]
              precision    recall  f1-score   support

           0       0.66      0.66      0.66     35979
           1       0.66      0.66      0.66     36042

    accuracy                           0.66     72021
   macro avg       0.66      0.66      0.66     72021
weighted avg       0.66      0.66      0.66     72021



#### LGBM

In [15]:
train_data = lgb.Dataset(X_train, label = y_train)
params = {
    'objective': 'binary',
    'metric': 'auc',
}

In [16]:
cv_results = lgb.cv(params, train_data, num_boost_round = 100, nfold = 5, stratified = True, seed = 42)
cv_results

[LightGBM] [Info] Number of positive: 115208, number of negative: 115258
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6790
[LightGBM] [Info] Number of data points in the train set: 230466, number of used features: 76
[LightGBM] [Info] Number of positive: 115208, number of negative: 115258
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6790
[LightGBM] [Info] Number of data points in the train set: 230466, number of used features: 76
[LightGBM] [Info] Number of positive: 115208, number of negative: 115258
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 6790
[LightGBM] [Info] Number of data points in the train set: 230466, number of used features: 76
[LightGBM] [Info] Number of pos

{'auc-mean': [0.8373476488997937,
  0.8755962078011145,
  0.8944261755380538,
  0.9072412145298794,
  0.9125560359841316,
  0.9158633981355455,
  0.9194806726460435,
  0.9224588266580372,
  0.926066881273235,
  0.9287843000501897,
  0.9303667816804264,
  0.9324364682396163,
  0.9346563005303313,
  0.9366571479980917,
  0.9390034518211227,
  0.9402542520809785,
  0.9429878333531075,
  0.9455662609086921,
  0.9472969419647626,
  0.9500145195653372,
  0.9520688264914348,
  0.9533098482134704,
  0.9542881556934033,
  0.9554432127569905,
  0.9563346359091602,
  0.9573950620899782,
  0.9578255010723924,
  0.9586235286896843,
  0.9591427012588077,
  0.9599035482728822,
  0.9605478086928402,
  0.9607973457002192,
  0.9611568354284543,
  0.9614296908916191,
  0.9617876503673916,
  0.962040207221175,
  0.9622515328148153,
  0.9624730963182948,
  0.9626332933854739,
  0.9628470712344935,
  0.9630573169006971,
  0.9631833694825873,
  0.9633612442976999,
  0.9635348166655865,
  0.9636871372659609,


### Fine Tuning LGBM With Grid Search

In [17]:
train_data = lgb.Dataset(X_train, label = y_train)
param_grid = {
    'boosting_type': ['gbdt', 'dart', 'goss'],
    'num_leaves': [10, 20, 30],
    'max_depth': [8, 16],
    'learning_rate': [0.01, 0.1, 0.5],
    'n_estimators': [50, 100, 200],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0],
    'reg_alpha': [0.0, 0.1, 0.5],
    'reg_lambda': [0.0, 0.1, 0.5],
    'min_split_gain': [0.0, 0.1, 0.5],
    'min_child_weight': [0.001, 0.01, 0.1],
    'is_unbalance': [False, True],
    'metric': ['binary_logloss', 'auc'],
    'verbosity': [-1]
}

In [18]:
model = lgb.LGBMClassifier()
random_search = RandomizedSearchCV(model, param_distributions = param_grid, n_iter = 30, scoring = 'roc_auc', cv = 5, verbose = 2)
random_search.fit(X_train, y_train)
best_params = random_search.best_params_
best_score = random_search.best_score_

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[CV] END boosting_type=goss, colsample_bytree=0.8, is_unbalance=True, learning_rate=0.01, max_depth=8, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.1, subsample=0.8, verbosity=-1; total time=   0.6s
[CV] END boosting_type=goss, colsample_bytree=0.8, is_unbalance=True, learning_rate=0.01, max_depth=8, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.1, subsample=0.8, verbosity=-1; total time=   0.5s
[CV] END boosting_type=goss, colsample_bytree=0.8, is_unbalance=True, learning_rate=0.01, max_depth=8, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.1, subsample=0.8, verbosity=-1; total time=   0.4s
[CV] END boosting_type=goss, colsample_bytree=0.8, is_unbalance=True, learning_rate=0.01, max_depth=8, metric=auc, min_child_weight

[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.1, min_split_gain=0.0, n_estimators=200, num_leaves=20, reg_alpha=0.1, reg_lambda=0.5, subsample=0.8, verbosity=-1; total time=   1.7s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.1, min_split_gain=0.0, n_estimators=200, num_leaves=20, reg_alpha=0.1, reg_lambda=0.5, subsample=0.8, verbosity=-1; total time=   1.6s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.1, min_split_gain=0.0, n_estimators=200, num_leaves=20, reg_alpha=0.1, reg_lambda=0.5, subsample=0.8, verbosity=-1; total time=   1.8s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.1, min_split_ga

[CV] END boosting_type=goss, colsample_bytree=1.0, is_unbalance=False, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=20, reg_alpha=0.5, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.8s
[CV] END boosting_type=goss, colsample_bytree=1.0, is_unbalance=False, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=20, reg_alpha=0.5, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.8s
[CV] END boosting_type=goss, colsample_bytree=1.0, is_unbalance=False, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=20, reg_alpha=0.5, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.8s
[CV] END boosting_type=goss, colsample_bytree=1.0, is_unbalance=False, learning_rate=0.1, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_spl

[CV] END boosting_type=gbdt, colsample_bytree=0.8, is_unbalance=False, learning_rate=0.5, max_depth=16, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.4s
[CV] END boosting_type=gbdt, colsample_bytree=0.8, is_unbalance=False, learning_rate=0.5, max_depth=16, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.5s
[CV] END boosting_type=gbdt, colsample_bytree=0.8, is_unbalance=False, learning_rate=0.5, max_depth=16, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.0, subsample=0.8, verbosity=-1; total time=   0.5s
[CV] END boosting_type=gbdt, colsample_bytree=0.8, is_unbalance=False, learning_rate=0.5, max_depth=16, metric=auc, min_child_weight=0.01, min_split_gain=0.0, n_estimators=50, num_leaves=10,

[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.5, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.5, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.5, subsample=1.0, verbosity=-1; total time=   0.4s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.5, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.5, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.5, subsample=1.0, verbosity=-1; total time=   0.4s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.5, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_gain=0.5, n_estimators=50, num_leaves=10, reg_alpha=0.0, reg_lambda=0.5, subsample=1.0, verbosity=-1; total time=   0.4s
[CV] END boosting_type=gbdt, colsample_bytree=1.0, is_unbalance=True, learning_rate=0.5, max_depth=16, metric=binary_logloss, min_child_weight=0.01, min_split_g

In [19]:
best_params

{'verbosity': -1,
 'subsample': 0.8,
 'reg_lambda': 0.5,
 'reg_alpha': 0.1,
 'num_leaves': 20,
 'n_estimators': 200,
 'min_split_gain': 0.0,
 'min_child_weight': 0.1,
 'metric': 'binary_logloss',
 'max_depth': 16,
 'learning_rate': 0.1,
 'is_unbalance': True,
 'colsample_bytree': 1.0,
 'boosting_type': 'gbdt'}

In [20]:
model = lgb.LGBMClassifier(**best_params)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred_labels = (y_pred > 0.5).astype(int)
print(confusion_matrix(y_test, y_pred_labels))
print(classification_report(y_test, y_pred_labels))

[[35862   117]
 [ 4159 31883]]
              precision    recall  f1-score   support

           0       0.90      1.00      0.94     35979
           1       1.00      0.88      0.94     36042

    accuracy                           0.94     72021
   macro avg       0.95      0.94      0.94     72021
weighted avg       0.95      0.94      0.94     72021

