In [27]:
import pandas as pd
import numpy as np
import seaborn as sns
import os
import joblib


from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, roc_curve, confusion_matrix
from sklearn.linear_model import LogisticRegression
import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.preprocessing import LabelEncoder
# from catboost import CatBoostClassifier
from sklearn.ensemble import HistGradientBoostingClassifier, VotingClassifier

In [57]:
# Load the pre-split feature matrices and label frames from ../outputs.
X_train = pd.read_csv("../outputs/imputed_X_train_SMOTE_0_35.csv")
y_train = pd.read_csv("../outputs/imputed_y_train_SMOTE_0_35.csv")
X_test = pd.read_csv("../outputs/imputed_X_test_scaled_0_35.csv")
y_test = pd.read_csv("../outputs/imputed_y_test.csv")

# "Age" is the only column handled as numeric; every other training column
# is treated as categorical.
numerical_col = ["Age"]
categorical_col = X_train.columns.tolist()
categorical_col.remove("Age")
for name in categorical_col:
    X_train[name] = X_train[name].astype("category")

# Names and positional indices of the categorical columns, captured while the
# columns still carry the pandas `category` dtype (the encoding loop below
# overwrites them with integer codes).
categorical_features = X_train.select_dtypes(include="category").columns
cat_col_numbers = [X_train.columns.get_loc(name) for name in categorical_features]
categorical_features

# Integer-encode each categorical column: the encoder is re-fitted on the
# training column and the same mapping is then applied to the test column.
# A test value that never occurs in the training column makes transform() raise.
lbl = LabelEncoder()
for name in categorical_col:
    X_train[name] = lbl.fit_transform(X_train[name])
    X_test[name] = lbl.transform(X_test[name])

def eval(y_true, y_pred, threshold=0.5):
    """Print binary-classification metrics for a set of predicted scores.

    Note: this name shadows Python's built-in ``eval``; it is kept unchanged
    so that existing calls in the notebook continue to work.

    Parameters
    ----------
    y_true : array-like
        Ground-truth binary labels.
    y_pred : array-like
        Predicted scores for the positive class. Hard 0/1 labels are accepted
        too, but in that case the "ROC AUC PROBA" line is computed on the same
        labels as "ROC AUC score" and adds no information.
    threshold : float, default 0.5
        Scores strictly greater than this value are mapped to class 1,
        everything else to class 0.

    Returns
    -------
    None
        All results are printed: accuracy, precision, recall, F1, ROC AUC on
        the thresholded labels, ROC AUC on the raw scores, the confusion
        matrix and a separator line.
    """
    # Coerce once so that plain Python lists can be compared to the threshold.
    y_pred_proba = np.asarray(y_pred)
    y_pred_label = np.where(y_pred_proba > threshold, 1, 0)

    acc_score = accuracy_score(y_true, y_pred_label)
    conf_mat = confusion_matrix(y_true, y_pred_label)

    prec_score = precision_score(y_true, y_pred_label)
    rec_score = recall_score(y_true, y_pred_label)
    F1_score = f1_score(y_true, y_pred_label)

    # AUC on hard labels versus AUC on the untouched scores.
    roc_auc = roc_auc_score(y_true, y_pred_label)
    roc_auc_predict_proba = roc_auc_score(y_true, y_pred_proba)

    print(f'Accuracy: {acc_score}')

    print("Precision_score :", prec_score)
    print("recall_score :", rec_score)
    print("f1_score:", F1_score)

    print(f'ROC AUC score: {roc_auc}')
    print(f'ROC AUC PROBA: {roc_auc_predict_proba}')

    print(conf_mat)

    print("==" * 25)

In [37]:
# Collect every persisted estimator as a (name, model) pair, HGB models first
# and LGBM models second. The name is the file name up to its ".sav" suffix.
all_estimators = []
for model_dir in ("../models/HGB", "../models/LGBM"):
    # os.listdir returns entries in arbitrary order; sort them so the
    # estimator list is the same on every run.
    for file_name in sorted(os.listdir(model_dir)):
        # Skip anything that is not a saved model (e.g. checkpoint folders),
        # which joblib.load could not read.
        if not file_name.endswith(".sav"):
            continue
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code; only load model files from a trusted source.
        estimator = (
            file_name.split(".sav")[0],
            joblib.load(os.path.join(model_dir, file_name)),
        )
        all_estimators.append(estimator)

all_estimators

[('HGB_ROC AUC PROBA: 0.848',
  HistGradientBoostingClassifier(early_stopping=True,
                                 l2_regularization=0.10632888567718654,
                                 learning_rate=0.031839425208113385, max_bins=10,
                                 max_depth=3, max_leaf_nodes=8,
                                 min_samples_leaf=6)),
 ('HGB_ROC AUC PROBA: 0.836',
  HistGradientBoostingClassifier(early_stopping=True,
                                 l2_regularization=1.2353934731449698,
                                 learning_rate=0.01454401470899228, max_bins=8,
                                 max_depth=4, max_leaf_nodes=10,
                                 min_samples_leaf=4)),
 ('HGB_ROC AUC PROBA: 0.8551',
  HistGradientBoostingClassifier(early_stopping=True,
                                 l2_regularization=2.226260922772902,
                                 learning_rate=0.013043078423741325, max_bins=8,
                                 max_depth=4, max_le

In [38]:
# Soft-voting ensemble over all loaded estimators.
ens_model = VotingClassifier(estimators=all_estimators, voting='soft')

# y_train is a DataFrame; flatten it to the 1-D label vector fit() expects.
ens_model.fit(X_train, y_train.values.ravel())

# eval() thresholds its second argument at 0.5 and also computes a ROC AUC on
# the raw values, so it must receive scores, not the hard labels returned by
# predict(). Column 1 of predict_proba is taken as the positive-class
# probability (assumes the classes are ordered [0, 1] - TODO confirm).
y_pred = ens_model.predict_proba(X_train)[:, 1]
eval(y_train, y_pred)


y_pred = ens_model.predict_proba(X_test)[:, 1]
eval(y_test, y_pred)

Accuracy: 0.8744884038199181
Precision_score : 0.8841567291311755
recall_score : 0.9558011049723757
f1_score: 0.9185840707964602
ROC AUC score: 0.7989531840651353
ROC AUC PROBA: 0.9140593195696424
[[122  68]
 [ 24 519]]
Accuracy: 0.8545454545454545
Precision_score : 0.9074074074074074
recall_score : 0.914179104477612
f1_score: 0.9107806691449815
ROC AUC score: 0.7554766490129996
ROC AUC PROBA: 0.8521906596051997
[[ 37  25]
 [ 23 245]]


In [39]:
# Restore a standalone LightGBM booster from its saved model file.
lgb_1 = lgb.Booster(
    model_file="../models/LGBM_ROC AUC PROBA: 0.8601.txt",
)

In [58]:
# Score the test features with the standalone booster and print the metrics.
y_pred = lgb_1.predict(data=X_test)
eval(y_true=y_test, y_pred=y_pred)

Accuracy: 0.8363636363636363
Precision_score : 0.9349593495934959
recall_score : 0.8582089552238806
f1_score: 0.8949416342412452
ROC AUC score: 0.8000722195474242
ROC AUC PROBA: 0.8601348098218584
[[ 46  16]
 [ 38 230]]
