# Import all libraries

In [1]:
RANDOM_STATE = 12345

#Manipulate data
import pandas as pd
import numpy as np

#Scale and split data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

#Score
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import classification_report, confusion_matrix

#Classifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

#Ensembles
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

#Cross-Validation
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV

#Easier to read
from pprint import pprint

#Draw ROC curve
from sklearn.metrics import roc_curve
import matplotlib.pyplot as plt

# Read data

In [2]:
df = pd.read_csv("Heart_disease.csv")

In [3]:
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [4]:
df = df.rename(columns={'target':'outcome'})

In [5]:
df.head(5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,outcome
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [6]:
one = 0
zero = 0

for index, row in df.iterrows():
    if row['outcome'] == 0:
        one+=1
    else:
        zero+=1
        
print("Class balance:")
print("1",one)
print("0",zero)

Class balance:
1 138
0 165


In [7]:
df.describe()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,outcome
count,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0,303.0
mean,54.366337,0.683168,0.966997,131.623762,246.264026,0.148515,0.528053,149.646865,0.326733,1.039604,1.39934,0.729373,2.313531,0.544554
std,9.082101,0.466011,1.032052,17.538143,51.830751,0.356198,0.52586,22.905161,0.469794,1.161075,0.616226,1.022606,0.612277,0.498835
min,29.0,0.0,0.0,94.0,126.0,0.0,0.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.5,0.0,0.0,120.0,211.0,0.0,0.0,133.5,0.0,0.0,1.0,0.0,2.0,0.0
50%,55.0,1.0,1.0,130.0,240.0,0.0,1.0,153.0,0.0,0.8,1.0,0.0,2.0,1.0
75%,61.0,1.0,2.0,140.0,274.5,0.0,1.0,166.0,1.0,1.6,2.0,1.0,3.0,1.0
max,77.0,1.0,3.0,200.0,564.0,1.0,2.0,202.0,1.0,6.2,2.0,4.0,3.0,1.0


In [8]:
(df==0).sum(axis=0)

age           0
sex          96
cp          143
trestbps      0
chol          0
fbs         258
restecg     147
thalach       0
exang       204
oldpeak      99
slope        21
ca          175
thal          2
outcome     138
dtype: int64

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  outcome   303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


# Seperate x and y and scale

In [10]:
x = df.drop('outcome', axis=1)
y = df['outcome']
scaler = StandardScaler()
X = scaler.fit_transform(x) 

# Classifiers

SVM

In [11]:
svmparam = SVC()
X_svc_train, X_svc_test, y_svc_train, y_svc_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'C': [0.01,0.1,1,10,100], 'gamma': [10,1,0.1,0.01,0.001,0.0001],'kernel': ['sigmoid', 'poly', 'linear']}
grid_search = GridSearchCV(estimator = svmparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_svc_train, y_svc_train)
print( "Initial:", grid_search.best_params_)

#1st Round
param_grid = {'C': [0.8,0.9,1.0,1.1,1.2], 'gamma': [10,1,0.1,0.01,0.001,0.0001],'kernel': ['sigmoid', 'poly', 'linear']}
grid_search = GridSearchCV(estimator = svmparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_svc_train, y_svc_train)
print( "First Round:", grid_search.best_params_)

#2nd Round
param_grid = {'C': [0.8, 0.9,1,1.1, 1.2], 'gamma': [0.008, 0.009, 0.01, 0.02, 0.03],'kernel': ['sigmoid', 'poly', 'linear']}
grid_search = GridSearchCV(estimator = svmparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_svc_train, y_svc_train)
print( "Second Round:", grid_search.best_params_)

Initial: {'C': 1, 'gamma': 0.01, 'kernel': 'sigmoid'}
First Round: {'C': 1.0, 'gamma': 0.01, 'kernel': 'sigmoid'}
Second Round: {'C': 1, 'gamma': 0.01, 'kernel': 'sigmoid'}


In [12]:
#Without Hyperparametering
#svc = SVC(probability=True)

#Accuracy 0.8351648351648352
#Recall: 0.875
#Precision 0.8235294117647058
#F1_Score: 0.8484848484848485
#AUC: 0.9060077519379846

#=== Confusion Matrix ===
#[[34  9]
# [ 6 42]]

#With Hyperparametering
svc = SVC(probability=True, C=1, gamma=0.01, kernel='sigmoid')

svc.fit(X_svc_train, y_svc_train)
y_svc_pred = svc.predict(X_svc_test)

svc_score = svc.predict_proba(X_svc_test)[: ,1]

recall_score_svc = recall_score(y_svc_test, y_svc_pred)
precision_score_svc = precision_score(y_svc_test, y_svc_pred)
f1_score_svc = f1_score(y_svc_test, y_svc_pred)

accuracy_score_svc = accuracy_score(y_svc_test, y_svc_pred)
recall_score_svc = recall_score(y_svc_test, y_svc_pred)
precision_score_svc = precision_score(y_svc_test, y_svc_pred)
f1_score_svc = f1_score(y_svc_test, y_svc_pred)
auc_svc = roc_auc_score(y_svc_test, svc_score)

print("Accuracy", accuracy_score_svc)
print("Recall:", recall_score_svc)
print("Precision", precision_score_svc)
print("F1_Score:", f1_score_svc)
print("AUC:", auc_svc)
print()

print("=== Confusion Matrix ===")
print(confusion_matrix(y_svc_test, y_svc_pred))

Accuracy 0.8681318681318682
Recall: 0.9166666666666666
Precision 0.8461538461538461
F1_Score: 0.8799999999999999
AUC: 0.9229651162790697

=== Confusion Matrix ===
[[35  8]
 [ 4 44]]


Decision Tree

In [13]:
dctparam = DecisionTreeClassifier(random_state = RANDOM_STATE)
X_dtc_train, X_dtc_test, y_dtc_train, y_dtc_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'max_depth': [None, 1,2,3,4], 'max_features': [None, 'auto', 'sqrt', 'log2'],
              'min_samples_leaf': [1,2,3,4,5], 'criterion' : ['gini', 'entropy'], 'min_samples_split' : [1,2,3,4,5]}
grid_search = GridSearchCV(estimator = dctparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_dtc_train, y_dtc_train)
print( "Initial:", grid_search.best_params_)

Initial: {'criterion': 'entropy', 'max_depth': None, 'max_features': None, 'min_samples_leaf': 3, 'min_samples_split': 2}


In [14]:
#Without Hyperparametering
#dtc = DecisionTreeClassifier(random_state = RANDOM_STATE)

#Accuracy 0.7692307692307693
#Recall: 0.75
#Precision 0.8
#F1_Score: 0.7741935483870969
#AUC: 0.7703488372093024

#=== Confusion Matrix ===
#[[34  9]
# [12 36]]

#With Hyperparametering

dtc = DecisionTreeClassifier(random_state = RANDOM_STATE, max_depth=None, max_features=None, criterion = 'entropy',
                             min_samples_leaf=3, min_samples_split=2)

dtc.fit(X_dtc_train,y_dtc_train)
y_dtc_pred = dtc.predict(X_dtc_test)

dtc_score = dtc.predict_proba(X_dtc_test)[: , 1]

accuracy_score_dtc = accuracy_score(y_dtc_test, y_dtc_pred)
recall_score_dtc = recall_score(y_dtc_test, y_dtc_pred)
precision_score_dtc = precision_score(y_dtc_test, y_dtc_pred)
f1_score_dtc = f1_score(y_dtc_test, y_dtc_pred)
auc_dtc = roc_auc_score(y_dtc_test, dtc_score)

print("Accuracy", accuracy_score_dtc)
print("Recall:", recall_score_dtc)
print("Precision", precision_score_dtc)
print("F1_Score:", f1_score_dtc)
print("AUC:", auc_dtc)
print()

print("=== Confusion Matrix ===")
print(confusion_matrix(y_dtc_test, y_dtc_pred))

Accuracy 0.7582417582417582
Recall: 0.8333333333333334
Precision 0.7407407407407407
F1_Score: 0.7843137254901961
AUC: 0.7674418604651163

=== Confusion Matrix ===
[[29 14]
 [ 8 40]]


Logistic Regression

In [15]:
lrparam = LogisticRegression()
X_lr_train, X_lr_test, y_lr_train, y_lr_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'penalty' : [None, 'l2','l1','elasticnet'], 'C' : [10, 1, 0.1, 0.001, 0.0001], 
              'max_iter' : [250, 500, 1000, 1250, 1500], 'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(estimator = lrparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_lr_train, y_lr_train)
print( "Initial:", grid_search.best_params_)

#1st Round
param_grid = {'penalty' : [None, 'l2','l1','elasticnet'], 'C' : [0.08, 0.09, 0.1, 0.2, 0.3], 
              'max_iter' : [50,100,150,200,250], 'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(estimator = lrparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_lr_train, y_lr_train)
print( "First round:", grid_search.best_params_)

#2nd round
param_grid = {'penalty' : [None, 'l2','l1','elasticnet'], 'C' : [0.04,0.05,0.06,0.07,0.08], 
              'max_iter' : [50,100,150,200,250], 'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(estimator = lrparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_lr_train, y_lr_train)
print( "Second round:", grid_search.best_params_)

#3rd round
param_grid = {'penalty' : [None, 'l2','l1','elasticnet'], 'C' : [0.04,0.05,0.06,0.07,0.08], 
              'max_iter' : [15,20,30,40,50], 'solver' : ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}
grid_search = GridSearchCV(estimator = lrparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_lr_train, y_lr_train)
print( "Third round:", grid_search.best_params_)

Initial: {'C': 0.1, 'max_iter': 250, 'penalty': 'l2', 'solver': 'liblinear'}
First round: {'C': 0.08, 'max_iter': 50, 'penalty': 'l2', 'solver': 'liblinear'}
Second round: {'C': 0.05, 'max_iter': 50, 'penalty': 'l2', 'solver': 'newton-cg'}
Third round: {'C': 0.05, 'max_iter': 15, 'penalty': 'l2', 'solver': 'newton-cg'}


In [16]:
#Without Hyperparametering
#lr = LogisticRegression()

#Accuracy 0.8021978021978022
#Recall: 0.7916666666666666
#Precision 0.8260869565217391
#F1_Score: 0.8085106382978724

#=== Confusion Matrix ===
#[[35  8]
# [10 38]]

#With Hyperparametering
#Trying a new level for C (0.01, which gave a better result than 0.05)
lr = LogisticRegression(C = 0.01, max_iter = 15, penalty = 'l2', solver = 'newton-cg')

lr.fit(X_lr_train,y_lr_train)
y_lr_pred = lr.predict(X_lr_test)

accuracy_score_lr = accuracy_score(y_lr_test, y_lr_pred)
recall_score_lr = recall_score(y_lr_test, y_lr_pred)
precision_score_lr = precision_score(y_lr_test, y_lr_pred)
f1_score_lr = f1_score(y_lr_test, y_lr_pred)

print("Accuracy", accuracy_score_lr)
print("Recall:", recall_score_lr)
print("Precision", precision_score_lr)
print("F1_Score:", f1_score_lr)
print()

print("=== Confusion Matrix ===")
print(confusion_matrix(y_lr_test, y_lr_pred))

Accuracy 0.8461538461538461
Recall: 0.8958333333333334
Precision 0.8269230769230769
F1_Score: 0.86

=== Confusion Matrix ===
[[34  9]
 [ 5 43]]


K-nearest neighbor

In [17]:
knnparam = KNeighborsClassifier()
X_knn_train, X_knn_test, y_knn_train, y_knn_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'n_neighbors': [2,3,4,5,6,7,8,9,10], 'weights': ['distance', 'uniform'],
              'algorithm': ['auto','ball_tree', 'kd_tree', 'brute']}
grid_search = GridSearchCV(estimator = knnparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_knn_train, y_knn_train)
print( "Initial:", grid_search.best_params_)

Initial: {'algorithm': 'auto', 'n_neighbors': 8, 'weights': 'uniform'}


In [18]:
#Without Hyperparametering
#knn = KNeighborsClassifier()

#Accuracy 0.8351648351648352
#Recall: 0.8541666666666666
#Precision 0.8367346938775511
#F1_Score: 0.845360824742268
#AUC: 0.8817829457364341

#=== Confusion Matrix ===
#[[35  8]
# [ 7 41]]

#With Hyperparametering

knn = KNeighborsClassifier(n_neighbors = 8 , weights = 'uniform', algorithm = 'auto')

knn.fit(X_knn_train,y_knn_train)
y_knn_pred = knn.predict(X_knn_test)

knn_score = knn.predict_proba(X_knn_test)[: , 1]

accuracy_score_knn = accuracy_score(y_knn_test, y_knn_pred)
recall_score_knn = recall_score(y_knn_test, y_knn_pred)
precision_score_knn = precision_score(y_knn_test, y_knn_pred)
f1_score_knn = f1_score(y_knn_test, y_knn_pred)
auc_knn = roc_auc_score(y_knn_test, knn_score)

print("Accuracy", accuracy_score_knn)
print("Recall:", recall_score_knn)
print("Precision", precision_score_knn)
print("F1_Score:", f1_score_knn)
print("AUC:", auc_knn)
print()

print("=== Confusion Matrix ===")
print(confusion_matrix(y_knn_test, y_knn_pred))

Accuracy 0.8241758241758241
Recall: 0.8125
Precision 0.8478260869565217
F1_Score: 0.8297872340425533
AUC: 0.8936531007751938

=== Confusion Matrix ===
[[36  7]
 [ 9 39]]


# Ensembles

Random Forest

In [19]:
#Optimizing parameters

rfparam = RandomForestClassifier(random_state=RANDOM_STATE)
X_rf_train, X_rf_test, y_rf_train, y_rf_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'max_depth': [1, 2, 3, 4], 'min_samples_leaf': [1,2,3,4], 
              'criterion' : ['gini', 'entropy'], 
              'min_samples_split' : [1,2,3,4], 'n_estimators' : [25, 50, 100, 150]}
grid_search = GridSearchCV(estimator = rfparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_rf_train, y_rf_train)
print( "Initial:", grid_search.best_params_)

#1st Round
param_grid = {'max_depth': [3,4,5,6], 'min_samples_leaf': [1,2,3], 
              'criterion' : ['gini', 'entropy'], 
              'min_samples_split' : [1,2,3,4], 'n_estimators' : [100,150,200,250]}
grid_search = GridSearchCV(estimator = rfparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_rf_train, y_rf_train)
print( "First round:", grid_search.best_params_)

#2nd Round
param_grid = {'max_depth': [5,6,7,8], 'min_samples_leaf': [1,2], 
              'criterion' : ['gini', 'entropy'], 
              'min_samples_split' : [3,4,5], 'n_estimators' : [200,250,300,350]}
grid_search = GridSearchCV(estimator = rfparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_rf_train, y_rf_train)
print( "Second round:", grid_search.best_params_)

Initial: {'criterion': 'entropy', 'max_depth': 4, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 150}
First round: {'criterion': 'gini', 'max_depth': 6, 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 250}
Second round: {'criterion': 'gini', 'max_depth': 7, 'min_samples_leaf': 1, 'min_samples_split': 4, 'n_estimators': 300}


In [20]:
#Without Hyperparametering

#rf = RandomForestClassifier(random_state=RANDOM_STATE)

#Accuracy 0.8021978021978022
#Recall: 0.8333333333333334
#Precision 0.8
#F1_Score: 0.816326530612245
#AUC: 0.8938953488372093

#=== Confusion Matrix ===
#[[33 10]
# [ 8 40]]

#With Hyperparametering
rf = RandomForestClassifier(n_estimators = 300, random_state=RANDOM_STATE, 
                            min_samples_leaf=1, min_samples_split=4, max_depth=7, criterion = 'gini')

rf.fit(X_rf_train, y_rf_train)
y_rf_pred = rf.predict(X_rf_test)

rf_score = rf.predict_proba(X_rf_test)[: , 1]

accuracy_score_rf = accuracy_score(y_rf_test, y_rf_pred)
recall_score_rf = recall_score(y_rf_test, y_rf_pred)
precision_score_rf = precision_score(y_rf_test, y_rf_pred)
f1_score_rf = f1_score(y_rf_test, y_rf_pred)
auc_rf = roc_auc_score(y_rf_test, rf_score)

print("Accuracy", accuracy_score_rf)
print("Recall:", recall_score_rf)
print("Precision", precision_score_rf)
print("F1_Score:", f1_score_rf)
print("AUC:", auc_rf)
print()

print("=== Confusion Matrix ===")
print(confusion_matrix(y_rf_test, y_rf_pred))

Accuracy 0.8461538461538461
Recall: 0.9166666666666666
Precision 0.8148148148148148
F1_Score: 0.8627450980392156
AUC: 0.9152131782945736

=== Confusion Matrix ===
[[33 10]
 [ 4 44]]


AdaBoost

In [21]:
abparam = AdaBoostClassifier(random_state=RANDOM_STATE)
X_ab_train, X_ab_test, y_ab_train, y_ab_test = train_test_split(X, y, test_size=0.3, random_state=RANDOM_STATE)

#Random values initial
param_grid = {'n_estimators': [50, 100, 150, 200, 250], 'learning_rate': [1,0.1,0.01,0.001,0.0001]}
grid_search = GridSearchCV(estimator = abparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_ab_train, y_ab_train)
print( "Initial:", grid_search.best_params_)

#1st Round optimizing
param_grid = {'n_estimators': [150, 200, 250, 300, 350], 'learning_rate': [1,0.1,0.01,0.001,0.0001]}
grid_search = GridSearchCV(estimator = abparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_ab_train, y_ab_train)
print( "First round:", grid_search.best_params_)

#2nd Round optimizing
param_grid = {'n_estimators': [150, 200, 250, 300, 350], 'learning_rate': [0.08, 0.09, 0.1, 0.2, 0.3]}
grid_search = GridSearchCV(estimator = abparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_ab_train, y_ab_train)
print( "Second round:", grid_search.best_params_)

#3rd Round optimizing
param_grid = {'n_estimators': [100, 150, 200, 250, 300], 'learning_rate': [0.06, 0.07, 0.08, 0.09, 0.1]}
grid_search = GridSearchCV(estimator = abparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_ab_train, y_ab_train)
print( "Third round:", grid_search.best_params_)

#4th Round optimizing
param_grid = {'n_estimators': [150, 200, 250, 300, 350], 'learning_rate': [0.05,0.06, 0.07, 0.08, 0.09]}
grid_search = GridSearchCV(estimator = abparam, param_grid = param_grid, cv=3, n_jobs = -1)
grid_search.fit(X_ab_train, y_ab_train)
print( "Fourth round:", grid_search.best_params_)

Initial: {'learning_rate': 0.1, 'n_estimators': 250}
First round: {'learning_rate': 0.1, 'n_estimators': 250}
Second round: {'learning_rate': 0.08, 'n_estimators': 200}
Third round: {'learning_rate': 0.07, 'n_estimators': 250}
Fourth round: {'learning_rate': 0.07, 'n_estimators': 250}


In [22]:
#Without Hyperparametering
#ab = AdaBoostClassifier(random_state=RANDOM_STATE)

#Accuracy 0.7912087912087912
#Recall: 0.8125
#Precision 0.7959183673469388
#F1_Score: 0.8041237113402061
#AUC: 0.8250968992248062

#=== Confusion Matrix ===
#[[33 10]
# [ 9 39]]

#With Hyperparametering
ab = AdaBoostClassifier(n_estimators=250, learning_rate=0.07, random_state=RANDOM_STATE)

ab.fit(X_ab_train, y_ab_train)
y_ab_pred = ab.predict(X_ab_test)

ab_score = ab.predict_proba(X_ab_test)[: , 1]

accuracy_score_ab = accuracy_score(y_ab_test, y_ab_pred)
recall_score_ab = recall_score(y_ab_test, y_ab_pred)
precision_score_ab = precision_score(y_ab_test, y_ab_pred)
f1_score_ab = f1_score(y_ab_test, y_ab_pred)
auc_ab = roc_auc_score(y_ab_test, ab_score)

print("Accuracy", accuracy_score_ab)
print("Recall:", recall_score_ab)
print("Precision", precision_score_ab)
print("F1_Score:", f1_score_ab)
print("AUC:", auc_ab)
print()
print("=== Confusion Matrix ===")
print(confusion_matrix(y_ab_test, y_ab_pred))

Accuracy 0.8241758241758241
Recall: 0.8333333333333334
Precision 0.8333333333333334
F1_Score: 0.8333333333333334
AUC: 0.8832364341085271

=== Confusion Matrix ===
[[35  8]
 [ 8 40]]
