### **Import Libraries**

In [1]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
os.environ["OMP_NUM_THREADS"] = "1"

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,roc_auc_score, classification_report, 
    confusion_matrix, ConfusionMatrixDisplay, roc_curve
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import (
    RandomForestClassifier, StackingClassifier, GradientBoostingClassifier, 
    HistGradientBoostingClassifier, AdaBoostClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.base import BaseEstimator

from interpret.glassbox import ExplainableBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from ngboost import NGBClassifier
from ngboost.distns import Bernoulli 
from snapml import BoostingMachineClassifier  

from lib.utils import gcForest

from pytorch_tabnet.tab_model import TabNetClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from tabulate import tabulate

E0000 00:00:1743877636.037849   74810 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743877636.041421   74810 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


### **Load & Split Data**

In [2]:
df = pd.read_csv('data/preprocessed_data_full_encoded_new_v3.csv')

cols_to_keep = ['age', 
                'height',
                'weight',
                'systolic', 
                'diastolic',
                'bmi',
                'map',
                'pulse_pressure',
                'age_category',
                'bmi_category',
                'map_category',
                'gender',
                'cholesterol', 
                'gluc',
                'smoke', 
                'alco', 
                'active',
                'cardio'
                ]

df = df[cols_to_keep]

print('Sample Data', len(df)) 
display(df.head())

Sample Data 49818


Unnamed: 0,age,height,weight,systolic,diastolic,bmi,map,pulse_pressure,age_category,bmi_category,map_category,gender,cholesterol,gluc,smoke,alco,active,cardio
0,50,168,62.0,110,80,21.97,90.0,30,2,1,1,1,0,0,0,0,1,0
1,55,156,85.0,140,90,34.93,106.67,50,2,3,3,0,2,0,0,0,1,1
2,51,165,64.0,130,70,23.51,90.0,60,2,1,1,0,2,0,0,0,0,1
3,48,169,82.0,150,100,28.71,116.67,50,2,2,3,1,0,0,0,0,1,1
4,60,151,67.0,120,80,29.38,93.33,40,3,2,2,0,1,1,0,0,0,0


In [3]:
X = df.drop('cardio', axis=1)  
y = df['cardio']   

In [4]:
label_mapping = {0: 'Healthy', 1: 'Cardio Risk'}
target_names = [label_mapping[label] for label in y.unique()]

### **Scaling Data**

In [5]:
numerical_features = ['age', 'height', 'weight', 'systolic', 'diastolic', 'bmi', 'map', 'pulse_pressure']
ordinal_features = ['cholesterol', 'gluc', 'age_category', 'bmi_category', 'map_category']
binary_features = ['gender', 'smoke', 'alco', 'active']

assert all(feature in X.columns for feature in numerical_features + ordinal_features + binary_features)

In [6]:
scaler_standard = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('num_scaler', scaler_standard, numerical_features),
         ('ord_scaler', scaler_standard, ordinal_features)
    ],
    remainder='passthrough'  
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor)
])

X_preprocessed = pipeline.fit_transform(X)

X_preprocessed = pd.DataFrame(X_preprocessed, columns=numerical_features + ordinal_features + binary_features)
X_preprocessed

Unnamed: 0,age,height,weight,systolic,diastolic,bmi,map,pulse_pressure,cholesterol,gluc,age_category,bmi_category,map_category,gender,smoke,alco,active
0,-0.396496,0.497124,-0.936186,-1.077190,-0.16881,-1.155295,-0.655924,-1.389140,-0.575899,-0.421179,-0.362762,-1.024135,-0.880145,1.0,0.0,0.0,1.0
1,0.338776,-1.094188,0.908758,0.930288,1.09822,1.624188,1.092736,0.492485,2.230670,-0.421179,-0.362762,1.053030,0.977427,0.0,0.0,0.0,1.0
2,-0.249442,0.099296,-0.775756,0.261129,-1.43584,-0.825017,-0.655924,1.433297,2.230670,-0.421179,-0.362762,-1.024135,-0.880145,0.0,0.0,0.0,0.0
3,-0.690605,0.629733,0.668113,1.599448,2.36525,0.290208,2.141723,0.492485,-0.575899,-0.421179,-0.362762,0.014447,0.977427,1.0,0.0,0.0,1.0
4,1.074048,-1.757234,-0.535111,-0.408030,-0.16881,0.433900,-0.306612,-0.448328,0.827385,1.234620,1.967915,0.014447,0.048641,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49813,0.485830,-0.696360,-0.134036,0.261129,1.09822,0.240880,0.742375,-0.448328,0.827385,1.234620,-0.362762,0.014447,0.977427,0.0,0.0,0.0,1.0
49814,-0.249442,-0.431141,-1.417476,2.937766,1.09822,-1.234648,2.141723,3.314921,-0.575899,-0.421179,-0.362762,-1.024135,0.977427,0.0,0.0,0.0,1.0
49815,0.632885,0.099296,0.507683,1.599448,-0.16881,0.433900,0.742375,2.374109,-0.575899,-0.421179,-0.362762,0.014447,0.977427,0.0,0.0,0.0,1.0
49816,1.221102,-0.165923,-0.134036,0.595709,-0.16881,-0.055083,0.217882,0.962891,-0.575899,1.234620,1.967915,0.014447,0.977427,0.0,0.0,0.0,0.0


### **Train, Val, Test**

In [7]:
x_train, x_temp, y_train, y_temp = train_test_split(
    X_preprocessed, y, test_size=0.3, random_state=42, stratify=y
)  # Train 80%

x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=(1/3), random_state=42
)  # Val 10%, Test 10%


In [8]:
x_train_np = np.array(x_train)
y_train_np = np.array(y_train)
x_test_np = np.array(x_test)
y_test_np = np.array(y_test)

### **Base Model**

In [9]:
def evaluate_model(model, x_train, y_train, x_test, y_test, model_name):
    
    if hasattr(model, 'cascade_forest'):
        print("Using cascade_forest branch for training and prediction...")
        model.cascade_forest(x_train, y_train)
        predict_func = lambda x: np.argmax(np.mean(model.cascade_forest(x), axis=0), axis=1)
        predict_proba_func = lambda x: np.mean(model.cascade_forest(x), axis=0)
    else:
        print("Using standard branch (fit/predict/predict_proba)...")
        model.fit(x_train, y_train)
        predict_func = lambda x: model.predict(x)
        predict_proba_func = lambda x: model.predict_proba(x)

    y_pred_test = predict_func(x_test)
    y_proba_test = predict_proba_func(x_test)
    
    if y_proba_test.shape[1] > 1:
        y_probs_test = y_proba_test[:, 1]
    else:
        y_probs_test = y_proba_test[:, 0]
    
    y_pred_train = predict_func(x_train)
    y_proba_train = predict_proba_func(x_train)
    if y_proba_train.shape[1] > 1:
        y_probs_train = y_proba_train[:, 1]
    else:
        y_probs_train = y_proba_train[:, 0]
    
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_acc_str = f"{(test_accuracy * 100):.2f}%"
    test_auc = roc_auc_score(y_test, y_probs_test)
    test_auc_str = f"{test_auc:.4f}"
    test_report_dict = classification_report(y_test, y_pred_test, output_dict=True)
    test_precision = test_report_dict['weighted avg']['precision'] 
    test_recall    = test_report_dict['weighted avg']['recall']   
    test_f1        = test_report_dict['weighted avg']['f1-score']
    
    train_accuracy = accuracy_score(y_train, y_pred_train)
    train_acc_str = f"{(train_accuracy * 100):.2f}%"
    train_auc = roc_auc_score(y_train, y_probs_train)
    train_auc_str = f"{train_auc:.4f}"
    train_report_dict = classification_report(y_train, y_pred_train, output_dict=True)
    train_precision = train_report_dict['weighted avg']['precision']
    train_recall    = train_report_dict['weighted avg']['recall']    
    train_f1        = train_report_dict['weighted avg']['f1-score']  

    data = [
        ["Test", test_acc_str, test_auc_str],
        ["Train", train_acc_str, train_auc_str]
    ]

    headers = ["", "Accuracy", "AUC Score"]

    print(f"\n=== {model_name} ===\n")
    print(tabulate(data, headers=headers, tablefmt="grid"))
    
    print("\nOverfitting Check :")
    if train_accuracy > test_accuracy + 5 or train_auc > test_auc + 0.05:
        print("The model might be overfitting.")
    else:
        print("No significant signs of overfitting.\n")
    
    # # Plot Confusion Matrix and ROC Curve
    # fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    # cm = confusion_matrix(y_test, y_pred_test)
    # # If a global variable 'label_mapping' exists, use it for display labels
    # display_labels = list(label_mapping.values()) if 'label_mapping' in globals() else None
    # disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    # disp.plot(ax=axes[0], cmap='viridis', colorbar=False)
    # axes[0].set_title(f"{model_name} - Confusion Matrix")
    
    # fpr, tpr, _ = roc_curve(y_test, y_probs_test)
    # axes[1].plot(fpr, tpr, label=f"ROC Curve (AUC = {test_auc:.4f})", linewidth=2)
    # axes[1].plot([0, 1], [0, 1], 'k--', label="Random Guess", linewidth=1)
    # axes[1].set_title(f"{model_name} - ROC Curve")
    # axes[1].legend(loc="lower right")
    # axes[1].grid(alpha=0.3)
    
    # plt.tight_layout()
    # plt.show()
    
    return {
        'model_name': model_name,
        'train_accuracy': train_accuracy,
        'train_precision': train_precision,
        'train_recall': train_recall,
        'train_f1': train_f1,
        'test_accuracy': test_accuracy,
        'test_precision': test_precision,
        'test_recall': test_recall,
        'test_f1': test_f1
    }


def create_summary_table(results):
    test_summary = pd.DataFrame([{
        'Algorithm': r['model_name'],
        'Accuracy':  round(r['test_accuracy'], 4),
        'Precision': round(r['test_precision'], 4),
        'Recall':    round(r['test_recall'], 4),
        'F1-Score':  round(r['test_f1'], 4)
    } for r in results])
    train_summary = pd.DataFrame([{
        'Algorithm': r['model_name'],
        'Accuracy':  round(r['train_accuracy'], 4),
        'Precision': round(r['train_precision'], 4),
        'Recall':    round(r['train_recall'], 4),
        'F1-Score':  round(r['train_f1'], 4)
    } for r in results])
    
    
    print("\nSummary Table - Test Metrics")
    print(tabulate(test_summary, headers='keys', tablefmt='grid', showindex=False))

    print("Summary Table - Training Metrics")
    print(tabulate(train_summary, headers='keys', tablefmt='grid', showindex=False))
    

##### `Logistic Regression`

In [10]:
logreg_model = LogisticRegression()
logreg_results = evaluate_model(logreg_model, x_train_np, y_train_np, x_test_np, y_test_np, "Logistic Regression")
logreg_results;

Using standard branch (fit/predict/predict_proba)...



=== Logistic Regression ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.24%     |      0.9033 |
+-------+------------+-------------+
| Train | 84.37%     |      0.9061 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Random Forest`

In [11]:
rf_model = RandomForestClassifier()
rf_results = evaluate_model(rf_model, x_train_np, y_train_np, x_test_np, y_test_np, "Random Forest")
rf_results;

Using standard branch (fit/predict/predict_proba)...

=== Random Forest ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.31%     |      0.942  |
+-------+------------+-------------+
| Train | 99.56%     |      0.9999 |
+-------+------------+-------------+

Overfitting Check :
The model might be overfitting.


##### `Decision Tree`

In [12]:
dt_model = DecisionTreeClassifier()
dt_results = evaluate_model(dt_model, x_train_np, y_train_np, x_test_np, y_test_np, "Decision Tree")
dt_results;

Using standard branch (fit/predict/predict_proba)...

=== Decision Tree ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 83.26%     |      0.8341 |
+-------+------------+-------------+
| Train | 99.56%     |      1      |
+-------+------------+-------------+

Overfitting Check :
The model might be overfitting.


##### `SVM`

In [13]:
svm_model = SVC(probability=True)
svm_results = evaluate_model(svm_model, x_train_np, y_train_np, x_test_np, y_test_np, "SVM")
svm_results;

Using standard branch (fit/predict/predict_proba)...



=== SVM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.87%     |      0.9347 |
+-------+------------+-------------+
| Train | 86.40%     |      0.9417 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Naive Bayes`

In [14]:
nb_model = GaussianNB()
nb_results = evaluate_model(nb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Naive Bayes")
nb_results;

Using standard branch (fit/predict/predict_proba)...

=== Naive Bayes ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.52%     |      0.8998 |
+-------+------------+-------------+
| Train | 84.28%     |      0.9031 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `KNN`

In [15]:
knn_model = KNeighborsClassifier()
knn_results = evaluate_model(knn_model, x_train_np, y_train_np, x_test_np, y_test_np, "KNN")
knn_results;

Using standard branch (fit/predict/predict_proba)...

=== KNN ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.42%     |      0.9225 |
+-------+------------+-------------+
| Train | 89.37%     |      0.9686 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `XGBoost`

In [16]:
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_results = evaluate_model(xgb_model, x_train_np, y_train_np, x_test_np, y_test_np, "XGBoost")
xgb_results;

Using standard branch (fit/predict/predict_proba)...

=== XGBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.77%     |      0.9468 |
+-------+------------+-------------+
| Train | 90.34%     |      0.9741 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Light GBM`

In [17]:
lgbm_model = LGBMClassifier(verbose=-1)
lgbm_results = evaluate_model(lgbm_model, x_train_np, y_train_np, x_test_np, y_test_np, "LightGBM")
lgbm_results;

Using standard branch (fit/predict/predict_proba)...

=== LightGBM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.51%     |      0.9495 |
+-------+------------+-------------+
| Train | 88.04%     |      0.9618 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Cat Boost`

In [18]:
catb_model = CatBoostClassifier(verbose=False)
catb_results = evaluate_model(catb_model, x_train_np, y_train_np, x_test_np, y_test_np, "CatBoost")
catb_results;

Using standard branch (fit/predict/predict_proba)...

=== CatBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.41%     |      0.9474 |
+-------+------------+-------------+
| Train | 89.24%     |      0.9682 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `SnapBoost`

In [19]:
snb_model = BoostingMachineClassifier()
snb_results = evaluate_model(snb_model, x_train_np, y_train_np, x_test_np, y_test_np, "SnapBoost")
snb_results;

Using standard branch (fit/predict/predict_proba)...

=== SnapBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.39%     |      0.9423 |
+-------+------------+-------------+
| Train | 86.32%     |      0.9486 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Explainable Boosting Machine (EBM)`

In [20]:
ebm_model = ExplainableBoostingClassifier(n_jobs=1)
ebm_results = evaluate_model(ebm_model, x_train_np, y_train_np, x_test_np, y_test_np, "EBM")
ebm_results;

Using standard branch (fit/predict/predict_proba)...

=== EBM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.01%     |      0.9324 |
+-------+------------+-------------+
| Train | 85.74%     |      0.9385 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `NGBoost`

In [21]:
ngb_model = NGBClassifier(Dist=Bernoulli, verbose=False)
ngb_results = evaluate_model(ngb_model, x_train_np, y_train_np, x_test_np, y_test_np, "NGBoost")
ngb_results;

Using standard branch (fit/predict/predict_proba)...

=== NGBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.03%     |      0.9324 |
+-------+------------+-------------+
| Train | 84.85%     |      0.9357 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `AdaBoost`

In [22]:
adb_model = AdaBoostClassifier(random_state=42)
adb_results = evaluate_model(adb_model, x_train_np, y_train_np, x_test_np, y_test_np, "AdaBoost")
adb_results;

Using standard branch (fit/predict/predict_proba)...

=== AdaBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.16%     |      0.9118 |
+-------+------------+-------------+
| Train | 84.39%     |      0.9148 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `GradientBoosting`

In [23]:
grb_model =  GradientBoostingClassifier(random_state=42)
grb_results = evaluate_model(grb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Gradient Boosting")
grb_results;

Using standard branch (fit/predict/predict_proba)...

=== Gradient Boosting ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.99%     |      0.9389 |
+-------+------------+-------------+
| Train | 85.98%     |      0.9433 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Hist GradientBoosting`

In [None]:
hgrb_model =  HistGradientBoostingClassifier(random_state=42)
hgrb_results = evaluate_model(hgrb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Hist Gradient Boosting")
hgrb_results;

Using standard branch (fit/predict/predict_proba)...

=== Hist Gradient Boosting ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.43%     |      0.9498 |
+-------+------------+-------------+
| Train | 87.43%     |      0.9574 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `Cascaded Random Forest`

In [25]:
gcf_model = gcForest(n_cascadeRF=2,n_cascadeRFtree=500) #Default values tolerance =0.0 ,n_cascadeRFtree=101
gcf_results = evaluate_model(gcf_model, x_train_np, y_train_np, x_test_np, y_test_np, "Cascaded Random Forest")
gcf_results;

Using cascade_forest branch for training and prediction...
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.8445878136200717
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.8484587813620071
Adding/Training Layer, n_layer=3
Layer validation accuracy = 0.8497491039426524
Adding/Training Layer, n_layer=4
Layer validation accuracy = 0.8513261648745519
Adding/Training Layer, n_layer=5
Layer validation accuracy = 0.8530465949820788
Adding/Training Layer, n_layer=6
Layer validation accuracy = 0.8541935483870968
Adding/Training Layer, n_layer=7
Layer validation accuracy = 0.8536200716845879

=== Cascaded Random Forest ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.71%     |      0.9422 |
+-------+------------+-------------+
| Train | 85.97%     |      0.9458 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `TabNet`

In [30]:
tabnet_model = TabNetClassifier(verbose=0)
tabnet_results = evaluate_model(tabnet_model, x_train_np, y_train_np, x_test_np, y_test_np, "TabNet Classifier")
tabnet_results;

Using standard branch (fit/predict/predict_proba)...

=== TabNet Classifier ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.65%     |      0.9506 |
+-------+------------+-------------+
| Train | 86.86%     |      0.953  |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `NN`

In [31]:
class KerasModelWrapper(BaseEstimator):
    def __init__(self, model, epochs=100, batch_size=32, validation_split=0.2, callbacks=None):
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.callbacks = callbacks

    def fit(self, x, y):
        self.model.fit(
            x, y,
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=self.validation_split,
            callbacks=self.callbacks,
            verbose=0
        )
        return self

    def predict(self, x):
        proba = self.model.predict(x)
        return (proba > 0.5).astype(int)

    def predict_proba(self, x):
        proba = self.model.predict(x)
        return np.hstack([1 - proba, proba])

In [32]:
model = Sequential([
    Dense(32, input_dim=x_train_np.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

wrapped_model = KerasModelWrapper(
    model=model,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop]
)

nn_results = evaluate_model(
    wrapped_model, 
    x_train_np, y_train_np,
    x_test_np, y_test_np,
    model_name='Keras Sequential Model'
)

nn_results;

Using standard branch (fit/predict/predict_proba)...
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 739us/step
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 511us/step
[1m1090/1090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 450us/step
[1m1090/1090[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 449us/step

=== Keras Sequential Model ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.37%     |      0.9485 |
+-------+------------+-------------+
| Train | 86.73%     |      0.9528 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### **Summary Table**

In [33]:
results = [logreg_results, rf_results, dt_results, 
        svm_results, nb_results, knn_results, 
        xgb_results, lgbm_results, catb_results, 
        snb_results, ebm_results, ngb_results, 
        adb_results, grb_results, hgrb_results, 
        gcf_results, tabnet_results, nn_results]

create_summary_table(results)


Summary Table - Test Metrics
+------------------------+------------+-------------+----------+------------+
| Algorithm              |   Accuracy |   Precision |   Recall |   F1-Score |
| Logistic Regression    |     0.8424 |      0.8444 |   0.8424 |     0.8421 |
+------------------------+------------+-------------+----------+------------+
| Random Forest          |     0.8531 |      0.8532 |   0.8531 |     0.853  |
+------------------------+------------+-------------+----------+------------+
| Decision Tree          |     0.8326 |      0.8326 |   0.8326 |     0.8326 |
+------------------------+------------+-------------+----------+------------+
| SVM                    |     0.8587 |      0.8622 |   0.8587 |     0.8582 |
+------------------------+------------+-------------+----------+------------+
| Naive Bayes            |     0.8452 |      0.8502 |   0.8452 |     0.8445 |
+------------------------+------------+-------------+----------+------------+
| KNN                    |     0.8