### **Import Libraries**

In [30]:
import os
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

warnings.filterwarnings("ignore")
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
os.environ["OMP_NUM_THREADS"] = "1"

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,roc_auc_score, classification_report, 
    confusion_matrix, ConfusionMatrixDisplay, roc_curve
)
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import (
    RandomForestClassifier, StackingClassifier, GradientBoostingClassifier, 
    HistGradientBoostingClassifier, AdaBoostClassifier
)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.base import BaseEstimator

from interpret.glassbox import ExplainableBoostingClassifier 
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from ngboost import NGBClassifier
from ngboost.distns import Bernoulli 
from snapml import BoostingMachineClassifier  

from lib.utils import gcForest

from pytorch_tabnet.tab_model import TabNetClassifier

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from tabulate import tabulate

### **Load & Split Data**

In [31]:
df = pd.read_csv('data/preprocessed_data_full_encoded_new_v4.csv')

cols_to_keep = ['age', 
                'height',
                'weight',
                'systolic', 
                'diastolic',
                'bmi',
                'map',
                'pulse_pressure',
                'gender',
                'cholesterol', 
                'gluc',
                'smoke', 
                'alco', 
                'active',
                'cardio'
                ]

df = df[cols_to_keep]

print('Sample Data', len(df)) 
display(df.head())

Sample Data 48838


Unnamed: 0,age,height,weight,systolic,diastolic,bmi,map,pulse_pressure,gender,cholesterol,gluc,smoke,alco,active,cardio
0,50,168,62.0,110,80,21.97,90.0,30,1,0,0,0,0,1,0
1,55,156,85.0,140,90,34.93,106.67,50,0,2,0,0,0,1,1
2,51,165,64.0,130,70,23.51,90.0,60,0,2,0,0,0,0,1
3,48,169,82.0,150,100,28.71,116.67,50,1,0,0,0,0,1,1
4,61,178,95.0,130,90,29.98,103.33,40,1,2,2,0,0,1,1


In [32]:
X = df.drop('cardio', axis=1)  
y = df['cardio']   

In [33]:
label_mapping = {0: 'Healthy', 1: 'Cardio Risk'}
target_names = [label_mapping[label] for label in y.unique()]

### **Scaling Data**

In [34]:
numerical_features = ['age', 'height', 'weight', 'systolic', 'diastolic', 'bmi', 'map', 'pulse_pressure']
ordinal_features = ['cholesterol', 'gluc']
binary_features = ['gender', 'smoke', 'alco', 'active']

assert all(feature in X.columns for feature in numerical_features + ordinal_features + binary_features)

In [35]:
scaler_standard = StandardScaler()

preprocessor = ColumnTransformer(
    transformers=[
        ('num_scaler', scaler_standard, numerical_features),
         ('ord_scaler', scaler_standard, ordinal_features)
    ],
    remainder='passthrough'  
)

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor)
])

X_preprocessed = pipeline.fit_transform(X)

X_preprocessed = pd.DataFrame(X_preprocessed, columns=numerical_features + ordinal_features + binary_features)
X_preprocessed

Unnamed: 0,age,height,weight,systolic,diastolic,bmi,map,pulse_pressure,cholesterol,gluc,gender,smoke,alco,active
0,-0.395262,0.496159,-0.937176,-1.090057,-0.186194,-1.153100,-0.672977,-1.394707,-0.576010,-0.421194,1.0,0.0,0.0,1.0
1,0.339920,-1.095265,0.902556,0.914602,1.083539,1.612954,1.076080,0.484135,2.217416,-0.421194,0.0,0.0,0.0,1.0
2,-0.248226,0.098303,-0.777199,0.246383,-1.455926,-0.824418,-0.672977,1.423557,2.217416,-0.421194,0.0,0.0,0.0,0.0
3,-0.689335,0.628778,0.662591,1.582822,2.353272,0.285418,2.125304,0.484135,-0.576010,-0.421194,1.0,0.0,0.0,1.0
4,1.222138,1.822347,1.702440,0.246383,1.083539,0.556475,0.725639,-0.455286,2.217416,2.881940,1.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48833,0.486956,-0.697409,-0.137292,0.246383,1.083539,0.236330,0.725639,-0.455286,0.820703,1.230373,0.0,0.0,0.0,1.0
48834,-0.248226,-0.432172,-1.417106,2.919261,1.083539,-1.232070,2.125304,3.302399,-0.576010,-0.421194,0.0,0.0,0.0,1.0
48835,0.633993,0.098303,0.502615,1.582822,-0.186194,0.428417,0.725639,2.362978,-0.576010,-0.421194,0.0,0.0,0.0,1.0
48836,1.222138,-0.166934,-0.137292,0.580492,-0.186194,-0.058204,0.201027,0.953846,-0.576010,1.230373,0.0,0.0,0.0,0.0


### **Train, Val, Test**

In [36]:
x_train, x_temp, y_train, y_temp = train_test_split(
    X_preprocessed, y, test_size=0.3, random_state=42, stratify=y
)  # Train 80%

x_val, x_test, y_val, y_test = train_test_split(
    x_temp, y_temp, test_size=(1/3), random_state=42
)  # Val 10%, Test 10%


In [37]:
x_train_np = np.array(x_train)
y_train_np = np.array(y_train)
x_test_np = np.array(x_test)
y_test_np = np.array(y_test)

### **Base Model**

In [38]:
def evaluate_model(model, x_train, y_train, x_test, y_test, model_name):
    
    if hasattr(model, 'cascade_forest'):
        print("Using cascade_forest branch for training and prediction...")
        model.cascade_forest(x_train, y_train)
        predict_func = lambda x: np.argmax(np.mean(model.cascade_forest(x), axis=0), axis=1)
        predict_proba_func = lambda x: np.mean(model.cascade_forest(x), axis=0)
    else:
        print("Using standard branch (fit/predict/predict_proba)...")
        model.fit(x_train, y_train)
        predict_func = lambda x: model.predict(x)
        predict_proba_func = lambda x: model.predict_proba(x)

    y_pred_test = predict_func(x_test)
    y_proba_test = predict_proba_func(x_test)
    
    if y_proba_test.shape[1] > 1:
        y_probs_test = y_proba_test[:, 1]
    else:
        y_probs_test = y_proba_test[:, 0]
    
    y_pred_train = predict_func(x_train)
    y_proba_train = predict_proba_func(x_train)
    if y_proba_train.shape[1] > 1:
        y_probs_train = y_proba_train[:, 1]
    else:
        y_probs_train = y_proba_train[:, 0]
    
    test_accuracy = accuracy_score(y_test, y_pred_test)
    test_acc_str = f"{(test_accuracy * 100):.2f}%"
    test_auc = roc_auc_score(y_test, y_probs_test)
    test_auc_str = f"{test_auc:.4f}"
    test_report_dict = classification_report(y_test, y_pred_test, output_dict=True)
    test_precision = test_report_dict['weighted avg']['precision'] 
    test_recall    = test_report_dict['weighted avg']['recall']   
    test_f1        = test_report_dict['weighted avg']['f1-score']
    
    train_accuracy = accuracy_score(y_train, y_pred_train)
    train_acc_str = f"{(train_accuracy * 100):.2f}%"
    train_auc = roc_auc_score(y_train, y_probs_train)
    train_auc_str = f"{train_auc:.4f}"
    train_report_dict = classification_report(y_train, y_pred_train, output_dict=True)
    train_precision = train_report_dict['weighted avg']['precision']
    train_recall    = train_report_dict['weighted avg']['recall']    
    train_f1        = train_report_dict['weighted avg']['f1-score']  

    data = [
        ["Test", test_acc_str, test_auc_str],
        ["Train", train_acc_str, train_auc_str]
    ]

    headers = ["", "Accuracy", "AUC Score"]

    print(f"\n=== {model_name} ===\n")
    print(tabulate(data, headers=headers, tablefmt="grid"))
    
    print("\nOverfitting Check :")
    if train_accuracy > test_accuracy + 5 or train_auc > test_auc + 0.05:
        print("The model might be overfitting.")
    else:
        print("No significant signs of overfitting.\n")
    
    # # Plot Confusion Matrix and ROC Curve
    # fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    # cm = confusion_matrix(y_test, y_pred_test)
    # # If a global variable 'label_mapping' exists, use it for display labels
    # display_labels = list(label_mapping.values()) if 'label_mapping' in globals() else None
    # disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
    # disp.plot(ax=axes[0], cmap='viridis', colorbar=False)
    # axes[0].set_title(f"{model_name} - Confusion Matrix")
    
    # fpr, tpr, _ = roc_curve(y_test, y_probs_test)
    # axes[1].plot(fpr, tpr, label=f"ROC Curve (AUC = {test_auc:.4f})", linewidth=2)
    # axes[1].plot([0, 1], [0, 1], 'k--', label="Random Guess", linewidth=1)
    # axes[1].set_title(f"{model_name} - ROC Curve")
    # axes[1].legend(loc="lower right")
    # axes[1].grid(alpha=0.3)
    
    # plt.tight_layout()
    # plt.show()
    
    return {
        'model_name': model_name,
        'train_accuracy': train_accuracy,
        'train_precision': train_precision,
        'train_recall': train_recall,
        'train_f1': train_f1,
        'test_accuracy': test_accuracy,
        'test_precision': test_precision,
        'test_recall': test_recall,
        'test_f1': test_f1
    }


def create_summary_table(results):
    test_summary = pd.DataFrame([{
        'Algorithm': r['model_name'],
        'Accuracy':  round(r['test_accuracy'], 4),
        'Precision': round(r['test_precision'], 4),
        'Recall':    round(r['test_recall'], 4),
        'F1-Score':  round(r['test_f1'], 4)
    } for r in results])
    train_summary = pd.DataFrame([{
        'Algorithm': r['model_name'],
        'Accuracy':  round(r['train_accuracy'], 4),
        'Precision': round(r['train_precision'], 4),
        'Recall':    round(r['train_recall'], 4),
        'F1-Score':  round(r['train_f1'], 4)
    } for r in results])
    
    
    print("\nSummary Table - Test Metrics")
    print(tabulate(test_summary, headers='keys', tablefmt='grid', showindex=False))

    print("Summary Table - Training Metrics")
    print(tabulate(train_summary, headers='keys', tablefmt='grid', showindex=False))
    

##### `Logistic Regression`

In [39]:
logreg_model = LogisticRegression()
logreg_results = evaluate_model(logreg_model, x_train_np, y_train_np, x_test_np, y_test_np, "Logistic Regression")
logreg_results;

Using standard branch (fit/predict/predict_proba)...



=== Logistic Regression ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.95%     |      0.9259 |
+-------+------------+-------------+
| Train | 85.94%     |      0.923  |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Random Forest`

In [40]:
rf_model = RandomForestClassifier()
rf_results = evaluate_model(rf_model, x_train_np, y_train_np, x_test_np, y_test_np, "Random Forest")
rf_results;

Using standard branch (fit/predict/predict_proba)...

=== Random Forest ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.59%     |      0.9556 |
+-------+------------+-------------+
| Train | 99.59%     |      0.9999 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Decision Tree`

In [41]:
dt_model = DecisionTreeClassifier()
dt_results = evaluate_model(dt_model, x_train_np, y_train_np, x_test_np, y_test_np, "Decision Tree")
dt_results;

Using standard branch (fit/predict/predict_proba)...

=== Decision Tree ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 84.60%     |      0.8472 |
+-------+------------+-------------+
| Train | 99.59%     |      1      |
+-------+------------+-------------+

Overfitting Check :
The model might be overfitting.


##### `SVM`

In [42]:
svm_model = SVC(probability=True)
svm_results = evaluate_model(svm_model, x_train_np, y_train_np, x_test_np, y_test_np, "SVM")
svm_results;

Using standard branch (fit/predict/predict_proba)...



=== SVM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.63%     |      0.9518 |
+-------+------------+-------------+
| Train | 88.11%     |      0.9563 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Naive Bayes`

In [43]:
nb_model = GaussianNB()
nb_results = evaluate_model(nb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Naive Bayes")
nb_results;

Using standard branch (fit/predict/predict_proba)...

=== Naive Bayes ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.11%     |      0.9292 |
+-------+------------+-------------+
| Train | 84.61%     |      0.927  |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `KNN`

In [44]:
knn_model = KNeighborsClassifier()
knn_results = evaluate_model(knn_model, x_train_np, y_train_np, x_test_np, y_test_np, "KNN")
knn_results;

Using standard branch (fit/predict/predict_proba)...

=== KNN ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.89%     |      0.9369 |
+-------+------------+-------------+
| Train | 90.45%     |      0.9745 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `XGBoost`

In [45]:
xgb_model = XGBClassifier(use_label_encoder=False, eval_metric='logloss')
xgb_results = evaluate_model(xgb_model, x_train_np, y_train_np, x_test_np, y_test_np, "XGBoost")
xgb_results;

Using standard branch (fit/predict/predict_proba)...

=== XGBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.29%     |      0.9596 |
+-------+------------+-------------+
| Train | 92.21%     |      0.9823 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Light GBM`

In [46]:
lgbm_model = LGBMClassifier(verbose=-1)
lgbm_results = evaluate_model(lgbm_model, x_train_np, y_train_np, x_test_np, y_test_np, "LightGBM")
lgbm_results;

Using standard branch (fit/predict/predict_proba)...

=== LightGBM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.65%     |      0.9618 |
+-------+------------+-------------+
| Train | 89.53%     |      0.9717 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Cat Boost`

In [47]:
catb_model = CatBoostClassifier(verbose=False)
catb_results = evaluate_model(catb_model, x_train_np, y_train_np, x_test_np, y_test_np, "CatBoost")
catb_results;

Using standard branch (fit/predict/predict_proba)...

=== CatBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.41%     |      0.9607 |
+-------+------------+-------------+
| Train | 91.19%     |      0.9775 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `SnapBoost`

In [48]:
snb_model = BoostingMachineClassifier()
snb_results = evaluate_model(snb_model, x_train_np, y_train_np, x_test_np, y_test_np, "SnapBoost")
snb_results;

Using standard branch (fit/predict/predict_proba)...

=== SnapBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.26%     |      0.9573 |
+-------+------------+-------------+
| Train | 88.12%     |      0.9622 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Explainable Boosting Machine (EBM)`

In [49]:
ebm_model = ExplainableBoostingClassifier(n_jobs=1)
ebm_results = evaluate_model(ebm_model, x_train_np, y_train_np, x_test_np, y_test_np, "EBM")
ebm_results;

Using standard branch (fit/predict/predict_proba)...

=== EBM ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.71%     |      0.9521 |
+-------+------------+-------------+
| Train | 87.27%     |      0.9539 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `NGBoost`

In [50]:
ngb_model = NGBClassifier(Dist=Bernoulli, verbose=False)
ngb_results = evaluate_model(ngb_model, x_train_np, y_train_np, x_test_np, y_test_np, "NGBoost")
ngb_results;

Using standard branch (fit/predict/predict_proba)...

=== NGBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.00%     |      0.951  |
+-------+------------+-------------+
| Train | 86.13%     |      0.9505 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `AdaBoost`

In [51]:
adb_model = AdaBoostClassifier(random_state=42)
adb_results = evaluate_model(adb_model, x_train_np, y_train_np, x_test_np, y_test_np, "AdaBoost")
adb_results;

Using standard branch (fit/predict/predict_proba)...

=== AdaBoost ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 85.59%     |      0.929  |
+-------+------------+-------------+
| Train | 85.77%     |      0.9294 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `GradientBoosting`

In [52]:
grb_model =  GradientBoostingClassifier(random_state=42)
grb_results = evaluate_model(grb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Gradient Boosting")
grb_results;

Using standard branch (fit/predict/predict_proba)...

=== Gradient Boosting ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.35%     |      0.9553 |
+-------+------------+-------------+
| Train | 87.63%     |      0.9578 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



##### `Hist GradientBoosting`

In [53]:
hgrb_model =  HistGradientBoostingClassifier(random_state=42)
hgrb_results = evaluate_model(hgrb_model, x_train_np, y_train_np, x_test_np, y_test_np, "Hist Gradient Boosting")
hgrb_results;

Using standard branch (fit/predict/predict_proba)...

=== Hist Gradient Boosting ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.74%     |      0.9624 |
+-------+------------+-------------+
| Train | 89.55%     |      0.9716 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `Cascaded Random Forest`

In [54]:
gcf_model = gcForest(n_cascadeRF=2,n_cascadeRFtree=500) #Default values tolerance =0.0 ,n_cascadeRFtree=101
gcf_results = evaluate_model(gcf_model, x_train_np, y_train_np, x_test_np, y_test_np, "Cascaded Random Forest")
gcf_results;

Using cascade_forest branch for training and prediction...
Adding/Training Layer, n_layer=1
Layer validation accuracy = 0.855805791167008
Adding/Training Layer, n_layer=2
Layer validation accuracy = 0.8642878034513015
Adding/Training Layer, n_layer=3
Layer validation accuracy = 0.8657502193623866
Adding/Training Layer, n_layer=4
Layer validation accuracy = 0.8676513600467973
Adding/Training Layer, n_layer=5
Layer validation accuracy = 0.8686750511845569
Adding/Training Layer, n_layer=6
Layer validation accuracy = 0.8689675343667739
Adding/Training Layer, n_layer=7
Layer validation accuracy = 0.8704299502778591
Adding/Training Layer, n_layer=8
Layer validation accuracy = 0.870137467095642

=== Cascaded Random Forest ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 86.98%     |      0.9557 |
+-------+------------+-------------+
| Train | 87.61%     |      0.9582 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of ove

#### `TabNet`

In [59]:
best_params = {'n_d': 55, 'n_a': 39, 'n_steps': 9, 'gamma': 1.0350228727394724, 'lambda_sparse': 1.3957603942801675e-05, 'momentum': 0.4587005208996888, 'clip_value': 1.2827518858412652, 'seed': 4478, 'verbose': 0}
tabnet_model = TabNetClassifier(**best_params)
tabnet_results = evaluate_model(tabnet_model, x_train_np, y_train_np, x_test_np, y_test_np, "TabNet Classifier")
tabnet_results;

Using standard branch (fit/predict/predict_proba)...

=== TabNet Classifier ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.49%     |      0.9596 |
+-------+------------+-------------+
| Train | 89.09%     |      0.9687 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### `NN`

In [56]:
class KerasModelWrapper(BaseEstimator):
    def __init__(self, model, epochs=100, batch_size=32, validation_split=0.2, callbacks=None):
        self.model = model
        self.epochs = epochs
        self.batch_size = batch_size
        self.validation_split = validation_split
        self.callbacks = callbacks

    def fit(self, x, y):
        self.model.fit(
            x, y,
            epochs=self.epochs,
            batch_size=self.batch_size,
            validation_split=self.validation_split,
            callbacks=self.callbacks,
            verbose=0
        )
        return self

    def predict(self, x):
        proba = self.model.predict(x)
        return (proba > 0.5).astype(int)

    def predict_proba(self, x):
        proba = self.model.predict(x)
        return np.hstack([1 - proba, proba])

In [57]:
model = Sequential([
    Dense(32, input_dim=x_train_np.shape[1], activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')  
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

wrapped_model = KerasModelWrapper(
    model=model,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stop]
)

nn_results = evaluate_model(
    wrapped_model, 
    x_train_np, y_train_np,
    x_test_np, y_test_np,
    model_name='Keras Sequential Model'
)

nn_results;

Using standard branch (fit/predict/predict_proba)...
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 525us/step
[1m153/153[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 398us/step
[1m1069/1069[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350us/step
[1m1069/1069[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 348us/step

=== Keras Sequential Model ===

+-------+------------+-------------+
|       | Accuracy   |   AUC Score |
| Test  | 87.45%     |      0.9585 |
+-------+------------+-------------+
| Train | 87.91%     |      0.9612 |
+-------+------------+-------------+

Overfitting Check :
No significant signs of overfitting.



#### **Summary Table**

In [58]:
results = [logreg_results, rf_results, dt_results, 
        svm_results, nb_results, knn_results, 
        xgb_results, lgbm_results, catb_results, 
        snb_results, ebm_results, ngb_results, 
        adb_results, grb_results, hgrb_results, 
        gcf_results, tabnet_results, nn_results]

create_summary_table(results)


Summary Table - Test Metrics
+------------------------+------------+-------------+----------+------------+
| Algorithm              |   Accuracy |   Precision |   Recall |   F1-Score |
| Logistic Regression    |     0.8595 |      0.8623 |   0.8595 |     0.8593 |
+------------------------+------------+-------------+----------+------------+
| Random Forest          |     0.8759 |      0.876  |   0.8759 |     0.8759 |
+------------------------+------------+-------------+----------+------------+
| Decision Tree          |     0.846  |      0.846  |   0.846  |     0.846  |
+------------------------+------------+-------------+----------+------------+
| SVM                    |     0.8763 |      0.8787 |   0.8763 |     0.8762 |
+------------------------+------------+-------------+----------+------------+
| Naive Bayes            |     0.8511 |      0.8595 |   0.8511 |     0.8503 |
+------------------------+------------+-------------+----------+------------+
| KNN                    |     0.8