## 1. Import Libraries

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score

### Importing Classic ML models

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

## 2. Load Processed Data

In [5]:
# Load the preprocessed dataset that every later cell reads from `data`.
try:
    # Keep only the read inside `try` so that an unrelated failure while
    # printing cannot be mistaken for a missing file.
    data = pd.read_csv('../data/processed/cleaned_data.csv')
except FileNotFoundError:
    print("Error: 'cleaned_data.csv' not found in 'data/processed/'.")
    print("Please ensure you have run the preprocessing script first.")
    # Re-raise so execution stops here: the following cells need `data`, and
    # swallowing the error would only surface later as a confusing NameError.
    raise
else:
    print("Processed data loaded successfully.")
    print(data.head())

Processed data loaded successfully.
        age  trestbps      chol   thalach   oldpeak  sex_1  cp_1  cp_2  cp_3  \
0 -0.267966 -0.376556 -0.667728  0.806035 -0.037124    1.0   0.0   0.0   0.0   
1 -0.157260  0.478910 -0.841918  0.237495  1.773958    1.0   0.0   0.0   0.0   
2  1.724733  0.764066 -1.403197 -1.074521  1.342748    1.0   0.0   0.0   0.0   
3  0.728383  0.935159 -0.841918  0.499898 -0.899544    1.0   0.0   0.0   0.0   
4  0.839089  0.364848  0.919336 -1.905464  0.739054    0.0   0.0   0.0   0.0   

   fbs_1  ...  slope_1  slope_2  ca_1  ca_2  ca_3  ca_4  thal_1  thal_2  \
0    0.0  ...      0.0      1.0   0.0   1.0   0.0   0.0     0.0     0.0   
1    1.0  ...      0.0      0.0   0.0   0.0   0.0   0.0     0.0     0.0   
2    0.0  ...      0.0      0.0   0.0   0.0   0.0   0.0     0.0     0.0   
3    0.0  ...      0.0      1.0   1.0   0.0   0.0   0.0     0.0     0.0   
4    1.0  ...      1.0      0.0   0.0   0.0   1.0   0.0     0.0     1.0   

   thal_3  target  
0     1.0   

## 3. Separating Features and Target & Splitting Data

In [10]:
# Label column on one side, every remaining column as features on the other.
y = data['target']
X = data.drop(columns=['target'])

# 80/20 hold-out split; stratifying on y keeps the class ratio the same in
# both partitions, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Report the size of each partition.
for split_label, split_features in (("Training", X_train), ("Testing", X_test)):
    print(f"{split_label} set shape: {split_features.shape}")

Training set shape: (241, 22)
Testing set shape: (61, 22)


## 4. Model Development & Evaluation

In [17]:
# Candidate classifiers, keyed by the label printed in the report below.
# Estimators that accept a seed are pinned to 42 for repeatable results.
models = dict([
    ("Logistic Regression", LogisticRegression(max_iter=1000)),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Support Vector MAchine", SVC()),
    ("Naive Bayes", GaussianNB()),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Grandient Boosting", GradientBoostingClassifier(random_state=42)),
    ("XGBoost", XGBClassifier(eval_metric='logloss', random_state=42)),
])

# Each report section is a heading plus the function that renders it from
# (y_true, y_pred).
report_sections = (
    ("Confusion Matrix:", confusion_matrix),
    ("Classification Report:", classification_report),
)

# Train every classifier on the training split and print its diagnostics on
# the held-out test split.
for model_name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(f"Model: {model_name}")
    for section_title, build_section in report_sections:
        print(section_title)
        print(build_section(y_test, y_pred))
    print("\n")


Model: Logistic Regression
Confusion Matrix:
[[24  4]
 [ 5 28]]
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.86      0.84        28
           1       0.88      0.85      0.86        33

    accuracy                           0.85        61
   macro avg       0.85      0.85      0.85        61
weighted avg       0.85      0.85      0.85        61



Model: K-Nearest Neighbors
Confusion Matrix:
[[24  4]
 [ 8 25]]
Classification Report:
              precision    recall  f1-score   support

           0       0.75      0.86      0.80        28
           1       0.86      0.76      0.81        33

    accuracy                           0.80        61
   macro avg       0.81      0.81      0.80        61
weighted avg       0.81      0.80      0.80        61



Model: Support Vector MAchine
Confusion Matrix:
[[24  4]
 [ 8 25]]
Classification Report:
              precision    recall  f1-score   support

           0       0.75

In [19]:
# Same set of candidate classifiers, rebuilt so this cell starts from
# unfitted estimators; keys are the labels printed in the output.
models = dict([
    ("Logistic Regression", LogisticRegression(max_iter=1000)),
    ("K-Nearest Neighbors", KNeighborsClassifier()),
    ("Support Vector MAchine", SVC()),
    ("Naive Bayes", GaussianNB()),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Grandient Boosting", GradientBoostingClassifier(random_state=42)),
    ("XGBoost", XGBClassifier(eval_metric='logloss', random_state=42)),
])

# Headline metrics in the order they are printed; each callable takes
# (y_true, y_pred).
metric_functions = {
    "Accuracy": accuracy_score,
    "Precision": precision_score,
    "Recall": recall_score,
    "F1-Score": f1_score,
}

# Fit each classifier and print its scalar scores on the test split.
for model_name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)
    print(f"Model: {model_name}")
    for metric_label, metric_fn in metric_functions.items():
        print(f"{metric_label}: {metric_fn(y_test, y_pred)}")
    print("\n")

Model: Logistic Regression
Accuracy: 0.8524590163934426
Precision: 0.875
Recall: 0.8484848484848485
F1-Score: 0.8615384615384616


Model: K-Nearest Neighbors
Accuracy: 0.8032786885245902
Precision: 0.8620689655172413
Recall: 0.7575757575757576
F1-Score: 0.8064516129032258


Model: Support Vector MAchine
Accuracy: 0.8032786885245902
Precision: 0.8620689655172413
Recall: 0.7575757575757576
F1-Score: 0.8064516129032258


Model: Naive Bayes
Accuracy: 0.8524590163934426
Precision: 0.8529411764705882
Recall: 0.8787878787878788
F1-Score: 0.8656716417910447


Model: Decision Tree
Accuracy: 0.6557377049180327
Precision: 0.6764705882352942
Recall: 0.696969696969697
F1-Score: 0.6865671641791045


Model: Random Forest
Accuracy: 0.819672131147541
Precision: 0.84375
Recall: 0.8181818181818182
F1-Score: 0.8307692307692308


Model: Grandient Boosting
Accuracy: 0.7704918032786885
Precision: 0.8064516129032258
Recall: 0.7575757575757576
F1-Score: 0.78125


Model: XGBoost
Accuracy: 0.7704918032786885
Pre