## 1. Import Libraries

In [23]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, precision_score, recall_score, f1_score

### Importing Classic ML models

In [25]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier

## 2. Load Processed Data

In [27]:
# Load the preprocessed dataset into `df`; the cells below read `df` directly.
try:
    df = pd.read_csv('../data/processed/cleaned_data.csv')
except FileNotFoundError:
    print("Error: 'cleaned_data.csv' not found in 'data/processed/'.")
    print("Please ensure you have run the preprocessing script first.")
    # Re-raise after the hint: without the file `df` is never bound, so
    # continuing would only surface later as an unrelated NameError.
    raise
else:
    # Runs only when the read succeeded.
    print("Processed data loaded successfully.")
    print(df.head())

Processed data loaded successfully.
        age  trestbps      chol   thalach   oldpeak  sex_1  cp_1  cp_2  cp_3  \
0 -0.267966 -0.376556 -0.667728  0.806035 -0.037124    1.0   0.0   0.0   0.0   
1 -0.157260  0.478910 -0.841918  0.237495  1.773958    1.0   0.0   0.0   0.0   
2  1.724733  0.764066 -1.403197 -1.074521  1.342748    1.0   0.0   0.0   0.0   
3  0.728383  0.935159 -0.841918  0.499898 -0.899544    1.0   0.0   0.0   0.0   
4  0.839089  0.364848  0.919336 -1.905464  0.739054    0.0   0.0   0.0   0.0   

   fbs_1  ...  slope_1  slope_2  ca_1  ca_2  ca_3  ca_4  thal_1  thal_2  \
0    0.0  ...      0.0      1.0   0.0   1.0   0.0   0.0     0.0     0.0   
1    1.0  ...      0.0      0.0   0.0   0.0   0.0   0.0     0.0     0.0   
2    0.0  ...      0.0      0.0   0.0   0.0   0.0   0.0     0.0     0.0   
3    0.0  ...      0.0      1.0   1.0   0.0   0.0   0.0     0.0     0.0   
4    1.0  ...      1.0      0.0   0.0   0.0   1.0   0.0     0.0     1.0   

   thal_3  target  
0     1.0   

## 3. Separating Features and Target & Splitting Data

In [29]:
# Feature matrix: every column of `df` except the 'target' label.
X = df.drop(columns='target')
# Label vector: the 'target' column on its own.
y = df['target']

# Hold out 20% of the rows for testing. The split is stratified on `y`
# and seeded so that repeated runs produce the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Report the (rows, columns) of each partition.
print(f"Training set shape: {X_train.shape}")
print(f"Testing set shape: {X_test.shape}")

Training set shape: (241, 22)
Testing set shape: (61, 22)


## 4. Model Development & Evaluation

In [31]:
# Candidate classifiers, keyed by the label printed in the report below.
# Estimators that accept `random_state` are seeded with 42; the rest use
# their library defaults apart from the arguments shown.
models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "K-Nearest Neighbors": KNeighborsClassifier(),
    "Support Vector Machine": SVC(),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Random Forest": RandomForestClassifier(random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(random_state=42),
    "XGBoost": XGBClassifier(eval_metric='logloss', random_state=42)
}

# Fit each model on the training split, predict on the held-out split and
# print accuracy, precision, recall and F1 rounded to two decimals.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print(f"Model: {model_name}")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    print(f"Precision: {precision_score(y_test, y_pred):.2f}")
    print(f"Recall: {recall_score(y_test, y_pred):.2f}")
    print(f"F1-Score: {f1_score(y_test, y_pred):.2f}")
    # Blank separator between consecutive model reports.
    print("\n")

Model: Logistic Regression
Accuracy: 0.85
Precision: 0.88
Recall: 0.85
F1-Score: 0.86


Model: K-Nearest Neighbors
Accuracy: 0.80
Precision: 0.86
Recall: 0.76
F1-Score: 0.81


Model: Support Vector MAchine
Accuracy: 0.80
Precision: 0.86
Recall: 0.76
F1-Score: 0.81


Model: Naive Bayes
Accuracy: 0.85
Precision: 0.85
Recall: 0.88
F1-Score: 0.87


Model: Decision Tree
Accuracy: 0.66
Precision: 0.68
Recall: 0.70
F1-Score: 0.69


Model: Random Forest
Accuracy: 0.82
Precision: 0.84
Recall: 0.82
F1-Score: 0.83


Model: Grandient Boosting
Accuracy: 0.77
Precision: 0.81
Recall: 0.76
F1-Score: 0.78


Model: XGBoost
Accuracy: 0.77
Precision: 0.83
Recall: 0.73
F1-Score: 0.77




In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column of `df`.
df.describe()

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,sex_1,cp_1,cp_2,cp_3,fbs_1,...,slope_1,slope_2,ca_1,ca_2,ca_3,ca_4,thal_1,thal_2,thal_3,target
count,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0,...,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0,302.0
mean,-2.646889e-16,-6.617223e-16,0.0,-5.470238e-16,-1.117575e-16,0.682119,0.165563,0.284768,0.076159,0.149007,...,0.463576,0.466887,0.215232,0.125828,0.066225,0.013245,0.059603,0.546358,0.387417,0.543046
std,1.00166,1.00166,1.00166,1.00166,1.00166,0.466426,0.372305,0.452053,0.265693,0.356686,...,0.499499,0.49973,0.411665,0.332206,0.249088,0.114512,0.237142,0.498673,0.487969,0.49897
min,-2.814192,-2.144521,-2.33221,-3.436149,-0.8995441,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,-0.7107878,-0.6617119,-0.687083,-0.7137164,-0.8995441,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.1195033,-0.09140084,-0.116127,0.1281605,-0.2096081,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0
75%,0.7283833,0.4789102,0.546763,0.7185677,0.480328,1.0,0.0,1.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
max,2.499671,3.900776,6.145034,2.292987,4.44746,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
