In [33]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

In [34]:
file_path = r"C:\Users\LENOVO\Desktop\heart-disease-detection\data\heart.csv"
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [35]:
from sklearn.model_selection import train_test_split

X = df.drop('target', axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

### Scale-Insensitive

In [36]:
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

In [37]:
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)

In [38]:
from sklearn.ensemble import GradientBoostingClassifier
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)

### Scale-Sensitive

In [39]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [40]:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier()
knn.fit(X_train_scaled, y_train)

In [41]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression()
lr.fit(X_train_scaled, y_train)

In [42]:
from sklearn.svm import SVC

svc = SVC()
svc.fit(X_train_scaled, y_train)

## Model Evaluation

In [43]:
models = [rf, nb, gb, knn, lr, svc]
model_names = ['Random Forest', 'Naive Bayes', 'Gradient Boosting', 'KNN', 'Logistic Regression', 'SVC']

In [44]:
# Accuracy of the models
for model, name in zip(models, model_names):
    print(f'{name} Accuracy: {round(model.score(X_test, y_test), 3)}')

Random Forest Accuracy: 0.985
Naive Bayes Accuracy: 0.815
Gradient Boosting Accuracy: 0.961
KNN Accuracy: 0.483
Logistic Regression Accuracy: 0.493
SVC Accuracy: 0.493


In [45]:
from sklearn.metrics import recall_score

y_preds = [ rf.predict(X_test), 
            nb.predict(X_test), 
            gb.predict(X_test), 
            knn.predict(X_test_scaled), 
            lr.predict(X_test_scaled), 
            svc.predict(X_test_scaled)
          ]

# Recall of the models
for y_pred, name in zip(y_preds, model_names):
    print(f'{name} Recall: {round(recall_score(y_test, y_pred), 3)}')

Random Forest Recall: 0.971
Naive Bayes Recall: 0.861
Gradient Boosting Recall: 0.966
KNN Recall: 0.87
Logistic Regression Recall: 0.837
SVC Recall: 0.942
