In [66]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load Heart Disease dataset (Cleveland subset) from a local file
url = "processed.cleveland.data.csv"  # Update with the correct file path
headers = ["age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", "thalach", "exang", "oldpeak", "slope", "ca", "thal", "num"]  # Use correct headers
df = pd.read_csv(url, header=None, names=headers)

# Replace missing values denoted by '?' with NaN and drop them (you could also impute them)
df.replace('?', np.nan, inplace=True)
df.dropna(inplace=True)

# Convert target 'num' to binary classification (presence of heart disease)
df['num'] = df['num'].apply(lambda x: 1 if x > 0 else 0)  # 1 = presence, 0 = absence of heart disease

# Separate features (X) and target (y)
X = df.drop('num', axis=1)
y = df['num']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-fold cross-validation setup (let's use K=5)
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

### Apply the models and evaluate performance ###

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000)
log_reg_scores = cross_val_score(log_reg, X_scaled, y, cv=kf, scoring='accuracy')
print(f"Logistic Regression Accuracy: {np.mean(log_reg_scores)}")

# Support Vector Machine Classifier
svc = SVC()
svc_scores = cross_val_score(svc, X_scaled, y, cv=kf, scoring='accuracy')
print(f"SVM Accuracy: {np.mean(svc_scores)}")

# Decision Tree Classifier
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf_scores = cross_val_score(tree_clf, X_scaled, y, cv=kf, scoring='accuracy')
print(f"Decision Tree Classifier Accuracy: {np.mean(tree_clf_scores)}")

# Multilayer Perceptron Classifier
mlp_clf = MLPClassifier(random_state=42, max_iter=1000)
mlp_clf_scores = cross_val_score(mlp_clf, X_scaled, y, cv=kf, scoring='accuracy')
print(f"MLP Classifier Accuracy: {np.mean(mlp_clf_scores)}")


Logistic Regression Accuracy: 0.8249152542372882
SVM Accuracy: 0.8247457627118644
Decision Tree Classifier Accuracy: 0.7271186440677966
MLP Classifier Accuracy: 0.7978531073446329
