In [1]:
# Import libraries
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Suppress warnings
warnings.filterwarnings('ignore')

# Load the dataset
# The location can be overridden with the HEART_DATA_PATH environment variable
# so the notebook can run on another machine; the original local path is kept
# as the default so existing behaviour is unchanged when the variable is unset.
file_path = os.environ.get(
    'HEART_DATA_PATH',
    r'C:\Users\Srivasthav Sandesh\Downloads\heart+disease\processed.cleveland.data',
)

# Column names based on UCI documentation (the file itself has no header row)
column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'target'
]

# Missing values are marked as '?'; na_values turns them into NaN at parse
# time instead of patching the frame in place afterwards.
df = pd.read_csv(file_path, header=None, na_values='?')

# Assigning (rather than passing names= to read_csv) keeps the length check:
# this raises if the file does not have exactly 14 columns.
df.columns = column_names

# Coerce any remaining non-numeric token to NaN, then drop incomplete rows.
# Both steps return new frames, so re-running this code is idempotent.
df = df.apply(pd.to_numeric, errors='coerce').dropna()

# Separate features and target
X = df.drop(columns=['target'])

# Convert target to binary: 0 = no disease, 1 = disease.
# Vectorised comparison instead of a per-element Python lambda; int64 is
# requested explicitly so the dtype does not depend on the platform.
y = (df['target'] > 0).astype('int64')

# Hold out 30% of the rows for testing; the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Standardize features: the scaler is fitted on the training rows only and
# the same fitted transform is then applied to both partitions.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Define models
# Seed shared by every estimator below that accepts one (same value the
# train/test split uses), so repeated runs produce the same fitted models.
RANDOM_STATE = 42

models = {
    # Left at library defaults.
    'Logistic Regression': LogisticRegression(),
    # probability=True enables predict_proba; scikit-learn documents
    # random_state as controlling the data shuffling used for those
    # probability estimates.
    'SVM': SVC(probability=True, random_state=RANDOM_STATE),
    # Unseeded forests give a different accuracy on every run because the
    # bootstrap samples and feature subsets are drawn at random.
    'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
    # use_label_encoder is intentionally not passed (it was removed here).
    'XGBoost': XGBClassifier(eval_metric='logloss', random_state=RANDOM_STATE),
}

# Fit each candidate on the training partition, predict the held-out rows,
# and print the accuracy followed by the per-class precision/recall report.
for name in models:
    model = models[name]
    print(f"\n{name}")

    # fit() returns the fitted estimator, so prediction can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    accuracy_line = f"Accuracy: {round(acc * 100, 2)}%"
    report = classification_report(y_test, y_pred)

    print(accuracy_line)
    print(report)


Logistic Regression
Accuracy: 88.89%
              precision    recall  f1-score   support

           0       0.88      0.92      0.90        49
           1       0.90      0.85      0.88        41

    accuracy                           0.89        90
   macro avg       0.89      0.89      0.89        90
weighted avg       0.89      0.89      0.89        90


SVM
Accuracy: 88.89%
              precision    recall  f1-score   support

           0       0.85      0.96      0.90        49
           1       0.94      0.80      0.87        41

    accuracy                           0.89        90
   macro avg       0.90      0.88      0.89        90
weighted avg       0.89      0.89      0.89        90


Random Forest
Accuracy: 84.44%
              precision    recall  f1-score   support

           0       0.83      0.90      0.86        49
           1       0.86      0.78      0.82        41

    accuracy                           0.84        90
   macro avg       0.85      0.84   