In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Load dataset
df = pd.read_csv("heart.csv")

# Basic cleaning: drop rows with missing or negative values
df = df.dropna()
df = df[(df.select_dtypes(include='number') >= 0).all(axis=1)]

# Error correcting: Remove outliers using Z-score method
z_scores = np.abs((df - df.mean()) / df.std())
df = df[(z_scores < 3).all(axis=1)]

# Prepare features and target
X = df.drop("target", axis=1)
y = df["target"]

# Scale features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Logistic Regression
lr = LogisticRegression()
lr.fit(X_train, y_train)
lr_acc = accuracy_score(y_test, lr.predict(X_test))

# k-Nearest Neighbors
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_acc = accuracy_score(y_test, knn.predict(X_test))

# Output results
print(f"Logistic Regression Accuracy: {lr_acc * 100:.2f}%")
print(f"kNN Accuracy: {knn_acc * 100:.2f}%")


Logistic Regression Accuracy: 88.14%
kNN Accuracy: 90.72%
