In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score


In [None]:
# Location of the heart-disease CSV. Kept in one named constant so the path can
# be changed without touching the loading logic below.
DATA_PATH = "heart.csv"

try:
    data = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Every later cell reads `data`, so stop here with an actionable message
    # instead of a bare errno.
    raise FileNotFoundError(
        f"Dataset not found at {DATA_PATH!r}. Place the CSV next to this notebook "
        "or point DATA_PATH at its actual location."
    ) from err

# View first rows
data.head()


FileNotFoundError: [Errno 2] No such file or directory: 'heart.csv'

In [None]:
# Count of missing entries in each column.
data.isna().sum()


In [None]:
# Replace missing values in numeric columns with that column's median.
# `numeric_only=True` keeps this working if the frame ever contains a
# non-numeric column (a plain `median()` raises on those in pandas >= 2.0).
# NOTE(review): the medians are computed on the full dataset, before the
# train/test split further down, so held-out rows influence the imputed
# values. Consider imputing after the split with training-set medians only.
column_medians = data.median(numeric_only=True)

# Rebind instead of mutating in place, so re-running the cell is predictable.
data = data.fillna(column_medians)


In [None]:
# Bar chart of how many rows fall under each value of the `target` column.
ax = sns.countplot(data=data, x="target")
ax.set_title("Heart Disease Distribution")
plt.show()


In [None]:
# Compare the age distribution across the values of `target`.
fig, ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=data, x="target", y="age", ax=ax)
ax.set_title("Age vs Heart Disease")
plt.show()


In [None]:
# Pairwise correlation between all columns, drawn as an unannotated heatmap.
corr_matrix = data.corr()

fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(corr_matrix, annot=False, cmap="coolwarm", ax=ax)
ax.set_title("Feature Correlation Heatmap")
plt.show()


In [None]:
# Separate the label column from the predictors.
y = data["target"]
X = data.drop(columns="target")

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so the test rows never inform the fit.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Fit the classifier on the scaled training split (`fit` returns the estimator).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Test-split predictions: hard labels, plus the predicted probability taken
# from column 1 of `predict_proba`.
y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]


In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)


In [None]:
# Cross-tabulate true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_test, y_pred)

# Annotate each cell with its integer count.
ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
# ROC points from the predicted probabilities; the thresholds returned as the
# third value are not used here.
fpr, tpr, _ = roc_curve(y_test, y_prob)
auc = roc_auc_score(y_test, y_prob)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, label=f"AUC = {auc:.2f}")
ax.plot([0, 1], [0, 1], linestyle="--")  # diagonal reference line
ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curve")
ax.legend()
plt.show()


In [None]:
# Pair each feature name with its fitted logistic-regression coefficient.
# The model was fitted on standardized features, so the coefficients are on a
# comparable scale across features.
# NOTE(review): `model.coef_[0]` assumes a binary target (one coefficient row)
# - confirm.
#
# Rank by absolute magnitude: a large negative coefficient is as influential as
# a large positive one, and sorting by the signed value would push it to the
# bottom of the table. The signed coefficient is kept in "Importance" so the
# direction of each effect stays visible.
feature_importance = pd.DataFrame({
    "Feature": X.columns,
    "Importance": model.coef_[0],
}).sort_values(by="Importance", key=lambda coef: coef.abs(), ascending=False)

feature_importance


In [None]:
# Horizontal bars: one per feature, bar length given by the "Importance" column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_importance, x="Importance", y="Feature", ax=ax)
ax.set_title("Important Features Affecting Heart Disease")
plt.show()
