# ❤️ Heart Disease Prediction

Predicting heart disease using clinical health records.

**Prepared by: K. Chanikya**

In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler

In [ ]:
# 📥 Load Dataset
# Read the heart-disease CSV into a DataFrame and preview its first rows.
# The relative path is resolved against the notebook's working directory.
DATA_PATH = "heart.csv"
df = pd.read_csv(DATA_PATH)
df.head()

In [ ]:
# 🔍 Data Cleaning
# Report the number of missing values in each column.
# NOTE(review): this cell only counts NaNs; it does not verify or coerce
# column dtypes. Add a `df.dtypes` check if numeric types must be guaranteed.
missing_per_column = df.isna().sum()
print(missing_per_column)

In [ ]:
# ⚖️ Feature Scaling
# Standardize the listed columns (zero mean, unit variance) and write the
# result back into `df`, overwriting the original values.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test rows contribute to the scaling statistics. Every later cell
# that reads these columns from `df` (including the EDA plots) sees
# standardized values rather than the raw measurements.
cols_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[cols_to_scale])
df[cols_to_scale] = scaled_values

In [ ]:
# 📊 EDA - Plot 1: Target Distribution
# Bar chart of how many rows fall into each value of `target`.
target_ax = sns.countplot(data=df, x='target')
target_ax.set_title("Heart Disease Distribution")
plt.show()

In [ ]:
# 📊 EDA - Plot 2: Cholesterol vs Heart Disease
# Box plot of `chol` for each value of `target`.
# NOTE: `chol` was standardized in the scaling cell, so the y-axis shows
# standardized values, not the original measurement units.
chol_ax = sns.boxplot(data=df, x='target', y='chol')
chol_ax.set_title("Cholesterol vs Target")
plt.show()

In [ ]:
# 📊 EDA - Plot 3: Age vs Thalach
# Scatter of `age` against `thalach` (max heart rate), colored by `target`.
# NOTE: both columns were standardized in the scaling cell, so the axes show
# standardized values, not the original measurement units.
scatter_ax = sns.scatterplot(data=df, x='age', y='thalach', hue='target')
scatter_ax.set_title("Age vs Max Heart Rate")
plt.show()

In [ ]:
# 🎯 Feature Engineering
# Separate the label from the predictors: `y` is the `target` column and
# `X` is every other column. No new features are derived in this cell.
target_col = 'target'
y = df[target_col]
X = df.drop(columns=[target_col])

In [ ]:
# 📦 Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [ ]:
# 🤖 Train Random Forest Classifier
# Seed the forest so the bootstrap samples and feature subsets are the same on
# every run. Without a seed, accuracy and feature importances change from run
# to run even though the train/test split itself is seeded with 42.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [ ]:
# 📈 Model Evaluation
# Score the held-out test set: overall accuracy, per-class precision/recall/F1,
# and a confusion-matrix heatmap.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Rows of the matrix are true labels, columns are predicted labels.
cm = confusion_matrix(y_test, y_pred)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' format
# would render counts of 100 or more in scientific notation (e.g. 1e+02).
cm_ax = sns.heatmap(cm, annot=True, fmt='d')
cm_ax.set_xlabel("Predicted label")
cm_ax.set_ylabel("True label")
plt.title("Confusion Matrix")
plt.show()

In [ ]:
# 🔍 Feature Importance
# Pair each fitted importance score with its column name, keep the ten
# largest, and draw them as a horizontal bar chart (largest bar at the top).
importances = pd.Series(model.feature_importances_, index=X.columns)
top_features = importances.sort_values().tail(10)
top_features.plot.barh()
plt.title("Top Predictive Features")
plt.show()

## 📌 Business Insights

- High cholesterol and blood pressure are key heart disease indicators.
- Patients with low max heart rate (thalach) are more at risk.
- Preventive programs should focus on patients with high `oldpeak` and chest pain (`cp`).
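- Early screening should be prioritized for patients over 50 and for those with sedentary-lifestyle indicators (note: lifestyle is not among the features analyzed above, so this part of the recommendation would need additional data to confirm).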