# Heart Disease Prediction using Random Forest

This notebook demonstrates the use of a Random Forest Classifier to predict heart disease risk using patient data. The workflow follows the CRISP-DM methodology.

## Step 1: Import Libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import joblib

## Step 2: Load and Explore Data

In [None]:
# Load dataset
# Replace 'heart_disease_data.csv' with the path to your dataset file
data = pd.read_csv('heart_disease_data.csv')

# Preview the first rows of the data
print(data.head())

# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would emit a stray "None".
data.info()

## Step 3: Data Preparation
- Check for missing values
- Standardize numeric features (zero mean, unit variance)
- Split the data into training and testing sets

In [None]:
# Check for missing values (printed per column for inspection only)
print(data.isnull().sum())

# Features and target
X = data.drop('target', axis=1)
y = data['target']

# Split the dataset into training and testing sets.
# This is done BEFORE scaling: fitting the scaler on the full dataset would let
# test-set statistics (mean / variance) leak into the preprocessing and make the
# evaluation optimistic.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Work on explicit copies so the column assignments below act on independent
# frames (avoids pandas chained-assignment warnings). Note that `data` and `X`
# themselves are left unscaled.
X_train = X_train.copy()
X_test = X_test.copy()

# Standardize numeric features: learn mean/std from the training rows only,
# then apply that same fitted transform to the test rows.
scaler = StandardScaler()
numeric_features = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
X_train[numeric_features] = scaler.fit_transform(X_train[numeric_features])
X_test[numeric_features] = scaler.transform(X_test[numeric_features])

## Step 4: Train the Random Forest Model

In [None]:
# Hyperparameters for the forest: 100 trees, fixed seed for reproducible runs
forest_params = {"n_estimators": 100, "random_state": 42}

# Build the classifier and fit it on the training split in one step
# (fit() returns the estimator itself, so the result is the trained model)
rf_model = RandomForestClassifier(**forest_params).fit(X_train, y_train)

# Class predictions for the held-out test rows
y_pred = rf_model.predict(X_test)

## Step 5: Evaluate the Model

In [None]:
# Score the test-set predictions with each scalar metric, in display order
accuracy, precision, recall, f1 = [
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
]
conf_matrix = confusion_matrix(y_test, y_pred)

# Report the scalar metrics, each rounded to two decimals
for label, value in (
    ("Accuracy", accuracy),
    ("Precision", precision),
    ("Recall", recall),
    ("F1 Score", f1),
):
    print(f"{label}: {value:.2f}")

# Confusion matrix as returned by sklearn's confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Per-class precision / recall / F1 breakdown
print("\nClassification Report:")
print(classification_report(y_test, y_pred))