In [None]:
# Step 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Step 2: Load the dataset
df = pd.read_csv('/content/framingham.csv')

# Step 3: Drop the unused 'education' column, THEN drop rows with missing values.
# The order matters: calling dropna() while 'education' is still present would
# also discard rows whose only missing value is in that (unused) column,
# needlessly shrinking the training and test sets.
df = df.drop(columns=['education'])
df = df.dropna()

# Step 4: Feature and label separation
X = df.drop(columns=['TenYearCHD'])
y = df['TenYearCHD']

# Step 5: Handle categorical variables
# get_dummies one-hot encodes object/string/category columns only; numeric
# columns pass through unchanged. drop_first=True removes one level per encoded
# column to avoid redundant indicator columns.
# NOTE(review): presumably most columns are already numeric, in which case this
# is a no-op -- confirm against df.dtypes.
X = pd.get_dummies(X, drop_first=True)

# Step 6: Train-test split (stratified so both splits keep the class ratio of y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42)

# Step 7: Feature scaling
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Step 8: Train the model (Random Forest)
# random_state is fixed so the fitted forest is reproducible between runs.
model = RandomForestClassifier(n_estimators=200, random_state=42)
model.fit(X_train_scaled, y_train)

# Step 9: Evaluate the model
# NOTE(review): the target looks imbalanced (the previously recorded run had
# 620 negatives vs 112 positives in the test split, with recall 0.07 on the
# positive class), so accuracy alone is misleading -- read the per-class
# precision/recall in the report and the confusion matrix below.
y_pred = model.predict(X_test_scaled)
acc = accuracy_score(y_test, y_pred)

print(f"🔍 Accuracy: {acc:.4f}")
print("\n🧾 Classification Report:\n", classification_report(y_test, y_pred))
print("\n📉 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


🔍 Accuracy: 0.8470

🧾 Classification Report:
               precision    recall  f1-score   support

           0       0.85      0.99      0.92       620
           1       0.50      0.07      0.12       112

    accuracy                           0.85       732
   macro avg       0.68      0.53      0.52       732
weighted avg       0.80      0.85      0.80       732


📉 Confusion Matrix:
 [[612   8]
 [104   8]]
