In [2]:
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier

# Feature columns, in the exact order the model and scaler are fitted on.
# The one-hot groups ('Smoking_*', 'Chest Pain Type_*') deliberately omit one
# baseline category each; a row whose dummies are all 0 belongs to that
# baseline.
expected_cols = [
    'Age', 'Cholesterol', 'Blood Pressure', 'Heart Rate', 'Exercise Hours', 'Stress Level', 'Blood Sugar',
    'Gender', 'Alcohol Intake', 'Family History', 'Diabetes', 'Obesity', 'Exercise Induced Angina',
    'Smoking_Former', 'Smoking_Never',
    'Chest Pain Type_Atypical Angina', 'Chest Pain Type_Non-anginal Pain', 'Chest Pain Type_Typical Angina'
]


def preprocess(df):
    """Encode a raw heart-disease frame into the model's feature matrix.

    The input frame is left untouched; all work happens on a copy, so the
    function can safely be called more than once on the same frame.

    Args:
        df: DataFrame holding the raw columns 'Gender', 'Alcohol Intake',
            'Family History', 'Diabetes', 'Obesity',
            'Exercise Induced Angina', 'Smoking' and 'Chest Pain Type',
            plus the numeric columns named in ``expected_cols``. Extra
            columns (e.g. the target) are ignored.

    Returns:
        A new DataFrame containing exactly the columns in ``expected_cols``,
        in that order. Categorical values that are not covered by the
        mappings below become NaN.

    Raises:
        KeyError: if one of the raw categorical columns is missing.
    """
    # Work on a copy so the caller's frame keeps its raw string values.
    df = df.copy()

    yes_no = {'No': 0, 'Yes': 1}
    for col in ('Family History', 'Diabetes', 'Obesity', 'Exercise Induced Angina'):
        df[col] = df[col].map(yes_no)

    df['Gender'] = df['Gender'].map({'Male': 0, 'Female': 1})

    # A missing alcohol value is treated as the 'None' (no intake) level.
    # NOTE(review): presumably needed because the CSV reader parses the
    # literal string "None" as a missing value -- confirm against the data.
    df['Alcohol Intake'] = (
        df['Alcohol Intake']
        .fillna('None')
        .map({'None': 0, 'Moderate': 1, 'Heavy': 2})
    )

    # Encode every category that is present, then project onto the fixed
    # training schema. Using drop_first=True here would drop whichever
    # category happens to sort first in *this* frame, which loses the real
    # category of a single-row (inference-time) input.
    df = pd.get_dummies(df, columns=['Smoking', 'Chest Pain Type'])

    # reindex drops columns outside the schema (baseline dummies, target),
    # adds absent dummy columns filled with 0, and fixes the column order.
    return df.reindex(columns=expected_cols, fill_value=0)

# Load the raw dataset (path is relative to the notebook's working directory)
df = pd.read_csv('../dataset/heart_disease_dataset.csv')

# Build the feature matrix; preprocess() returns only the feature columns,
# so the target is read from the raw frame.
X = preprocess(df)
y = df['Heart Disease']

# Hold out 20% for evaluation; stratify on the target so both splits keep
# the same class balance, and fix the seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Fit the scaler on the training split only, then apply the same fitted
# transform to the test split so no test statistics leak into training.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model
model = RandomForestClassifier(n_estimators=200, max_depth=8, random_state=42)
model.fit(X_train_scaled, y_train)

# Evaluate (mean accuracy on each split)
print("Train accuracy:", model.score(X_train_scaled, y_train))
print("Test accuracy:", model.score(X_test_scaled, y_test))

# Persist the model together with the scaler it was trained with.
# `with` guarantees each file is flushed and closed even if dump() raises.
with open('../models/heart_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)
with open('../models/heart_scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


Train accuracy: 1.0
Test accuracy: 0.99
