# In [2]:
import pickle
from pathlib import Path

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Train a random-forest heart-disease classifier from 'heart.csv' and pickle
# it to 'models/random_model.pkl'.
#
# The categorical columns are encoded by hand (rather than with a fitted
# encoder) so the integer codes are fixed and can be reproduced by whatever
# code later feeds inputs to the saved model.


def _encode_column(frame, column, mapping):
    """Return ``frame[column]`` translated through ``mapping``.

    ``Series.map`` silently turns every value that is not a key of
    ``mapping`` into NaN. This helper keeps cells that were already missing
    as missing, but raises instead of letting unknown labels become NaN.

    Args:
        frame: DataFrame holding the column to encode.
        column: Name of the column to encode.
        mapping: Dict from raw cell value to integer code.

    Returns:
        The encoded Series (same index as ``frame``).

    Raises:
        ValueError: If the column contains a non-missing value that has no
            entry in ``mapping``.
    """
    raw = frame[column]
    encoded = raw.map(mapping)
    # Present before mapping but missing afterwards => label not in mapping.
    unknown = raw[raw.notna() & encoded.isna()].unique()
    if len(unknown) > 0:
        raise ValueError(
            f"Column {column!r} contains values with no encoding: "
            f"{sorted(str(value) for value in unknown)}"
        )
    return encoded


# Load dataset
df = pd.read_csv('heart.csv')

# --- Encoding Categorical Columns ---
# 'Sex': 'M'->1, 'F'->0
df['Sex'] = _encode_column(df, 'Sex', {'M': 1, 'F': 0})

# ChestPainType: accept the long labels as well as the short codes.
# NOTE(review): the other columns are keyed on short dataset tokens, so the
# CSV presumably stores TA/ATA/NAP/ASY here - confirm against heart.csv.
# With long labels only, such a file would have been encoded as all-NaN.
chest_pain_map = {
    "Typical Angina": 3, "TA": 3,
    "Atypical Angina": 1, "ATA": 1,
    "Non-Anginal Pain": 2, "NAP": 2,
    "Asymptomatic": 0, "ASY": 0,
}
df['ChestPainType'] = _encode_column(df, 'ChestPainType', chest_pain_map)

# FastingBS is assumed to already be 0/1 and is used as-is;
# otherwise map it like {"Yes": 1, "No": 0}.

# RestingECG
resting_ecg_map = {"Normal": 1, "ST": 2, "LVH": 0}
df['RestingECG'] = _encode_column(df, 'RestingECG', resting_ecg_map)

# ExerciseAngina: 'Y'->1, 'N'->0
df['ExerciseAngina'] = _encode_column(df, 'ExerciseAngina', {'Y': 1, 'N': 0})

# ST_Slope
st_slope_map = {"Up": 2, "Flat": 1, "Down": 0}
df['ST_Slope'] = _encode_column(df, 'ST_Slope', st_slope_map)

# --- Feature selection ---
# Column order matters: the saved model expects inputs in this order.
features = ['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol',
            'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina',
            'Oldpeak', 'ST_Slope']

X = df[features]
y = df['HeartDisease']

# Split data (80/20, fixed seed so the split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Save model; create the target directory first so a missing 'models/'
# folder does not throw away the training run with a FileNotFoundError.
model_path = Path('models/random_model.pkl')
model_path.parent.mkdir(parents=True, exist_ok=True)
with model_path.open('wb') as f:
    pickle.dump(rf_model, f)
