In [10]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# -----------------------------
# 1. Load dataset
# -----------------------------
# The CSV is expected to contain the feature columns plus a "target" label
# column; everything except "target" is used as model input.
df = pd.read_csv("../data/heart_selected_features.csv")

X = df.drop(columns="target")
y = df["target"]

# Hold out 20% of the rows for testing. `stratify=y` keeps the class balance
# the same in both splits and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)



# -----------------------------
# 2. Define best model (from Step 6)
# -----------------------------
# Hyperparameters are hard-coded here rather than re-tuned in this cell.
best_rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=5,
    min_samples_split=10,
    min_samples_leaf=1,
    max_features='log2',
    random_state=42
)



# -----------------------------
# 3. Create pipeline (Scaler + RF)
# -----------------------------
# Bundling the scaler with the classifier means the exported artifact accepts
# raw (unscaled) feature rows and applies the same preprocessing at predict
# time that was fitted on the training split.
pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("classifier", best_rf)
])

# Train pipeline (the scaler is fitted on X_train only, so no test-set leakage)
pipeline.fit(X_train, y_train)



# -----------------------------
# 4. Export model
# -----------------------------
# Create the output directory first: joblib.dump does not create missing
# parent directories and would otherwise fail with FileNotFoundError on a
# fresh checkout where ../models/ does not exist yet.
model_path = Path("../models/final_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipeline, model_path)
print("Final model exported as final_model.pkl")


Final model exported as final_model.pkl


In [11]:
# -----------------------------
# 5. Test: Load model and predict
# -----------------------------
# NOTE: joblib.load unpickles the file, which can execute arbitrary code.
# Only load model files that were produced by this project / a trusted source.
loaded_model = joblib.load("../models/final_model.pkl")

# Round-trip check: the reloaded pipeline must reproduce the predictions of
# the in-memory pipeline it was saved from, on the whole held-out set. This
# makes the cell an actual test (it fails loudly) instead of a visual check.
assert (loaded_model.predict(X_test) == pipeline.predict(X_test)).all(), (
    "Loaded model predictions differ from the in-memory pipeline"
)

# Spot check on the first 10 held-out rows (illustrative only; 10 samples are
# far too few to estimate accuracy).
sample = X_test.iloc[0:10]
true_labels = y_test.iloc[0:10].values
predictions = loaded_model.predict(sample)

print("\nTesting loaded model:")
for patient_no, (pred, true) in enumerate(zip(predictions, true_labels), start=1):
    print(f" Patient {patient_no}: Predicted={pred}, Actual={true}")


Testing loaded model:
 Patient 1: Predicted=0, Actual=0
 Patient 2: Predicted=0, Actual=0
 Patient 3: Predicted=0, Actual=0
 Patient 4: Predicted=1, Actual=0
 Patient 5: Predicted=0, Actual=0
 Patient 6: Predicted=0, Actual=0
 Patient 7: Predicted=1, Actual=1
 Patient 8: Predicted=0, Actual=0
 Patient 9: Predicted=0, Actual=1
 Patient 10: Predicted=0, Actual=0


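🔍 8/10 correct --> 80% accuracy on this 10-sample spot check (too few samples to estimate real performance; use the full test set for that). 
✅ The saved model loads and predicts as expected.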