In [2]:
# 1. Imports
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import joblib
import os

# 2. Read the simulated CKD input table from disk
df = pd.read_csv("../data/ckd_simulated_input.csv")

# 3. Name the label column and the streamlined set of predictor columns,
#    then slice both out of the frame
target = 'CKD'
features = ['RIDAGEYR', 'RIAGENDR', 'LBXGH', 'SMQ020', 'PAQ605']

y = df[target]
X = df[features]

# 4. Hold out 20% of the rows for evaluation; the split is stratified on
#    the label and seeded so it is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# 5. Fit the scaler on the training rows only, then apply that same fitted
#    transformation to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 6. Fit a seeded 100-tree Random Forest on the scaled training data
#    (fit() returns the estimator itself, so rf is the fitted model)
rf = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train_scaled,
    y_train,
)

# 7. Predict on the held-out rows and print the per-class metrics
y_pred = rf.predict(X_test_scaled)
print(classification_report(y_true=y_test, y_pred=y_pred))

# 8. Persist the fitted model and scaler, creating the output directory
#    first if it does not exist yet
os.makedirs("../models", exist_ok=True)
joblib.dump(rf, "../models/rf_model_streamlined.pkl")
joblib.dump(scaler, "../models/scaler_streamlined.joblib")

print("✅ Model and scaler saved to ../models/")

# joblib is already imported with the other imports at the top; the import
# is repeated here so this save snippet also works when run by itself.
import joblib

# Write the streamlined model first, then its corresponding scaler. These
# are the same objects and destination paths used by the save step above,
# so any existing files at those paths are overwritten.
for fitted_object, destination in (
    (rf, "../models/rf_model_streamlined.pkl"),
    (scaler, "../models/scaler_streamlined.joblib"),
):
    joblib.dump(fitted_object, destination)

print("✅ Model and Scaler saved!")

              precision    recall  f1-score   support

           0       0.68      0.74      0.71       214
           1       0.47      0.39      0.42       124

    accuracy                           0.61       338
   macro avg       0.57      0.57      0.57       338
weighted avg       0.60      0.61      0.60       338

✅ Model and scaler saved to ../models/
✅ Model and Scaler saved!
