In [2]:
# 1. Imports
from pathlib import Path

import joblib
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

# SMOTE oversampling (applied to the training split in step 6)
from imblearn.over_sampling import SMOTE

In [6]:
# 2. Load Data
# The path is relative, so it is resolved against the kernel's current
# working directory.
csv_path = '../data/ckd_simulated_input.csv'
data = pd.read_csv(csv_path)

In [8]:
# Show the available column names as a plain Python list.
print(list(data.columns))

['SEQN', 'RIAGENDR', 'RIDAGEYR', 'RIDRETH1', 'LBXGH', 'URXUMA', 'URXUCR', 'MCQ010', 'SMQ020', 'SMD030', 'PAQ605', 'PAQ620', 'LBXGLU', 'CKD']


In [9]:
# 3. Select Features and Target
# Minimal feature set; the per-column labels are the original author's.
# NOTE(review): LBXGH looks like the NHANES code for glycohemoglobin (HbA1c),
# not a polygenic risk score — confirm what "PRS" is meant to be here.
selected_features = [
    'RIDAGEYR',  # age
    'RIAGENDR',  # gender
    'LBXGH',     # "PRS" (see review note above)
    'SMQ020',    # smoking
    'PAQ605',    # physical activity
]
target_column = 'CKD'

# Feature matrix (column order follows selected_features) and label vector.
X = data.loc[:, selected_features]
y = data.loc[:, target_column]

In [10]:
# 4. Train-Test Split
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# ratio the same in both splits, and the fixed seed makes the split repeatable.
split_kwargs = dict(test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

In [11]:
# 5. Scaling
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics reach training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [12]:
# 6. Apply SMOTE to handle class imbalance
# Only the (scaled) training split is resampled; the test split is left as-is.
# NOTE(review): SMOTE synthesises samples by interpolating between neighbours,
# so the coded columns (gender, smoking, activity) may receive in-between
# values — consider imblearn's SMOTENC if they must stay categorical.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train_scaled, y_train)
X_train_sm, y_train_sm = resampled

In [13]:
# 7. Train Random Forest
# Fitted on the SMOTE-resampled, scaled training data; the seed is fixed so
# the forest is reproducible.
forest_params = {"n_estimators": 100, "random_state": 42}
rf = RandomForestClassifier(**forest_params)
rf.fit(X_train_sm, y_train_sm)

In [15]:
# 8. Evaluate
# Predict on the scaled test split (never resampled) and report the metrics.
y_pred = rf.predict(X_test_scaled)

conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", conf_matrix)
print("\nClassification Report:\n", class_report)

Confusion Matrix:
 [[156  58]
 [ 70  54]]

Classification Report:
               precision    recall  f1-score   support

           0       0.69      0.73      0.71       214
           1       0.48      0.44      0.46       124

    accuracy                           0.62       338
   macro avg       0.59      0.58      0.58       338
weighted avg       0.61      0.62      0.62       338



In [16]:
# 9. Save Scaler and Model
# joblib.dump does not create missing parent directories, so make sure the
# output folder exists first; otherwise a fresh checkout without ../models
# fails here with FileNotFoundError after all the training work is done.
models_dir = Path('../models')
models_dir.mkdir(parents=True, exist_ok=True)

# File names are kept exactly as before (note the mixed .joblib / .pkl
# extensions) because other code presumably loads these exact paths —
# verify before renaming.
joblib.dump(scaler, models_dir / 'scaler_minimal.joblib')
joblib.dump(rf, models_dir / 'rf_minimal_model.pkl')

print("\n✅ Minimal scaler and model saved successfully.")


✅ Minimal scaler and model saved successfully.
