In [7]:
import pandas as pd
import joblib
import numpy as np

# Batch inference: rebuild the training-time feature matrix for the test CSV,
# score it with the saved model, and write labels + probabilities to disk.

# 1) Load artifacts
feature_cols = joblib.load("../models/feature_cols.pkl")   # saved in 02_preprocessing
scaler       = joblib.load("../models/scaler.pkl")         # saved in 02_preprocessing
model        = joblib.load("../models/pipeline_model.pkl") # saved in 03_modeling

# 2) Load and OHE test
test_df = pd.read_csv("../data/test_df.csv")
# Remove the target if the file still carries it; a no-op when it is absent.
test_df = test_df.drop(columns=["readmitted"], errors="ignore")

# Encode EVERY category level here (no drop_first). With drop_first=True the
# dropped baseline is whichever level sorts first *in this file*; if the test
# split lacks the training baseline level, a real training feature would be
# dropped instead and then silently recreated as all zeros by the reindex
# below. Selecting columns via feature_cols reproduces the training encoding
# whether or not training used drop_first.
test_df = pd.get_dummies(test_df)

# 3) Force exact same columns (names + order) as training.
#    Levels unseen in training are discarded; training columns absent from
#    this file are created and filled with 0.
test_df = test_df.reindex(columns=feature_cols, fill_value=0)

# 4) Scale using EXACT same columns in the SAME order.
#    A plain ndarray is passed to avoid sklearn's feature-name check errors;
#    the explicit float64 cast keeps mixed bool/int dummy columns from
#    producing an object-dtype array.
# NOTE(review): the artifact is named "pipeline_model" — if it is a Pipeline
# that already contains this scaler, the features get scaled twice. Confirm
# against 03_modeling.
X_test = scaler.transform(test_df.to_numpy(dtype=np.float64))

# 5) Predict
preds = model.predict(X_test)
# Column 1 of predict_proba is the probability of the second entry in
# model.classes_ (presumably the positive "readmitted" class — verify).
probs = model.predict_proba(X_test)[:, 1]

# 6) Save
submission = pd.DataFrame({
    "Id": np.arange(len(test_df)),  # positional row id, 0..n-1 in file order
    "readmitted": preds,
    "probability": probs,
})
submission.to_csv("../data/submission.csv", index=False)
print("✅ Inference complete → ../data/submission.csv")


✅ Inference complete → ../data/submission.csv


