In [1]:
import json
import os
import pickle
import warnings

import numpy as np
import pandas as pd

# Directory (relative to this notebook) that holds the serialized artifacts.
MODEL_DIR = "../models"

# Full paths of the three artifacts: the fitted model, the fitted scaler and
# the JSON file listing the feature columns, all located under MODEL_DIR.
MODEL_PATH, SCALER_PATH, FEATURE_COLS_PATH = (
    os.path.join(MODEL_DIR, artifact_name)
    for artifact_name in ("final_model.pkl", "scaler.pkl", "feature_columns.json")
)


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Load the three inference artifacts: the pickled model, the pickled scaler
# and the JSON list of feature column names.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts that were produced by a trusted training run.
with open(MODEL_PATH, "rb") as f:
    model = pickle.load(f)

with open(SCALER_PATH, "rb") as f:
    scaler = pickle.load(f)

# Explicit encoding so the column names are decoded the same way on every
# platform instead of depending on the locale's default text encoding.
with open(FEATURE_COLS_PATH, "r", encoding="utf-8") as f:
    feature_columns = json.load(f)

print("Model, Scaler, and Feature Columns Loaded Successfully!")


Model, Scaler, and Feature Columns Loaded Successfully!


In [3]:
def predict_single(input_dict, threshold=0.5):
    """
    Score a single record given as a feature_name → value dictionary.

    Parameters
    ----------
    input_dict : dict
        Maps feature names to values. Any name listed in the module-level
        ``feature_columns`` but absent from the dictionary is filled with 0,
        so one-hot encoded columns do not have to be spelled out.
    threshold : float, default 0.5
        The record is labelled class 1 when its probability is strictly
        greater than this value.

    Returns
    -------
    prob
        Second column of ``model.predict_proba`` for the single row.
    pred : int
        1 if ``prob > threshold`` else 0.

    Warns
    -----
    UserWarning
        When ``input_dict`` contains keys that are not in ``feature_columns``.
        Such keys are dropped by the column alignment, so a misspelled
        feature name would otherwise be scored silently as 0.
    """
    # Surface keys the alignment step below is about to discard.
    known_columns = set(feature_columns)
    unknown_keys = [key for key in input_dict if key not in known_columns]
    if unknown_keys:
        warnings.warn(
            f"Ignoring keys not present in feature_columns: {unknown_keys}",
            UserWarning,
            stacklevel=2,
        )

    df = pd.DataFrame([input_dict])

    # Arrange columns exactly like feature_columns; missing ones become 0.
    df = df.reindex(columns=feature_columns, fill_value=0)

    # Scaling
    X = scaler.transform(df)

    # Prediction: row 0, column index 1 of predict_proba
    prob = model.predict_proba(X)[0][1]
    pred = int(prob > threshold)

    return prob, pred


In [4]:
# One hand-written record used to smoke-test predict_single.
# NOTE(review): the processed CSV preview recorded in this notebook shows
# Discount_Applied values such as 45.27 and 15.37, so 0.15 here may be on a
# different scale than the data the CSV holds — confirm the intended units.
sample = dict(
    Product_Price=499.0,
    Order_Quantity=2,
    Days_to_Return=5,
    User_Age=30,
    Discount_Applied=0.15,
    Order_Year=2023,
    Order_Month=7,
    Order_DayOfWeek=4,
    Order_Day=21,
    High_Discount=0,
    High_Price=0,
    Bulk_Order=0,
    # One-hot encoded categorical columns are intentionally omitted:
    # predict_single reindexes to feature_columns and fills them with 0.
)

prob, pred = predict_single(sample)
print("Return Probability:", prob)
print("Predicted Class:", pred)


Return Probability: 0.9997032538924585
Predicted Class: 1


In [5]:
def predict_from_csv(csv_file, threshold=0.5):
    """
    Score every row of a CSV file and return the features with predictions.

    Parameters
    ----------
    csv_file
        Path (or anything ``pd.read_csv`` accepts) of the CSV to score.
    threshold : float, default 0.5
        Rows whose probability is strictly greater than this value get
        ``Predicted_Return`` 1, all others 0.

    Returns
    -------
    pandas.DataFrame
        The columns named in the module-level ``feature_columns`` (in that
        order) followed by ``Return_Probability`` and ``Predicted_Return``.
        CSV columns that are not in ``feature_columns`` are not part of the
        result; feature columns absent from the CSV are filled with 0.
    """
    raw = pd.read_csv(csv_file)

    # Align to feature_columns: drops extra columns, adds missing ones as 0.
    # Only absent columns are filled; NaN cells in existing columns are kept.
    features = raw.reindex(columns=feature_columns, fill_value=0)
    X = scaler.transform(features)

    # Column index 1 of predict_proba for every row.
    probs = model.predict_proba(X)[:, 1]
    preds = (probs > threshold).astype(int)

    features["Return_Probability"] = probs
    features["Predicted_Return"] = preds

    return features


In [6]:
# Batch-score the processed dataset and preview the first rows.
# NOTE(review): in the five preview rows recorded with this notebook the
# probabilities take only two values that track whether Days_to_Return is
# non-zero — possible target leakage; confirm against the training notebook.
processed_csv_path = "../data/processed/processed_returns.csv"
test_df = predict_from_csv(processed_csv_path)
test_df.head()


Unnamed: 0,Product_Price,Order_Quantity,Days_to_Return,User_Age,Discount_Applied,Order_Year,Order_Month,Order_Day,Order_DayOfWeek,High_Discount,...,User_Location_City97,User_Location_City98,User_Location_City99,Payment_Method_Debit Card,Payment_Method_Gift Card,Payment_Method_PayPal,Shipping_Method_Next-Day,Shipping_Method_Standard,Return_Probability,Predicted_Return
0,411.59,3,387.0,58,45.27,2023,8,5,5,1,...,False,False,False,True,False,False,True,False,0.999703,1
1,288.88,3,31.0,68,47.79,2023,10,9,0,1,...,False,False,False,False,False,False,False,False,0.999703,1
2,390.03,5,0.0,22,26.64,2023,5,6,5,1,...,False,False,False,True,False,False,True,False,0.000528,0
3,401.09,3,0.0,40,15.37,2024,8,29,3,1,...,False,False,False,False,False,True,True,False,0.000528,0
4,110.09,4,0.0,34,16.37,2023,1,16,0,1,...,False,False,False,False,True,False,False,True,0.000528,0


In [7]:
# Persist the scored rows (without the index) for later inspection.
output_path = "../results/sample_predictions.csv"

# to_csv does not create missing parent directories, so make sure the
# results folder exists; otherwise a fresh checkout fails on this cell.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

test_df.to_csv(output_path, index=False)
print("Saved sample predictions to:", output_path)


Saved sample predictions to: ../results/sample_predictions.csv
