In [3]:
import os

import joblib
import pandas as pd

# Input/output locations, relative to the notebook's working directory.
# Each path is defined once so the load, save, and log message cannot drift apart.
model_path = "../models/final_xgb_model.pkl"
test_data_path = "../data/test_merged.csv"
predictions_path = "../results/final_test_predictions.csv"

# Load the saved XGBoost model.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code — only load model files that come from a trusted source.
loaded_model = joblib.load(model_path)

# Load test data
test_merged = pd.read_csv(test_data_path)

# Define feature columns (order is passed to the model as-is)
feature_cols = [
    "checkout_price", "base_price", "price_diff", "promotion",
    "category", "cuisine", "city_code", "region_code", "center_type", "op_area"
]

# Fail fast, before running the model, if the test file lacks any column this
# cell needs. "id" is only used after prediction, so without this check a
# missing "id" would surface only once predict() had already run.
required_cols = ["id"] + feature_cols
missing_cols = [col for col in required_cols if col not in test_merged.columns]
if missing_cols:
    raise KeyError(f"{test_data_path} is missing required columns: {missing_cols}")

X_test = test_merged[feature_cols]

# Predict on test set using the loaded model
y_test_pred = loaded_model.predict(X_test)

# Pair each prediction with its row id
test_predictions = test_merged[["id"]].copy()
test_predictions["num_orders"] = y_test_pred

# Make sure the output directory exists; to_csv does not create it and would
# otherwise raise after all the work above has been done.
os.makedirs(os.path.dirname(predictions_path), exist_ok=True)
test_predictions.to_csv(predictions_path, index=False)

print(f"\nTest predictions saved to {predictions_path}")


Test predictions saved to ../results/final_test_predictions.csv


In [4]:
import pandas as pd

# Read the prediction file back from disk
test_predictions = pd.read_csv("../results/final_test_predictions.csv")

# First rows of the prediction table
preview_rows = test_predictions.head()
print("Test Predictions Overview:", preview_rows, sep="\n")

# Summary statistics (count, mean, std, quartiles, ...) of the predicted values
order_stats = test_predictions["num_orders"].describe()
print("\nPrediction Statistics:", order_stats, sep="\n")

Test Predictions Overview:
        id  num_orders
0  1028232  175.962770
1  1127204  185.019210
2  1212707  175.987300
3  1082698   49.999634
4  1400926   48.969376

Prediction Statistics:
count    32573.000000
mean       254.666544
std        358.125108
min        -42.383087
25%         64.297670
50%        150.549500
75%        306.465060
max       6151.734400
Name: num_orders, dtype: float64


In [5]:
# Floor every predicted value at 0 so no negative num_orders remain
clipped_orders = test_predictions["num_orders"].clip(lower=0)
test_predictions["num_orders"] = clipped_orders

# Overwrite the prediction file with the clipped values
test_predictions.to_csv("../results/final_test_predictions.csv", index=False)
print("Negative predictions clipped and saved.")

Negative predictions clipped and saved.


In [6]:
# Re-read the saved predictions so the statistics reflect what is on disk
test_predictions = pd.read_csv("../results/final_test_predictions.csv")

# Key statistics of the predicted order counts
predicted_orders = test_predictions["num_orders"]
min_pred, max_pred, mean_pred = (
    predicted_orders.min(),
    predicted_orders.max(),
    predicted_orders.mean(),
)

print(
    f"Minimum Predicted Orders: {min_pred}",
    f"Maximum Predicted Orders: {max_pred}",
    f"Mean Predicted Orders: {mean_pred}",
    sep="\n",
)

Minimum Predicted Orders: 0.0
Maximum Predicted Orders: 6151.7344
Mean Predicted Orders: 254.69094103602066


In [7]:
import pandas as pd

# Hand-written example row for a single prediction (replace values with your own).
# price_diff is derived from the two prices: checkout_price - base_price.
example_checkout_price = 150.5
example_base_price = 160.0

example_row = {
    "checkout_price": example_checkout_price,
    "base_price": example_base_price,
    "price_diff": example_checkout_price - example_base_price,
    "promotion": 1,
    "category": 3,       # Example encoded category (e.g., 'Extras')
    "cuisine": 2,        # Example encoded cuisine (e.g., 'Italian')
    "city_code": 647,
    "region_code": 56,
    "center_type": 1,    # Example encoded center type (e.g., 'TYPE_B')
    "op_area": 2.0,
}

# One-row frame; each value is wrapped in a single-element list per column
custom_input = pd.DataFrame({column: [value] for column, value in example_row.items()})

# Preview custom input
print("Custom Input:", custom_input, sep="\n")

Custom Input:
   checkout_price  base_price  price_diff  promotion  category  cuisine  \
0           150.5       160.0        -9.5          1         3        2   

   city_code  region_code  center_type  op_area  
0        647           56            1      2.0  


In [8]:
# Run the loaded model on the custom input frame; predict() returns one value per row
custom_prediction = loaded_model.predict(custom_input)

# Report the first (index 0) prediction
print(f"Predicted Number of Orders: {custom_prediction[0]}")

Predicted Number of Orders: 224.65176391601562
