In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor
from xgboost import plot_importance

# Load the cleaned sales data.
# The path is relative, so it resolves against the kernel's working directory.
PROCESSED_FILE = '../data/processed/sales_cleaned.csv'
# 'Order Date' is parsed into a datetime column while reading.
df = pd.read_csv(
    filepath_or_buffer=PROCESSED_FILE,
    parse_dates=['Order Date'],
)



In [2]:
# 1. Choose the model inputs and the prediction target.
# 'Price Each' is intentionally excluded (noted as static by the author);
# 'Product', 'City' and 'DayOfWeek' are used alongside 'Month' and 'Hour'.
target = 'Quantity Ordered'
features = [
    'Product',
    'City',
    'Month',
    'Hour',
    'DayOfWeek',
]

# X holds the feature columns, y the target column.
X, y = df[features], df[target]

In [3]:
# 2. One-hot encode the categorical features.
# The model is trained on numeric inputs, so 'Product' and 'City' are each
# expanded into one indicator column per distinct value; the other feature
# columns are carried over as they are.
X_encoded = pd.get_dummies(data=X, columns=['Product', 'City'])

In [None]:
# 3. Hold out 20% of the rows for evaluation.
# The split is done on the one-hot *encoded* frame, with a fixed seed so the
# same partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Configure and fit the XGBoost regressor.
xgb_params = dict(
    n_estimators=300,          # number of boosting rounds
    learning_rate=0.05,
    max_depth=5,
    subsample=0.8,             # row subsampling ratio
    colsample_bytree=0.8,      # column subsampling ratio per tree
    random_state=42,
    enable_categorical=False,  # inputs were already one-hot encoded
)
xgb_model = XGBRegressor(**xgb_params)

print("Training XGBoost Demand Forecaster...")
xgb_model.fit(X_train, y_train)

Training XGBoost Demand Forecaster...


In [None]:
# 5. Evaluate the model on the held-out test split
y_pred = xgb_model.predict(X_test)
# Quantity is discrete (1, 2, 3, ...), so round predictions to whole units before scoring.
y_pred_rounded = np.round(y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred_rounded))
print("Model Training Complete.")
# RMSE is the square root of the mean squared error, expressed in units of quantity.
# Because errors are squared before averaging, large misses weigh more than in a
# plain average error. An RMSE close to 0 is great.
print(f"Model RMSE: {rmse:.3f} units")

In [None]:
# 6. Show feature importance for the trained model
# plot_importance creates its own figure when no Axes is passed, which would
# leave a separately created figure blank and ignore its figsize. Create the
# 10x8 figure here and hand its Axes to plot_importance so the chart is drawn
# on it.
# NOTE: importance_type is not set, so plot_importance's default metric is used.
fig, ax = plt.subplots(figsize=(10, 8))
plot_importance(xgb_model, ax=ax, max_num_features=20)  # Show top 20 features
ax.set_title("Feature Importance for Demand Forecasting")
plt.show()

In [None]:
# 7. Save the trained model
# (`os` is imported with the other modules in the first cell.)
MODEL_DIR = '../models/'
# Create the output directory if needed; exist_ok avoids an error on re-runs.
os.makedirs(MODEL_DIR, exist_ok=True)
MODEL_PATH = os.path.join(MODEL_DIR, 'xgb_demand_forecaster.json')

# Persist the fitted model with XGBoost's own save_model() method.
xgb_model.save_model(MODEL_PATH)

print(f"Model saved successfully to {MODEL_PATH}")