In [None]:

# --- 7. IN-DEPTH ANALYSIS AND VISUALIZATION ---
# Deeper look at how the trained models performed and what they predicted.
print("\n--- Step 7: In-Depth Analysis and Visualization ---")

# 7.1. Model Performance Summary Table
# Reshape the long results table into one row per site and one column per horizon.
print("\n--- 7.1. Model Performance Summary ---")
performance_pivot = results_df.pivot(
    index='Site',
    columns='Horizon',
    values='Avg_Asymmetric_Loss',
)
print("Average Asymmetric Loss by Site and Horizon:")
# axis=None makes the colour scale span the whole table instead of each row/column
loss_heatmap = performance_pivot.style.background_gradient(cmap='Reds', axis=None)
display(loss_heatmap.format("{:.2f}"))

# 7.2. Feature Importance Visualization
print("\n--- 7.2. Feature Importance Visualization ---")
# Visualize feature importances for a sample forecasting model (first site, 1-day horizon)
sample_site = sites[0]
sample_horizon = 'target_1_day'
sample_entry = models[sample_site][sample_horizon]
model_to_inspect = sample_entry['model']

# The training cell may store only the fitted model under 'model'. When no
# explicit 'features' list is present, recover the column names scikit-learn
# recorded at fit time (available when the model was fit on a DataFrame,
# scikit-learn >= 1.0) instead of failing with a KeyError.
features_to_inspect = sample_entry.get('features')
if features_to_inspect is None:
    features_to_inspect = list(model_to_inspect.feature_names_in_)

# Pair each feature name with its importance and rank from most to least important
feature_importances_forecast = pd.DataFrame({
    'feature': features_to_inspect,
    'importance': model_to_inspect.feature_importances_
}).sort_values('importance', ascending=False)

# Horizontal bar chart of the 15 highest-ranked features
plt.figure(figsize=(12, 8))
sns.barplot(x='importance', y='feature', data=feature_importances_forecast.head(15), palette='mako')
plt.title(f'Top 15 Feature Importances for Forecasting\n(Site: {sample_site}, Horizon: {sample_horizon})')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.tight_layout()
plt.show()

# 7.3. Prediction vs. Actuals Plots for All Sites
# One figure per site: the held-out actual series overlaid with the predictions.
print("\n--- 7.3. Prediction vs. Actuals Plots ---")
print("Visualizing performance on the last cross-validation fold for the 1-day horizon.")

# Line styles shared by every per-site figure
actual_line_style = dict(label='Actual Values', color='dodgerblue', alpha=0.8, marker='o', linestyle='-')
predicted_line_style = dict(label='Predicted Values', color='red', linestyle='--')

for site in sites:
    test_data = all_test_sets[site]['target_1_day']
    y_test = test_data['y_test']
    y_pred = test_data['y_pred']

    # Both lines share the x-axis taken from the actuals' index
    forecast_fig, forecast_ax = plt.subplots(figsize=(15, 7))
    forecast_ax.plot(y_test.index, y_test, **actual_line_style)
    forecast_ax.plot(y_test.index, y_pred, **predicted_line_style)
    forecast_ax.set_title(f'Forecast vs. Actuals for {site} (1-Day Horizon)')
    forecast_ax.set_xlabel('Date')
    forecast_ax.set_ylabel('Net Meal Quantity')
    forecast_ax.legend()
    forecast_ax.grid(True, linestyle='--', alpha=0.6)
    plt.show()

# 7.4. Error Analysis
print("\n--- 7.4. Error Analysis ---")
# Analyze errors for the sample model chosen above (sample_site / sample_horizon)
sample_test_data = all_test_sets[sample_site][sample_horizon]
# Positive error = the model under-predicted, negative = it over-predicted
errors = sample_test_data['y_test'] - sample_test_data['y_pred']

# Plot 1: Error Distribution
plt.figure(figsize=(10, 6))
sns.histplot(errors, kde=True, bins=30)
plt.axvline(x=0, color='red', linestyle='--', label='Zero Error')
plt.title(f'Distribution of Prediction Errors\n(Site: {sample_site}, Horizon: {sample_horizon})')
plt.xlabel('Error (Actual - Predicted)')
plt.ylabel('Frequency')
plt.legend()
plt.show()

# Plot 2: Errors Over Time
plt.figure(figsize=(15, 6))
errors.plot(label='Prediction Error', color='purple', alpha=0.8)
plt.axhline(y=0, color='red', linestyle='--', label='Zero Error')
plt.title(f'Prediction Errors Over Time\n(Site: {sample_site}, Horizon: {sample_horizon})')
plt.xlabel('Date')
plt.ylabel('Error')
# Draw the legend so the labels set above are actually shown, matching Plot 1
plt.legend()
plt.grid(True, linestyle='--', alpha=0.6)
plt.show()

print("\n--- End of Analysis ---")


In [None]:
# --- 3. & 4. TIME SERIES CROSS-VALIDATION & MODEL TRAINING ---
# For every site and forecast horizon: build lag/rolling features from that
# site's own demand, score a random forest with TimeSeriesSplit, then refit on
# all rows.
#
# Results left at module level:
#   models[site][horizon]        -> {'model': fitted regressor, 'features': column names}
#   all_test_sets[site][horizon] -> {'y_test': Series, 'y_pred': Series} from the last CV fold
#   evaluation_results           -> one dict of MAE mean/std per (site, horizon)
print("\n--- Steps 3 & 4: Time Series Cross-Validation & Final Model Training ---")

sites = pivot_demand.columns
models = {}
evaluation_results = []
reg_features = []  # Feature column names, captured once so every model uses the same order
all_test_sets = {}  # Last-fold actuals/predictions, read by the Step 7 analysis plots
# Setup for TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=5)
# One shared set of hyper-parameters so the CV models and the final model cannot drift apart
rf_params = {'n_estimators': 100, 'random_state': 42, 'n_jobs': -1, 'min_samples_leaf': 5}

for site in sites:
    print(f"\n--- Processing Site: {site} ---")
    models[site] = {}
    all_test_sets[site] = {}

    # Create site-specific features to prevent data leakage from other sites' futures
    site_demand = pivot_demand[site]
    site_features = time_features.copy()
    for lag in [1, 2, 3, 7, 14]:
        site_features[f'site_lag_{lag}'] = site_demand.shift(lag)
    # shift(1) keeps the current row's demand out of its own rolling window
    previous_demand = site_demand.shift(1)
    site_features['site_rolling_mean_7'] = previous_demand.rolling(window=7).mean()
    site_features['site_rolling_mean_14'] = previous_demand.rolling(window=14).mean()
    # NOTE(review): bfill() copies later values into the leading warm-up rows that
    # the shifts/rolls leave empty, so those rows see information from their own
    # future. Behaviour is kept unchanged here; consider dropping the warm-up
    # rows instead of back-filling them.
    site_features = site_features.ffill().bfill().fillna(0)  # Fill NaNs from shifts/rolls

    # Store the list of all feature names *once* to ensure consistency
    if not reg_features:
        reg_features = site_features.columns.tolist()

    for horizon_name, target_df in target_dfs.items():
        print(f"  Processing model for horizon: {horizon_name}")

        y_series = target_df[site]
        # Align features and target, dropping rows where target is NaN
        temp_df = pd.concat([site_features, y_series.rename('target')], axis=1)
        temp_df.dropna(subset=['target'], inplace=True)
        X = temp_df[reg_features]  # Ensure column order is consistent
        y = temp_df['target']

        # Perform Cross-Validation
        mae_scores = []
        print(f"    Running {tscv.n_splits}-fold Time Series Cross-Validation...")
        for fold, (train_index, test_index) in enumerate(tscv.split(X)):
            X_train, X_test = X.iloc[train_index], X.iloc[test_index]
            y_train, y_test = y.iloc[train_index], y.iloc[test_index]

            rf_regressor_fold = RandomForestRegressor(**rf_params)
            rf_regressor_fold.fit(X_train, y_train)

            y_pred_fold = rf_regressor_fold.predict(X_test)
            mae = mean_absolute_error(y_test, y_pred_fold)
            mae_scores.append(mae)

            # Overwritten on every fold, so after the loop this holds the last fold.
            # Predictions are re-indexed like y_test so that `y_test - y_pred`
            # aligns row-for-row in the Step 7 error analysis.
            all_test_sets[site][horizon_name] = {
                'y_test': y_test,
                'y_pred': pd.Series(y_pred_fold, index=y_test.index, name='y_pred'),
            }

        average_mae = np.mean(mae_scores)
        std_mae = np.std(mae_scores)
        print(f"  > CV Average MAE for {horizon_name}: {average_mae:.2f} (+/- {std_mae:.2f})")
        evaluation_results.append(
            {'Site': site, 'Horizon': horizon_name, 'Average_MAE': average_mae, 'Std_MAE': std_mae})

        # Train the final model on ALL available data for this horizon
        print(f"  Training final model on {len(X)} data points...")
        final_model = RandomForestRegressor(**rf_params)
        final_model.fit(X, y)
        # Keep the feature list next to the model; the Step 7 cell reads both keys
        models[site][horizon_name] = {'model': final_model, 'features': list(reg_features)}

print("\n--- End of Steps 3 & 4 ---")
