In [1]:
import shap
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from shap import sample as shap_sample

def generate_shap_explanation(exog_train, exog_test, model_results, exog_vars, product, geography, sample_size=100, save_dir="shap_outputs"):
    """
    Generate and visualize SHAP values for a SARIMAX model.

    A KernelExplainer is fitted on a background sample of the training
    exogenous data and used to explain the first `sample_size` rows of the
    test exogenous data. The mean absolute SHAP value per feature is saved
    as a horizontal bar chart and returned as a DataFrame.

    Parameters:
    - exog_train: DataFrame of training exogenous variables (must contain
      every column in exog_vars)
    - exog_test: DataFrame of testing exogenous variables (must contain
      every column in exog_vars)
    - model_results: Trained SARIMAX model result object
    - exog_vars: List of exogenous variable names, in the order the model
      was fitted with
    - product: Product identifier (for title/file name)
    - geography: Geography identifier (for title/file name)
    - sample_size: Sample size for SHAP explanation (default: 100). It is
      capped at len(exog_test) and is used as the background sample size,
      the number of explained rows, and KernelExplainer's `nsamples`.
    - save_dir: Directory to save the SHAP plot (default: 'shap_outputs')

    Returns:
    - DataFrame with columns 'Feature' and 'Mean_Abs_SHAP', sorted by
      ascending importance, or None when fewer than 30 rows are available
      to explain (a message is printed in that case).

    Raises:
    - KeyError: if a name in exog_vars is missing from exog_train or
      exog_test.
    """

    # Ensure sample size is feasible
    sample_size = min(sample_size, len(exog_test))
    if sample_size < 30:
        print(f"Skipping SHAP for ({product}, {geography}): Not enough test data.")
        return None

    # Restrict and order both frames by exog_vars so the arrays SHAP hands
    # back to the wrapper always line up with the column names used below,
    # even if the caller's frames carry extra or differently ordered columns.
    feature_names = list(exog_vars)
    train_features = exog_train[feature_names]
    explain_rows = exog_test[feature_names].iloc[:sample_size]

    # Sample background data
    background = shap_sample(train_features, sample_size)

    # Define model prediction wrapper
    def model_predict(X):
        X_df = pd.DataFrame(X, columns=feature_names)
        # Forecast out-of-sample so the supplied exogenous rows are actually
        # used: in-sample prediction (start=0) replays the training exog, so
        # the output would not depend on X at all.
        # NOTE(review): rows are treated as consecutive forecast steps, so the
        # ARMA error component varies with row position - confirm this is
        # acceptable for the attribution being reported.
        forecast = model_results.forecast(steps=len(X_df), exog=X_df)
        # np.asarray copes with both Series and plain ndarray returns.
        return np.asarray(forecast, dtype=float)

    # Initialize SHAP explainer
    explainer = shap.KernelExplainer(model_predict, background)

    # Compute SHAP values
    shap_values = explainer.shap_values(explain_rows, nsamples=sample_size)

    # Calculate mean absolute SHAP values
    importances = np.abs(shap_values).mean(axis=0)
    importance_df = pd.DataFrame({
        'Feature': feature_names,
        'Mean_Abs_SHAP': importances
    }).sort_values(by='Mean_Abs_SHAP', ascending=True)

    # Create output directory if it doesn't exist
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, f"shap_bar_summary_{product}_{geography}.png")

    # Plot
    fig, ax = plt.subplots(figsize=(6, 3))
    try:
        ax.barh(importance_df['Feature'], importance_df['Mean_Abs_SHAP'])
        ax.set_xlabel("Mean |SHAP value|")
        ax.set_title(f"SHAP Bar Summary: {product} - {geography}")
        fig.tight_layout()
        fig.savefig(save_path)
        plt.show()
    finally:
        # Release the figure so repeated calls (one per product/geography)
        # do not accumulate open figures in memory.
        plt.close(fig)

    return importance_df

To Call this function the code should have: generate_shap_explanation(
    exog_train, exog_test, model_results, exog_vars,
    product='Prod123', geography='East', sample_size=100
)
To call the generate_shap_explanation() function successfully, you need the following prerequisites 

1.Trained SARIMAX model:
You  should have a trained SARIMAX model with exogenous variables.
from statsmodels.tsa.statespace.sarimax import SARIMAX

model = SARIMAX(endog_train, exog=exog_train, order=(1,1,1))
model_results = model.fit(disp=Fal

2.Exogenous Variables for Train & Test
You should have the exogenous variables for both training and testing data as DataFrames:
exog_vars = ['Log_MRP', 'Log_Competitor_Price']  # example list
exog_train = train_data[exog_vars]
exog_test = test_data[exog_vars]
les.
