# Basic Kernel
https://shap.readthedocs.io/en/latest/example_notebooks/tabular_examples/model_agnostic/Simple%20Kernel%20SHAP.html

In [74]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import itertools
import scipy.special
import time
import shap
import pandas as pd

# Step 1: Generate a simple regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train a RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Step 4: Select the first test instance for explanation
test_instance = X_test[0]

# Define helper functions for Kernel SHAP
def powerset(iterable):
    s = list(iterable)
    return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s) + 1))

def shapley_kernel(M, s):
    if s == 0 or s == M:
        return 10000  # Large constant for numerical stability
    return (M - 1) / (scipy.special.binom(M, s) * s * (M - s))

def kernel_shap(f, x, reference, M):
    X = np.zeros((2**M, M + 1))
    X[:, -1] = 1
    weights = np.zeros(2**M)
    V = np.zeros((2**M, M))
    for i in range(2**M):
        V[i, :] = reference

    for i, s in enumerate(powerset(range(M))):
        s = list(s)
        V[i, s] = x[s]
        X[i, s] = 1
        weights[i] = shapley_kernel(M, len(s))
    y = f(V)
    wsq = np.sqrt(weights)
    result = np.linalg.lstsq(wsq[:, None] * X, wsq * y, rcond=None)[0]
    return result

# Define the prediction function
def prediction_function(X):
    return regressor.predict(X)

# Define the reference input (mean of the training data)
reference = np.mean(X_train, axis=0)

# Number of features
M = X_train.shape[1]

# Step 6: Compute SHAP values and time the computation

# TreeExplainer SHAP
start_time = time.time()
tree_explainer = shap.TreeExplainer(regressor)
tree_shap_values = tree_explainer.shap_values(test_instance.reshape(1, -1))[0]
tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
tree_time = time.time() - start_time

# KernelExplainer SHAP
start_time = time.time()
kernel_explainer = shap.KernelExplainer(prediction_function, X_train)
kernel_shap_values = kernel_explainer.shap_values(test_instance.reshape(1, -1))[0]
kernel_baseline = float(kernel_explainer.expected_value)  # Ensure scalar
kernel_time = time.time() - start_time

# Custom Kernel SHAP
start_time = time.time()
custom_kernel_phi = kernel_shap(prediction_function, test_instance, reference, M)
custom_baseline = float(custom_kernel_phi[-1])  # Baseline (intercept), ensure scalar
custom_kernel_shap_values = custom_kernel_phi[:-1]  # SHAP values for features
custom_kernel_time = time.time() - start_time  # Time taken for Custom Kernel SHAP

# Step 7: Display Results
results = pd.DataFrame({
    "Feature": [f"Feature {i+1}" for i in range(X.shape[1])],
    "TreeExplainer SHAP": np.round(tree_shap_values, 4),  # TreeExplainer output
    "KernelExplainer SHAP": np.round(kernel_shap_values, 4),  # KernelExplainer output
    "CustomKernel SHAP": np.round(custom_kernel_shap_values, 4),  # Custom Kernel SHAP
})

# Add baselines to the results
results.loc["Baseline"] = {
    "Feature": "Baseline",
    "TreeExplainer SHAP": np.round(tree_baseline, 4),
    "KernelExplainer SHAP": np.round(kernel_baseline, 4),
    "CustomKernel SHAP": np.round(custom_baseline, 4),
}

# Calculate the sum of SHAP values + baseline for each method
tree_shap_sum = float(np.sum(tree_shap_values) + tree_baseline)  # Ensure scalar
kernel_shap_sum = float(np.sum(kernel_shap_values) + kernel_baseline)  # Ensure scalar
custom_shap_sum = float(np.sum(custom_kernel_shap_values) + custom_baseline)  # Ensure scalar

results.loc["Sum"] = {
    "Feature": "Sum",
    "TreeExplainer SHAP": np.round(tree_shap_sum, 4),
    "KernelExplainer SHAP": np.round(kernel_shap_sum, 4),
    "CustomKernel SHAP": np.round(custom_shap_sum, 4),
}

# Add computation times as the last row
results.loc["Computation Time (s)"] = {
    "Feature": "Computation Time (s)",
    "TreeExplainer SHAP": np.round(tree_time, 4),
    "KernelExplainer SHAP": np.round(kernel_time, 4),
    "CustomKernel SHAP": np.round(custom_kernel_time, 4),
}

results

  tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
Using 800 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Feature,TreeExplainer SHAP,KernelExplainer SHAP,CustomKernel SHAP
0,Feature 1,13.4197,13.4311,12.4019
1,Feature 2,1.7568,1.356,3.4165
2,Feature 3,1.96,0.9772,-0.5405
3,Feature 4,75.3594,74.8056,72.5709
4,Feature 5,1.0264,0.5648,-0.5133
5,Feature 6,-3.2434,-3.414,-3.533
6,Feature 7,-30.718,-31.2188,-33.8595
7,Feature 8,1.8544,1.8552,1.8361
8,Feature 9,1.4149,0.8683,0.1188
9,Feature 10,-71.4951,-68.8819,-69.919


# Updated Kernel

In [92]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import itertools
import scipy.special
import time
import shap
import pandas as pd

# Step 1: Generate a simple regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train a RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Step 4: Select the first test instance for explanation
test_instance = X_test[0]

# Monte Carlo-based sampling for subsets
def sample_subsets(M, num_samples):
    """
    Generate random subsets of features using Monte Carlo sampling.
    This replaces the exhaustive powerset generation for efficiency.
    """
    np.random.seed(0)
    subsets = []
    for _ in range(num_samples):
        subset = np.random.choice([0, 1], size=M, p=[0.5, 0.5])
        subsets.append(np.where(subset == 1)[0])  # Indices of features in the subset
    return subsets

# Shapley kernel weight calculation
def shapley_kernel(M, s):
    """
    Compute the Shapley kernel weight for a given subset size `s`.
    """
    if s == 0 or s == M:
        return 10000  # Large constant for numerical stability
    return (M - 1) / (scipy.special.binom(M, s) * s * (M - s))

# Kernel SHAP implementation with optional Monte Carlo sampling
def kernel_shap(f, x, reference, M, num_samples=None):
    """
    Compute SHAP values using the Kernel SHAP method.
    - If `num_samples` is None, compute exact SHAP values (2^M subsets).
    - Otherwise, approximate SHAP values using Monte Carlo sampling.

    Args:
        f (callable): Prediction function.
        x (np.ndarray): Input instance to explain.
        reference (np.ndarray): Reference values for each feature.
        M (int): Number of features.
        num_samples (int): Number of Monte Carlo samples (optional).

    Returns:
        np.ndarray: SHAP values for the input instance.
    """
    if num_samples is None:
        # Use exact computation (exhaustive powerset)
        subsets = list(itertools.chain.from_iterable(itertools.combinations(range(M), r) for r in range(M + 1)))
    else:
        # Use Monte Carlo sampling
        subsets = sample_subsets(M, num_samples)

    # Prepare matrices for least-squares regression
    num_subsets = len(subsets)
    X = np.zeros((num_subsets, M + 1))
    X[:, -1] = 1  # Bias term
    V = np.tile(reference, (num_subsets, 1))
    weights = np.zeros(num_subsets)

    # Populate X, V, and weights based on subsets
    for i, s in enumerate(subsets):
        s = list(s)
        V[i, s] = x[s]
        X[i, s] = 1
        weights[i] = shapley_kernel(M, len(s))

    # Normalize weights for numerical stability
    weights /= np.sum(weights)

    # Model predictions for perturbed inputs
    y = f(V)

    # Weighted least-squares regression
    wsq = np.sqrt(weights)
    result = np.linalg.lstsq(wsq[:, None] * X, wsq * y, rcond=None)[0]

    return result


# Define the prediction function
def prediction_function(X):
    return regressor.predict(X)

# Define the reference input (mean of the training data)
reference = np.mean(X_train, axis=0)

# Number of features
M = X_train.shape[1]

# Step 6: Compute SHAP values and time the computation

# TreeExplainer SHAP
start_time = time.time()
tree_explainer = shap.TreeExplainer(regressor)
tree_shap_values = tree_explainer.shap_values(test_instance.reshape(1, -1))[0]
tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
tree_time = time.time() - start_time

# KernelExplainer SHAP
start_time = time.time()
kernel_explainer = shap.KernelExplainer(prediction_function, np.tile(reference, (500, 1)))
kernel_shap_values = kernel_explainer.shap_values(test_instance.reshape(1, -1))[0]
kernel_baseline = float(kernel_explainer.expected_value)  # Ensure scalar
kernel_time = time.time() - start_time

# Custom Kernel SHAP
start_time = time.time()
num_samples = 100  # Increase Monte Carlo samples for accuracy
custom_kernel_phi = kernel_shap(prediction_function, test_instance, reference, M, num_samples)
custom_baseline = float(custom_kernel_phi[-1])  # Baseline (intercept), ensure scalar
custom_kernel_shap_values = custom_kernel_phi[:-1]  # SHAP values for features
custom_kernel_time = time.time() - start_time  # Time taken for Custom Kernel SHAP

# Step 7: Display Results
results = pd.DataFrame({
    "Feature": [f"Feature {i+1}" for i in range(X.shape[1])],
    "TreeExplainer SHAP": np.round(tree_shap_values, 4),  # TreeExplainer output
    "KernelExplainer SHAP": np.round(kernel_shap_values, 4),  # KernelExplainer output
    "CustomKernel SHAP": np.round(custom_kernel_shap_values, 4),  # Custom Kernel SHAP
})

# Add baselines to the results
results = pd.concat([
    results,
    pd.DataFrame([
        {"Feature": "Baseline", "TreeExplainer SHAP": np.round(tree_baseline, 4),
         "KernelExplainer SHAP": np.round(kernel_baseline, 4),
         "CustomKernel SHAP": np.round(custom_baseline, 4)},
        {"Feature": "Sum", "TreeExplainer SHAP": np.round(np.sum(tree_shap_values) + tree_baseline, 4),
         "KernelExplainer SHAP": np.round(np.sum(kernel_shap_values) + kernel_baseline, 4),
         "CustomKernel SHAP": np.round(np.sum(custom_kernel_shap_values) + custom_baseline, 4)},
        {"Feature": "Computation Time (s)", "TreeExplainer SHAP": np.round(tree_time, 4),
         "KernelExplainer SHAP": np.round(kernel_time, 4),
         "CustomKernel SHAP": np.round(custom_kernel_time, 4)},
    ])
])

# Print the results
results

  tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
Using 500 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Feature,TreeExplainer SHAP,KernelExplainer SHAP,CustomKernel SHAP
0,Feature 1,-47.8522,-50.7102,-49.7264
1,Feature 2,9.0346,5.7863,9.8388
2,Feature 3,3.3038,-2.5937,0.9901
3,Feature 4,-2.1609,-4.1729,-17.3094
4,Feature 5,-0.7324,1.7503,-0.6865
5,Feature 6,-1.6299,-1.6725,4.1388
6,Feature 7,25.5404,14.752,4.7303
7,Feature 8,35.0099,29.4719,30.4919
8,Feature 9,-74.1182,-83.5356,-88.8265
9,Feature 10,-115.6296,-82.9742,-79.2923


# cosine with reference point!

In [106]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import itertools
import scipy.special
import time
import shap
import pandas as pd

# Step 1: Generate a simple regression dataset
X, y = make_regression(n_samples=1000, n_features=10, noise=0.5, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 2: Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train a RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Step 4: Select the first test instance for explanation
test_instance = X_test[0]

# Shapley kernel weight calculation
def shapley_kernel(M, s):
    """
    Compute the Shapley kernel weight for a given subset size `s`.
    """
    if s == 0 or s == M:
        return 10000  # Large constant for numerical stability
    return (M - 1) / (scipy.special.binom(M, s) * s * (M - s))

# Mean Kernel SHAP function
def mean_kernel_shap(f, x, reference, M):
    """
    Compute SHAP values by substituting excluded features with the mean of the features.

    Args:
        f (callable): Prediction function.
        x (np.ndarray): Input instance to explain.
        reference (np.ndarray): Mean feature values (reference).
        M (int): Number of features.

    Returns:
        np.ndarray: SHAP values for the input instance.
    """
    # Generate all subsets (powerset of features)
    subsets = list(itertools.chain.from_iterable(itertools.combinations(range(M), r) for r in range(M + 1)))
    num_subsets = len(subsets)
    
    # Prepare matrices for least-squares regression
    X = np.zeros((num_subsets, M + 1))
    X[:, -1] = 1  # Bias term
    V = np.zeros((num_subsets, M))

    # Populate X, V, and weights
    weights = np.zeros(num_subsets)
    for i, s in enumerate(subsets):
        s = list(s)
        V[i, :] = reference  # Start with the mean (reference)
        V[i, s] = x[s]       # Replace included features with values from `x`
        X[i, s] = 1
        weights[i] = shapley_kernel(M, len(s))

    # Normalize weights for numerical stability
    weights /= np.sum(weights)

    # Model predictions for perturbed inputs
    y = f(V)

    # Weighted least-squares regression
    wsq = np.sqrt(weights)
    result = np.linalg.lstsq(wsq[:, None] * X, wsq * y, rcond=None)[0]

    return result

# Kernel SHAP with farthest points
def kernel_shap_with_far_points(f, x, X_train, M, num_samples=None, k=10):
    """
    Compute SHAP values using a modified Kernel SHAP method.
    - Replace excluded features with random samples from the farthest `k` training points.

    Args:
        f (callable): Prediction function.
        x (np.ndarray): Input instance to explain.
        X_train (np.ndarray): Training dataset (used to find far points).
        M (int): Number of features.
        num_samples (int): Number of Monte Carlo samples (optional).
        k (int): Number of farthest points to sample for replacing excluded features.

    Returns:
        np.ndarray: SHAP values for the input instance.
    """
    # Compute distances from the test instance to all training instances
    distances = np.linalg.norm(X_train - x, axis=1)
    
    # Find the farthest `k` points
    farthest_indices = np.argsort(distances)[-k:]  # Indices of the k farthest points
    farthest_points = X_train[farthest_indices]
    
    # Monte Carlo sampling or exhaustive computation
    if num_samples is None:
        subsets = list(itertools.chain.from_iterable(itertools.combinations(range(M), r) for r in range(M + 1)))
    else:
        subsets = sample_subsets(M, num_samples)

    # Prepare matrices for least-squares regression
    num_subsets = len(subsets)
    X = np.zeros((num_subsets, M + 1))
    X[:, -1] = 1  # Bias term
    V = np.zeros((num_subsets, M))

    # Populate X, V, and weights based on subsets
    weights = np.zeros(num_subsets)
    for i, s in enumerate(subsets):
        s = list(s)
        V[i, :] = x  # Start with the full instance
        
        # Replace excluded features with a random far point sample
        if len(s) < M:
            far_sample = farthest_points[np.random.choice(k)]  # Randomly sample from far points
            V[i, [j for j in range(M) if j not in s]] = far_sample[[j for j in range(M) if j not in s]]
        
        # Set included features
        X[i, s] = 1
        weights[i] = shapley_kernel(M, len(s))

    # Normalize weights for numerical stability
    weights /= np.sum(weights)

    # Model predictions for perturbed inputs
    y = f(V)

    # Weighted least-squares regression
    wsq = np.sqrt(weights)
    result = np.linalg.lstsq(wsq[:, None] * X, wsq * y, rcond=None)[0]

    return result


# Define the prediction function
def prediction_function(X):
    return regressor.predict(X)

# Number of features
M = X_train.shape[1]

# Step 6: Compute SHAP values and time the computation

# TreeExplainer SHAP
start_time = time.time()
tree_explainer = shap.TreeExplainer(regressor)
tree_shap_values = tree_explainer.shap_values(test_instance.reshape(1, -1))[0]
tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
tree_time = time.time() - start_time

# KernelExplainer SHAP
start_time = time.time()
kernel_explainer = shap.KernelExplainer(prediction_function, X_train)
kernel_shap_values = kernel_explainer.shap_values(test_instance.reshape(1, -1))[0]
kernel_baseline = float(kernel_explainer.expected_value)  # Ensure scalar
kernel_time = time.time() - start_time

# Mean Kernel SHAP
start_time = time.time()
reference = np.mean(X_train, axis=0)  # Mean feature values
mean_kernel_phi = mean_kernel_shap(prediction_function, test_instance, reference, M)
mean_baseline = float(mean_kernel_phi[-1])  # Baseline (intercept), ensure scalar
mean_kernel_shap_values = mean_kernel_phi[:-1]  # SHAP values for features
mean_kernel_time = time.time() - start_time

# Kernel SHAP with farthest points
start_time = time.time()
num_samples = None  # Exact computation
k = 100  # Number of farthest points to use
far_kernel_phi = kernel_shap_with_far_points(prediction_function, test_instance, X_train, M, num_samples=num_samples, k=k)
far_baseline = float(far_kernel_phi[-1])  # Baseline (intercept), ensure scalar
far_kernel_shap_values = far_kernel_phi[:-1]  # SHAP values for features
far_kernel_time = time.time() - start_time

# Step 7: Display Results
results = pd.DataFrame({
    "Feature": [f"Feature {i+1}" for i in range(X.shape[1])],
    "TreeExplainer SHAP": np.round(tree_shap_values, 4),
    "KernelExplainer SHAP": np.round(kernel_shap_values, 4),
    "MeanKernel SHAP": np.round(mean_kernel_shap_values, 4),
    "FarKernel SHAP": np.round(far_kernel_shap_values, 4),
})

# Append baseline, sum, and computation time
results = pd.concat([
    results,
    pd.DataFrame([
        {"Feature": "Baseline", "TreeExplainer SHAP": np.round(tree_baseline, 4),
         "KernelExplainer SHAP": np.round(kernel_baseline, 4),
         "MeanKernel SHAP": np.round(mean_baseline, 4),
         "FarKernel SHAP": np.round(far_baseline, 4)},
        {"Feature": "Sum", "TreeExplainer SHAP": np.round(np.sum(tree_shap_values) + tree_baseline, 4),
         "KernelExplainer SHAP": np.round(np.sum(kernel_shap_values) + kernel_baseline, 4),
         "MeanKernel SHAP": np.round(np.sum(mean_kernel_shap_values) + mean_baseline, 4),
         "FarKernel SHAP": np.round(np.sum(far_kernel_shap_values) + far_baseline, 4)},
        {"Feature": "Computation Time (s)", "TreeExplainer SHAP": np.round(tree_time, 4),
         "KernelExplainer SHAP": np.round(kernel_time, 4),
         "MeanKernel SHAP": np.round(mean_kernel_time, 4),
         "FarKernel SHAP": np.round(far_kernel_time, 4)},
    ])
])

# Print the results
results

  tree_baseline = float(tree_explainer.expected_value)  # Ensure scalar
Using 800 background data samples could cause slower run times. Consider using shap.sample(data, K) or shap.kmeans(data, K) to summarize the background as K samples.


  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Feature,TreeExplainer SHAP,KernelExplainer SHAP,MeanKernel SHAP,FarKernel SHAP
0,Feature 1,13.4197,13.4311,12.4019,5.113
1,Feature 2,1.7568,1.356,3.4165,17.8503
2,Feature 3,1.96,0.9772,-0.5405,35.1193
3,Feature 4,75.3594,74.8056,72.5709,103.4481
4,Feature 5,1.0264,0.5648,-0.5133,25.5756
5,Feature 6,-3.2434,-3.414,-3.533,-21.5159
6,Feature 7,-30.718,-31.2188,-33.8595,-61.2246
7,Feature 8,1.8544,1.8552,1.8361,12.0021
8,Feature 9,1.4149,0.8683,0.1188,-23.778
9,Feature 10,-71.4951,-68.8819,-69.919,-97.2885


# mean with sampling

In [1]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import itertools
import scipy.special
import time
import shap
import pandas as pd


# Step 1: Generate a simple regression dataset
X, y = make_regression(n_samples=10000, n_features=10, noise=0.5)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Step 2: Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 3: Train a RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=42)
regressor.fit(X_train, y_train)

# Step 4: Select the first test instance for explanation
test_instance = X_test[0]

# Shapley kernel weight calculation
def shapley_kernel(M, s):
    """
    Calculate the Shapley kernel weight for a subset of size s with M total features.

    Parameters:
    - M: Total number of features.
    - s: Size of the subset.

    Returns:
    - The kernel weight for the subset size.
    """
    if s == 0 or s == M:
        return 10000  # Large constant for numerical stability
    return (M - 1) / (scipy.special.binom(M, s) * s * (M - s))

# Mean Kernel SHAP function (Exact computation)
import numpy as np
import scipy.special

def mean_kernel_shap_with_constraint(f, x, reference, M, nsamples="auto"):
    r"""
    Kernel SHAP with additive efficiency constraint and kernel weights.
    
    Parameters:
    - f: The model function to explain.
    - x: Instance to explain (1D array of feature values).
    - reference: Reference value for each feature (1D array, usually the mean of the dataset).
    - M: Number of features.
    - nsamples: Number of samples for the sampling method. 
                If "auto", uses nsamples = min(2 * M + 2048, 2^M).
    
    Returns:
    - shap_values: Shapley values (1D array of size M).
    - baseline: The baseline value (\phi_0).
    """

    # Step 1: Calculate the number of samples
    if nsamples == "auto":
        nsamples = min(2 * M + 2048, 2 ** M)

    # Step 2: Sampling subsets with Shapley kernel weights
    np.random.seed(0)
    subsets = []  # List to store sampled subsets
    weights = []  # List to store corresponding weights

    for _ in range(nsamples):
        # Generate a random binary mask representing a subset
        subset = np.random.choice([0, 1], size=M, p=[0.5, 0.5])  # Random binary mask
        subset_indices = np.where(subset == 1)[0]
        subsets.append(subset_indices)
        
        # Compute Shapley kernel weight for the subset
        s_size = len(subset_indices)
        if s_size == 0 or s_size == M:
            weight = 10000  # Large constant for stability
        else:
            weight = (M - 1) / (scipy.special.binom(M, s_size) * s_size * (M - s_size))
        weights.append(weight)

    # Normalize weights
    weights = np.array(weights)
    weights /= np.sum(weights)

    # Number of subsets
    num_subsets = len(subsets)

    # Step 3: Initialize matrices for regression
    X = np.zeros((num_subsets, M))  # Design matrix
    V = np.zeros((num_subsets, M))  # Perturbed input data matrix

    # Prepare feature subsets
    for i, s in enumerate(subsets):
        V[i, :] = reference  # Start with reference values
        V[i, s] = x[s]       # Replace selected features with values from `x`
        X[i, s] = 1          # Set feature presence in the design matrix

    # Ensure `reference` and `x` are reshaped to (1, M) before passing to `f`
    reference = reference.reshape(1, -1)
    x = x.reshape(1, -1)

    # Step 4: Evaluate the model on the sampled feature subsets
    y = f(V) - f(reference)  # Centered outputs: v_x(s) - v_x(0)

    # Step 5: Add efficiency constraint row to X and y
    # Add a row to enforce the constraint: sum(ϕ_i) = v_x(1) - v_x(0)
    efficiency_row = np.ones((1, M))  # Row of ones
    X = np.vstack([X, efficiency_row])  # Append to the design matrix
    y = np.append(y, f(x) - f(reference))  # Append the efficiency constraint to outputs

    # Add corresponding weight for the efficiency constraint
    weights = np.append(weights, 1.0)  # Assign unit weight to the efficiency constraint

    # Step 6: Weighted least squares regression
    # Compute weighted least squares: Minimize Σ (w_i * (y_i - X_i^T * φ)^2)
    wsq = np.sqrt(weights)  # Square root of weights
    result = np.linalg.lstsq(wsq[:, None] * X, wsq * y, rcond=None)[0]  # Solve for SHAP values

    # Step 7: Return results
    return result, f(reference).flatten()


# Define the prediction function
def prediction_function(X):
    return regressor.predict(X)

# Number of features
M = X_train.shape[1]

# Step 6: Compute SHAP values and time the computation

# TreeExplainer SHAP
start_time = time.time()
tree_explainer = shap.TreeExplainer(regressor)
tree_shap_values = tree_explainer.shap_values(test_instance.reshape(1, -1))[0]
tree_baseline = float(tree_explainer.expected_value[0])  # Extract scalar from array
tree_time = time.time() - start_time

# KernelExplainer SHAP
start_time = time.time()
background = shap.sample(X_train, 100)  # Summarize the background to 100 samples
kernel_explainer = shap.KernelExplainer(prediction_function, background)
kernel_shap_values = kernel_explainer.shap_values(test_instance.reshape(1, -1))[0]
kernel_baseline = float(kernel_explainer.expected_value)  # Ensure scalar
kernel_time = time.time() - start_time

# Mean Kernel SHAP (Auto)
start_time = time.time()
reference = np.mean(X_train, axis=0)  # Mean feature values
mean_kernel_phi, mean_baseline = mean_kernel_shap_with_constraint(prediction_function, test_instance, reference, M)
mean_kernel_shap_values = mean_kernel_phi  # SHAP values for features
mean_kernel_time = time.time() - start_time

# Compute the prediction of the test_instance
test_instance_prediction = prediction_function(test_instance.reshape(1, -1))[0]

# Step 6: Display Results
results = pd.DataFrame({
    "Feature": [f"Feature {i+1}" for i in range(X.shape[1])],
    "TreeExplainer SHAP": np.round(tree_shap_values, 4),
    "KernelExplainer SHAP": np.round(kernel_shap_values, 4),
    "MeanKernel SHAP (auto)": np.round(mean_kernel_shap_values, 4),
})

# Append baseline, sum, computation time, and prediction
results = pd.concat([
    results,
    pd.DataFrame([
        {"Feature": "Baseline", "TreeExplainer SHAP": np.round(tree_baseline, 4),
         "KernelExplainer SHAP": np.round(kernel_baseline, 4),
         "MeanKernel SHAP (auto)": np.round(mean_baseline[0], 4)},
        {"Feature": "Sum", "TreeExplainer SHAP": np.round(np.sum(tree_shap_values) + tree_baseline, 4),
         "KernelExplainer SHAP": np.round(np.sum(kernel_shap_values) + kernel_baseline, 4),
         "MeanKernel SHAP (auto)": np.round(np.sum(mean_kernel_shap_values) + mean_baseline[0], 4)},
        {"Feature": "Computation Time (s)", "TreeExplainer SHAP": np.round(tree_time, 4),
         "KernelExplainer SHAP": np.round(kernel_time, 4),
         "MeanKernel SHAP (auto)": np.round(mean_kernel_time, 4)},
        {"Feature": "Prediction (Test Instance)", "TreeExplainer SHAP": np.round(test_instance_prediction, 4),
         "KernelExplainer SHAP": np.round(test_instance_prediction, 4), "MeanKernel SHAP (auto)": np.round(test_instance_prediction, 4)}
    ])
])


# Print the results
results

  0%|          | 0/1 [00:00<?, ?it/s]

Unnamed: 0,Feature,TreeExplainer SHAP,KernelExplainer SHAP,MeanKernel SHAP (auto)
0,Feature 1,-43.7884,-43.7849,-26.0819
1,Feature 2,-8.641,-7.6429,-6.6619
2,Feature 3,7.1345,11.2744,8.2919
3,Feature 4,13.0327,6.0428,4.3561
4,Feature 5,-62.4468,-56.778,-61.5866
5,Feature 6,-0.8848,0.122,-0.3457
6,Feature 7,1.8424,2.6715,1.5175
7,Feature 8,10.3981,19.8204,15.5358
8,Feature 9,8.5729,7.9822,4.8366
9,Feature 10,-28.4331,-20.5258,-28.8828
