In [4]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import shap
import math  # <- Fix: use Python's built-in math module

# 1. Build a tiny toy dataset: three features, three observations.
# NOTE(review): Feature2 == Feature1 + 3 and Feature3 == Feature1 + 6 on every
# row, so the columns are perfectly collinear. The coefficients 1/2/3 used to
# generate y below are therefore not uniquely recoverable from this data, and
# the fitted model may spread the effect across features differently.
X = pd.DataFrame(
    {
        "Feature1": [1, 2, 3],
        "Feature2": [4, 5, 6],
        "Feature3": [7, 8, 9],
    }
)

# Target is an exact linear function of the features (intercept 5, no noise).
y = X["Feature1"] + 2 * X["Feature2"] + 3 * X["Feature3"] + 5

# 2. Fit an ordinary least-squares model on the full dataset
#    (``fit`` returns the estimator itself, so the call can be chained).
model = LinearRegression().fit(X, y)

# 3. In-sample predictions, one per row of X
preds = model.predict(X)

# 4. Manual SHAP calculation for first row
def powerset(s):
    """Return every subset of the sequence ``s`` as a list of lists.

    Subsets are ordered by size (the empty subset first, the full set last);
    within one size they follow the order produced by
    ``itertools.combinations``.
    """
    from itertools import combinations

    subsets = []
    for size in range(len(s) + 1):
        for members in combinations(s, size):
            subsets.append(list(members))
    return subsets

def manual_shap_values(model, X_row, X_background):
    """
    Compute Shapley values manually for a single row by enumerating coalitions.

    The value of a coalition is the model's prediction at a single reference
    point: the column means of ``X_background``, with the coalition's features
    replaced by their values from ``X_row``.  For a linear model this equals
    averaging the predictions over the background rows; for a non-linear
    model it is only an approximation of that average.

    Parameters
    ----------
    model : object
        Anything with ``predict(DataFrame)``.  It is called once, with one row
        per coalition (``2 ** n`` rows) and the feature names as columns.
    X_row : pandas.Series
        The observation to explain; its index supplies the feature names and
        their order.
    X_background : pandas.DataFrame
        Reference data whose column means form the baseline.  Columns are
        matched to ``X_row`` by label when every feature name is present
        (so column order and extra columns do not matter); otherwise they are
        matched by position.

    Returns
    -------
    dict
        Maps each feature name (in ``X_row`` order) to its Shapley value.

    Raises
    ------
    ValueError
        If the baseline ends up with a different number of columns than
        ``X_row`` has features.

    Notes
    -----
    The cost grows as ``2 ** n`` in the number of features, so this is only
    practical for a handful of features.
    """
    from itertools import combinations

    feature_names = list(X_row.index)
    n = len(feature_names)
    if n == 0:
        return {}

    # Align the background with the row's feature order by label. Taking the
    # means in the background's own column order would silently pair each mean
    # with the wrong feature whenever the two orders differ.
    if all(name in X_background.columns for name in feature_names):
        background = X_background[feature_names]
    else:
        # No label match: keep the positional pairing.
        background = X_background
    baseline = background.mean().values.reshape(1, -1)
    if baseline.shape[1] != n:
        raise ValueError(
            f"X_background provides {baseline.shape[1]} columns but X_row has "
            f"{n} features"
        )

    # Every coalition, identified by a sorted tuple of feature positions.
    coalitions = [
        members
        for size in range(n + 1)
        for members in combinations(range(n), size)
    ]

    # One input row per coalition: start from the baseline and overwrite the
    # coalition's features with the explained row's values.
    row_values = X_row.values
    rows = baseline.repeat(len(coalitions), axis=0)
    for r, members in enumerate(coalitions):
        for j in members:
            rows[r, j] = row_values[j]

    # Evaluate each coalition exactly once, in a single batched call, instead
    # of re-predicting the same coalitions for every feature.
    predictions = model.predict(pd.DataFrame(rows, columns=feature_names))
    value = dict(zip(coalitions, predictions))

    # Shapley weight |S|! (n - |S| - 1)! / n! depends only on the subset size.
    n_factorial = math.factorial(n)
    weights = [
        math.factorial(size) * math.factorial(n - size - 1) / n_factorial
        for size in range(n)
    ]

    shap_values = {}
    for i, feature in enumerate(feature_names):
        others = [j for j in range(n) if j != i]
        contribution = 0
        for size in range(n):
            for subset in combinations(others, size):
                # Re-sort so the key matches the canonical coalition tuple.
                with_feature = tuple(sorted(subset + (i,)))
                marginal_contrib = value[with_feature] - value[subset]
                contribution += weights[size] * marginal_contrib
        shap_values[feature] = contribution
    return shap_values

# Explain the first observation by hand, using the full dataset as background
first_row = X.iloc[0]
manual_shap = manual_shap_values(model, first_row, X)

# 5. Same explanation from the shap package (X again serves as background data)
explainer = shap.Explainer(model, X)
shap_values_package = explainer(X)
package_first_row = shap_values_package.values[0]

# 6. One-row comparison table: inputs, prediction, manual vs. package SHAP values
feature_cols = ("Feature1", "Feature2", "Feature3")
comparison_columns = {name: [first_row[name]] for name in feature_cols}
comparison_columns["Prediction"] = [preds[0]]
comparison_columns.update(
    {f"Manual_SHAP_{name}": [manual_shap[name]] for name in feature_cols}
)
comparison_columns.update(
    {
        f"SHAP_Package_{name}": [package_first_row[position]]
        for position, name in enumerate(feature_cols)
    }
)
comparison = pd.DataFrame(comparison_columns)

comparison


Unnamed: 0,Feature1,Feature2,Feature3,Prediction,Manual_SHAP_Feature1,Manual_SHAP_Feature2,Manual_SHAP_Feature3,SHAP_Package_Feature1,SHAP_Package_Feature2,SHAP_Package_Feature3
0,1,4,7,35.0,-2.0,-2.0,-2.0,-2.0,-2.0,-2.0
