# Laufzeitanalyse: SHAP vs SHAPIQ

Dieses Notebook vergleicht verschiedene Approximationsverfahren zur Berechnung von Shapley-Werten.

# Ziel
Analyse von:
- Laufzeit (x-Achse)
- Approximationsgenauigkeit (L1 & L2 Fehler) gegenüber Referenzwerten

# Verglichene Methoden
| Bibliothek | Methode                  |
|------------|--------------------------|
| `shapiq`   | KernelSHAP, SVARM, PermutationSamplingSV |
| `shap`     | KernelExplainer, PermutationExplainer    |

# Datensätze
- Bike Sharing
- California Housing

# Modelle
- Lineare Regression
- Random Forest Regressor

Imports & Einstellungen

In [1]:
import shapiq
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
import shap
from shapiq import TabularExplainer
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

sns.set(style="whitegrid")
plt.rcParams["figure.figsize"] = (12, 7)

import random
np.random.seed(42)
random.seed(42)



Daten vorbereiten & subsample Background (für schnellere Laufzeiten)

In [2]:
# --- Daten laden und vorverarbeiten ---

X_bike, y_bike = shapiq.datasets.load_bike_sharing()
X_cal, y_cal = shapiq.datasets.load_california_housing()

print("Bike Sharing - X shape:", X_bike.shape, "y shape:", y_bike.shape)
print("California Housing - X shape:", X_cal.shape, "y shape:", y_cal.shape)

def preprocess_data(X, y, categorical_cols=None, sample_size=None):
    if isinstance(X, pd.DataFrame):
        if categorical_cols is not None:
            X_processed = pd.get_dummies(X, columns=categorical_cols, drop_first=True)
        else:
            cat_cols = X.select_dtypes(include=['category', 'object']).columns.tolist()
            X_processed = pd.get_dummies(X, columns=cat_cols, drop_first=True)
    else:
        X_processed = pd.DataFrame(X)
    
    y_processed = pd.Series(y) if not isinstance(y, pd.Series) else y
    
    if sample_size is not None and len(X_processed) > sample_size:
        sampled_indices = X_processed.sample(n=sample_size, random_state=42).index
        X_processed = X_processed.loc[sampled_indices].reset_index(drop=True)
        y_processed = y_processed.loc[sampled_indices].reset_index(drop=True)
    else:
        X_processed = X_processed.reset_index(drop=True)
        y_processed = y_processed.reset_index(drop=True)
    
    return X_processed, y_processed

bike_categorical_cols = ['season', 'weather']

X_bike_proc, y_bike_proc = preprocess_data(X_bike, y_bike, categorical_cols=bike_categorical_cols)
X_cal_proc, y_cal_proc = preprocess_data(X_cal, y_cal)

print("Preprocessed Bike Sharing shape:", X_bike_proc.shape)
print("Preprocessed California Housing shape:", X_cal_proc.shape)

Bike Sharing - X shape: (17379, 12) y shape: (17379,)
California Housing - X shape: (20640, 8) y shape: (20640,)
Preprocessed Bike Sharing shape: (17379, 16)
Preprocessed California Housing shape: (20640, 8)


Modelle trainieren

In [3]:
# --- Train-Test Split ---

Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    X_bike_proc, y_bike_proc, test_size=0.2, random_state=42
)
Xc_train, Xc_test, yc_train, yc_test = train_test_split(
    X_cal_proc, y_cal_proc, test_size=0.2, random_state=42
)

# --- Modelle trainieren ---

model_bike_rf = RandomForestRegressor(random_state=42)
model_bike_rf.fit(Xb_train, yb_train)
print("Bike Sharing RandomForestRegressor trainiert.")

model_cal_rf = RandomForestRegressor(random_state=42)
model_cal_rf.fit(Xc_train, yc_train)
print("California Housing RandomForestRegressor trainiert.")

model_bike_lr = LinearRegression()
model_bike_lr.fit(Xb_train, yb_train)
print("Bike Sharing LinearRegression trainiert.")

model_cal_lr = LinearRegression()
model_cal_lr.fit(Xc_train, yc_train)
print("California Housing LinearRegression trainiert.")

# --- Wrapper Funktion für sauberes predict mit Feature-Namen ---

def model_predict_wrapper(model, feature_names):
    def predict(X):
        if not isinstance(X, pd.DataFrame):
            X = pd.DataFrame(X, columns=feature_names)
        return model.predict(X)
    return predict

# Feature-Namen
feature_names_bike = X_bike_proc.columns.tolist()
feature_names_cal = X_cal_proc.columns.tolist()


Bike Sharing RandomForestRegressor trainiert.
California Housing RandomForestRegressor trainiert.
Bike Sharing LinearRegression trainiert.
California Housing LinearRegression trainiert.


In [4]:
import shapiq.explainer

print(dir(shapiq.explainer))

['AgnosticExplainer', 'Explainer', 'TabPFNExplainer', 'TabularExplainer', 'TreeExplainer', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'agnostic', 'base', 'configuration', 'custom_types', 'tabpfn', 'tabular', 'tree', 'utils', 'validation']


In [5]:
import shapiq.explainer.tabular

print(dir(shapiq.explainer.tabular))

['Any', 'Explainer', 'ExplainerIndices', 'InteractionValues', 'Literal', 'TYPE_CHECKING', 'TabularExplainer', 'TabularExplainerApproximators', 'TabularExplainerImputers', 'TabularExplainerIndices', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'annotations', 'finalize_computed_interactions', 'overrides', 'setup_approximator', 'warn']


In [6]:
from shapiq.explainer.tabular import TabularExplainerApproximators

print(TabularExplainerApproximators)

typing.Literal['spex', 'montecarlo', 'svarm', 'permutation', 'regression']


In [7]:
# Hintergrunddaten als NumPy Arrays (für shapiq Explainer)
sample_size = 100
background_bike_np = X_bike_proc.sample(n=sample_size, random_state=42).to_numpy()
background_cal_np = X_cal_proc.sample(n=sample_size, random_state=42).to_numpy()

# Wrapped Predict Funktionen für alle Modelle
wrapped_predict_bike_rf = model_predict_wrapper(model_bike_rf, feature_names_bike)
wrapped_predict_cal_rf = model_predict_wrapper(model_cal_rf, feature_names_cal)
wrapped_predict_bike_lr = model_predict_wrapper(model_bike_lr, feature_names_bike)
wrapped_predict_cal_lr = model_predict_wrapper(model_cal_lr, feature_names_cal)

# --- shapiq Explainer ---

explainer_shapiq_spex_bike_rf = shapiq.TabularExplainer(
    wrapped_predict_bike_rf, background_bike_np, approximator="spex", sample_size=sample_size
)
explainer_shapiq_svarm_bike_rf = shapiq.TabularExplainer(
    wrapped_predict_bike_rf, background_bike_np, approximator="svarm", sample_size=sample_size
)
explainer_shapiq_perm_bike_rf = shapiq.TabularExplainer(
    wrapped_predict_bike_rf, background_bike_np, approximator="permutation", sample_size=sample_size
)

explainer_shapiq_spex_cal_rf = shapiq.TabularExplainer(
    wrapped_predict_cal_rf, background_cal_np, approximator="spex", sample_size=sample_size
)
explainer_shapiq_svarm_cal_rf = shapiq.TabularExplainer(
    wrapped_predict_cal_rf, background_cal_np, approximator="svarm", sample_size=sample_size
)
explainer_shapiq_perm_cal_rf = shapiq.TabularExplainer(
    wrapped_predict_cal_rf, background_cal_np, approximator="permutation", sample_size=sample_size
)

explainer_shapiq_spex_bike_lr = shapiq.TabularExplainer(
    wrapped_predict_bike_lr, background_bike_np, approximator="spex", sample_size=sample_size
)
explainer_shapiq_svarm_bike_lr = shapiq.TabularExplainer(
    wrapped_predict_bike_lr, background_bike_np, approximator="svarm", sample_size=sample_size
)
explainer_shapiq_perm_bike_lr = shapiq.TabularExplainer(
    wrapped_predict_bike_lr, background_bike_np, approximator="permutation", sample_size=sample_size
)

explainer_shapiq_spex_cal_lr = shapiq.TabularExplainer(
    wrapped_predict_cal_lr, background_cal_np, approximator="spex", sample_size=sample_size
)
explainer_shapiq_svarm_cal_lr = shapiq.TabularExplainer(
    wrapped_predict_cal_lr, background_cal_np, approximator="svarm", sample_size=sample_size
)
explainer_shapiq_perm_cal_lr = shapiq.TabularExplainer(
    wrapped_predict_cal_lr, background_cal_np, approximator="permutation", sample_size=sample_size
)

# --- shap Explainer (jetzt mit Wrapper-Funktion verwenden) ---

background_bike_df = X_bike_proc.sample(n=sample_size, random_state=42).reset_index(drop=True)
background_cal_df = X_cal_proc.sample(n=sample_size, random_state=42).reset_index(drop=True)

explainer_shap_kernel_bike_rf = shap.KernelExplainer(
    wrapped_predict_bike_rf, background_bike_df, feature_names=feature_names_bike
)
explainer_shap_perm_bike_rf = shap.PermutationExplainer(
    wrapped_predict_bike_rf, background_bike_df
)

explainer_shap_kernel_cal_rf = shap.KernelExplainer(
    wrapped_predict_cal_rf, background_cal_df, feature_names=feature_names_cal
)
explainer_shap_perm_cal_rf = shap.PermutationExplainer(
    wrapped_predict_cal_rf, background_cal_df
)

explainer_shap_kernel_bike_lr = shap.KernelExplainer(
    wrapped_predict_bike_lr, background_bike_df, feature_names=feature_names_bike
)
explainer_shap_perm_bike_lr = shap.PermutationExplainer(
    wrapped_predict_bike_lr, background_bike_df
)

explainer_shap_kernel_cal_lr = shap.KernelExplainer(
    wrapped_predict_cal_lr, background_cal_df, feature_names=feature_names_cal
)
explainer_shap_perm_cal_lr = shap.PermutationExplainer(
    wrapped_predict_cal_lr, background_cal_df
)

print("Alle Explainer (shapiq + shap) erfolgreich initialisiert ohne sklearn-Warnungen.")


Alle Explainer (shapiq + shap) erfolgreich initialisiert ohne sklearn-Warnungen.


In [8]:
def l1_error(a, b):
    return np.mean(np.abs(a - b))

def benchmark_shap_explainers(
    explainers_dict,       # dict mit {Name: explainer}
    X_test_df,             # Testdaten als DataFrame (für shap Explainer)
    X_test_np,             # Testdaten als NumPy Array (für shapiq Explainer)
    ref_shap_values,       # Referenz-Shapley-Werte als NumPy Array
    n_eval=20              # Anzahl Test-Samples zum Messen
):
    results = []

    for name, explainer in explainers_dict.items():
        print(f"Starte Benchmark für {name}...")
        start = time.time()
        # shapiq Explainer erwarten NumPy Array
        if isinstance(explainer, shapiq.TabularExplainer):
            shap_values = explainer.shap_values(X_test_np[:n_eval])
        else:
            # shap Explainer erwarten DataFrame
            shap_values = explainer.shap_values(X_test_df.iloc[:n_eval])
        
        duration = time.time() - start

        # Manche shap-Implementierungen geben Listen zurück (z.B. PermutationExplainer),
        # daher konvertieren wir in ein numpy Array, falls nötig
        if isinstance(shap_values, list):
            
            shap_values = np.array(shap_values)

        # Absicherung: Falls tuple (z.B. bei PermutationExplainer), nehme erstes Element (shap values)
        if isinstance(shap_values, tuple):
            shap_values = shap_values[0]

        error = l1_error(shap_values, ref_shap_values[:n_eval])
        print(f"{name} - Zeit: {duration:.3f}s, L1-Fehler: {error:.5f}")
        
        results.append({
            "method": name,
            "time": duration,
            "error": error
        })
    return pd.DataFrame(results)




shapley

In [9]:
n_ref = 100  # Anzahl der Referenzdatenpunkte
budget_ref = 500  # Anzahl der Modellbewertungen pro Punkt

# Hilfsfunktion: erklärt eine Liste von Instanzen einzeln
def get_shap_values(explainer, X, budget):
    shap_values = []
    for i in range(len(X)):
        x_i = X.iloc[[i]].to_numpy()  # Einzelne Instanz als 2D-Array
        values = explainer.explain(x_i, budget=budget).values
        squeezed = values.squeeze()
        print(f"Instance {i}: values.shape = {values.shape}, squeezed.shape = {squeezed.shape}")
        shap_values.append(squeezed)
    return np.array(shap_values, dtype=object)

# Anwendung für alle 4 Kombinationen
# Bike Sharing – Random Forest
ref_shap_bike_rf = get_shap_values(explainer_shapiq_spex_bike_rf, Xb_test.iloc[:n_ref], budget_ref)

# California Housing – Random Forest
ref_shap_cal_rf = get_shap_values(explainer_shapiq_spex_cal_rf, Xc_test.iloc[:n_ref], budget_ref)

# Bike Sharing – Linear Regression
ref_shap_bike_lr = get_shap_values(explainer_shapiq_spex_bike_lr, Xb_test.iloc[:n_ref], budget_ref)

# California Housing – Linear Regression
ref_shap_cal_lr = get_shap_values(explainer_shapiq_spex_cal_lr, Xc_test.iloc[:n_ref], budget_ref)

print("Xb_test shape:", Xb_test.shape)
print("Xb_test[:n_ref] shape:", Xb_test.iloc[:n_ref].shape)

# Vorhersage testen
pred = model_bike_rf.predict(Xb_test.iloc[:n_ref])
print("Prediction shape:", pred.shape)


Instance 0: values.shape = (32,), squeezed.shape = (32,)
Instance 1: values.shape = (27,), squeezed.shape = (27,)
Instance 2: values.shape = (25,), squeezed.shape = (25,)
Instance 3: values.shape = (26,), squeezed.shape = (26,)
Instance 4: values.shape = (24,), squeezed.shape = (24,)
Instance 5: values.shape = (28,), squeezed.shape = (28,)
Instance 6: values.shape = (25,), squeezed.shape = (25,)
Instance 7: values.shape = (20,), squeezed.shape = (20,)
Instance 8: values.shape = (25,), squeezed.shape = (25,)
Instance 9: values.shape = (32,), squeezed.shape = (32,)
Instance 10: values.shape = (27,), squeezed.shape = (27,)
Instance 11: values.shape = (30,), squeezed.shape = (30,)
Instance 12: values.shape = (33,), squeezed.shape = (33,)
Instance 13: values.shape = (36,), squeezed.shape = (36,)
Instance 14: values.shape = (27,), squeezed.shape = (27,)
Instance 15: values.shape = (27,), squeezed.shape = (27,)
Instance 16: values.shape = (30,), squeezed.shape = (30,)
Instance 17: values.shap

In [10]:
# --- Explainer Dictionaries für alle Modelle und Datensätze ---

explainers_bike_rf = {
    "shapiq_spex": explainer_shapiq_spex_bike_rf,
    "shapiq_svarm": explainer_shapiq_svarm_bike_rf,
    "shapiq_perm": explainer_shapiq_perm_bike_rf,
    "shap_kernel": explainer_shap_kernel_bike_rf,
    "shap_perm": explainer_shap_perm_bike_rf,
}

explainers_cal_rf = {
    "shapiq_spex": explainer_shapiq_spex_cal_rf,
    "shapiq_svarm": explainer_shapiq_svarm_cal_rf,
    "shapiq_perm": explainer_shapiq_perm_cal_rf,
    "shap_kernel": explainer_shap_kernel_cal_rf,
    "shap_perm": explainer_shap_perm_cal_rf,
}

explainers_bike_lr = {
    "shapiq_spex": explainer_shapiq_spex_bike_lr,
    "shapiq_svarm": explainer_shapiq_svarm_bike_lr,
    "shapiq_perm": explainer_shapiq_perm_bike_lr,
    "shap_kernel": explainer_shap_kernel_bike_lr,
    "shap_perm": explainer_shap_perm_bike_lr,
}

explainers_cal_lr = {
    "shapiq_spex": explainer_shapiq_spex_cal_lr,
    "shapiq_svarm": explainer_shapiq_svarm_cal_lr,
    "shapiq_perm": explainer_shapiq_perm_cal_lr,
    "shap_kernel": explainer_shap_kernel_cal_lr,
    "shap_perm": explainer_shap_perm_cal_lr,
}

# --- Referenz-Shapley-Werte berechnen (KernelExplainer auf kleiner Stichprobe, langsam aber genau) ---

ref_sample_size = 50  # Klein für Referenz, sonst sehr langsam

print("Berechne Referenz-Shapley-Werte (Bike, RF)...")
ref_shap_bike_rf, _ = timeit(
    explainer_shap_kernel_bike_rf.shap_values,
    X_bike_proc.iloc[:ref_sample_size]
)

print("Berechne Referenz-Shapley-Werte (California, RF)...")
ref_shap_cal_rf, _ = timeit(
    explainer_shap_kernel_cal_rf.shap_values,
    X_cal_proc.iloc[:ref_sample_size]
)

print("Berechne Referenz-Shapley-Werte (Bike, LR)...")
ref_shap_bike_lr, _ = timeit(
    explainer_shap_kernel_bike_lr.shap_values,
    X_bike_proc.iloc[:ref_sample_size]
)

print("Berechne Referenz-Shapley-Werte (California, LR)...")
ref_shap_cal_lr, _ = timeit(
    explainer_shap_kernel_cal_lr.shap_values,
    X_cal_proc.iloc[:ref_sample_size]
)

# --- Benchmarks durchführen ---

print("Benchmark: Bike Sharing - RandomForestRegressor")
results_bike_rf = benchmark_shap_explainers(
    explainers_bike_rf,
    X_bike_proc,
    X_bike_proc.to_numpy(),
    ref_shap_bike_rf,
    n_eval=50
)

print("Benchmark: California Housing - RandomForestRegressor")
results_cal_rf = benchmark_shap_explainers(
    explainers_cal_rf,
    X_cal_proc,
    X_cal_proc.to_numpy(),
    ref_shap_cal_rf,
    n_eval=50
)

print("Benchmark: Bike Sharing - LinearRegression")
results_bike_lr = benchmark_shap_explainers(
    explainers_bike_lr,
    X_bike_proc,
    X_bike_proc.to_numpy(),
    ref_shap_bike_lr,
    n_eval=50
)

print("Benchmark: California Housing - LinearRegression")
results_cal_lr = benchmark_shap_explainers(
    explainers_cal_lr,
    X_cal_proc,
    X_cal_proc.to_numpy(),
    ref_shap_cal_lr,
    n_eval=50
)

# Ergebnisse als DataFrames verfügbar:
print("Benchmark abgeschlossen!")


Berechne Referenz-Shapley-Werte (Bike, RF)...


NameError: name 'timeit' is not defined

Ergebnisse in DataFrame

In [None]:
df_results = pd.DataFrame(all_results)
# Entferne fehlerhafte oder unvollständige Ergebnisse
df_results = pd.DataFrame([r for r in all_results if r is not None and "explainer" in r and "dataset" in r])

df_results["method"] = df_results["explainer"] + " | " + df_results["dataset"]
df_results.head()


Visualisierung (Plotting)

In [None]:
plt.figure(figsize=(14,6))
sns.barplot(data=df_results, x="method", y="runtime", errorbar='sd')
plt.xticks(rotation=45, ha="right")
plt.title("Durchschnittliche Laufzeit je Methode und Datensatz")
plt.ylabel("Laufzeit (Sekunden)")
plt.xlabel("Methode | Datensatz")
plt.tight_layout()
plt.show()

plt.figure(figsize=(14,6))
sns.boxplot(data=df_results, x="method", y="quality")
plt.xticks(rotation=45, ha="right")
plt.title("Qualitätsfehler (L2-Abweichung) je Methode und Datensatz")
plt.ylabel("L2-Abweichung zum Referenz")
plt.xlabel("Methode | Datensatz")
plt.tight_layout()
plt.show()

plt.figure(figsize=(16,8))
sns.violinplot(data=df_results, x="method", y="runtime", inner="quartile", color="skyblue")
plt.xticks(rotation=45, ha="right")
plt.title("Verteilung der Laufzeiten je Methode und Datensatz")
plt.ylabel("Laufzeit (Sekunden)")
plt.xlabel("Methode | Datensatz")
plt.tight_layout()
plt.show()

plt.figure(figsize=(16,8))
sns.violinplot(data=df_results, x="method", y="quality", inner="quartile", color="lightcoral")
plt.xticks(rotation=45, ha="right")
plt.title("Verteilung der Qualitätsfehler (L2) je Methode und Datensatz")
plt.ylabel("L2-Abweichung zum Referenz")
plt.xlabel("Methode | Datensatz")
plt.tight_layout()
plt.show()
