# Shapley value computation

We consider a Monte Carlo (or quasi-Monte Carlo) estimation technique before moving on to the discrete uniform Shapley value proposed in the article.

We imagine a scenario where k players each have some data and need to pool it to collaborate on tasks :

1 - MNIST digit classification with a twist: some of the data is noisy, so the players do not all have the same quality or quantity of data.

2 - An XGBoost regressor: the data is squeaky clean and well adjusted for each player. The goal is to compare the feature importances with those already implemented in XGBoost.


In [7]:
import shap
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
from xgboost import XGBClassifier

# Generate synthetic data.
# The global seed and the order of the random draws are kept as-is so the
# generated numbers (and therefore the reported importances) stay reproducible.
np.random.seed(42)
n_samples = 100
data = {
    'Feature1': np.random.normal(loc=0, scale=1, size=n_samples),
    'Feature2': np.random.normal(loc=2, scale=1.5, size=n_samples),
    'Feature3': np.random.uniform(low=-1, high=1, size=n_samples),
}

# Create a DataFrame
df = pd.DataFrame(data)

# Generate a target that depends on Feature1 and Feature2 plus Gaussian noise;
# Feature3 does not enter the target.
df['Target'] = (
    df['Feature1'] * 0.5
    + df['Feature2'] * 1.5
    + np.random.normal(loc=0, scale=1, size=n_samples)
)

X = df.drop('Target', axis=1)
y = df['Target']

# Turn the continuous target into two classes by splitting at the median
y = pd.qcut(y, q=2, labels=False)

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = XGBClassifier().fit(X_train, y_train)

# Create the explainer and SHAP values
explainer = shap.Explainer(model)
shap_values = explainer(X_train)

# Mean absolute SHAP value per feature (averaged over the training rows)
shap_sum = np.abs(shap_values.values).mean(axis=0)

# Build the table from named columns so that 'shap_importance' keeps a numeric
# dtype (transposing a list of lists would make both columns `object`).
importance_df = pd.DataFrame({
    'feature': X_train.columns.tolist(),
    'shap_importance': shap_sum.tolist(),
})

# Sort features by importance
importance_df = importance_df.sort_values('shap_importance', ascending=False)

# Select the top features; with only 3 features available, all are kept
top_k = 10
selected_features = importance_df.head(top_k)['feature'].tolist()

# Show the results
print(importance_df)
print(selected_features)

    feature shap_importance
1  Feature2        2.975537
0  Feature1        0.516939
2  Feature3        0.379312
['Feature2', 'Feature1', 'Feature3']


# Dataset degradation

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from scipy import stats
from utils import DU_mnist_ShapleyCalculator, run_experiment, add_noise, apply_blur, occlude_data, downsample_data, mislabel_data

# --- Scenario configuration and experiment execution ---

# Degradation type -> list of levels passed to run_experiment for that type.
scenarios = dict(
    noise=[0.1, 0.2, 0.3],
    blur=[3, 5, 7],
    occlusion=[0.1, 0.2, 0.3],
    downsampling=[2, 3, 4],
    mislabeling=[0.1, 0.2, 0.3],
)

num_players = [5, 10]  # Player counts to test

# One run_experiment call per (player count, degradation type, level),
# collected in that nesting order.
all_results = []
for n_players in num_players:
    for kind, severities in scenarios.items():
        all_results.extend(
            run_experiment(n_players, kind, severity) for severity in severities
        )

# --- Analysis and visualisation of the results ---

# Flatten every experiment result into one row per (player, repetition).
# Rows are gathered in a plain list and the DataFrame is built once:
# DataFrame.append was removed in pandas 2.0, and appending row by row
# copies the whole frame on every call.
result_columns = [
    'DU-Shapley',
    'True Shapley',
    'degradation_type',
    'degradation_level',
    'I',
    'Player',
    'Repetition',
]
rows = []
for r in all_results:
    for i in range(r['I']):
        # Iterate over the repetitions
        for rep in range(len(r['DU-Shapley'])):
            rows.append({
                'DU-Shapley': r['DU-Shapley'][rep][i],
                'True Shapley': r['True Shapley'][rep][i],
                'degradation_type': r['degradation_type'],
                'degradation_level': r['degradation_level'],
                'I': r['I'],
                'Player': i + 1,
                'Repetition': rep + 1,
            })

# Passing the columns explicitly keeps the schema even when there are no rows.
df = pd.DataFrame(rows, columns=result_columns)

def _show_boxplot(x, y, hue, title, zero_line=False):
    """Draw one seaborn box plot of ``df`` on a new 12x6 figure and show it.

    When ``zero_line`` is true, a dashed red horizontal line is drawn at y=0.
    """
    plt.figure(figsize=(12, 6))
    sns.boxplot(x=x, y=y, hue=hue, data=df)
    plt.title(title)
    if zero_line:
        plt.axhline(y=0, color='r', linestyle='--')
    plt.show()


# 1. Compare the distributions of DU-Shapley and True Shapley
_show_boxplot(
    'degradation_type', 'DU-Shapley', 'degradation_level',
    "Distribution de DU-Shapley par type et niveau de dégradation",
)
_show_boxplot(
    'degradation_type', 'True Shapley', 'degradation_level',
    "Distribution de la vraie valeur de Shapley par type et niveau de dégradation",
)

# 2. Bias of DU-Shapley: signed difference to the True Shapley column
df['Difference'] = df['DU-Shapley'] - df['True Shapley']
_show_boxplot(
    'degradation_type', 'Difference', 'degradation_level',
    "Biais de DU-Shapley (DU-Shapley - True Shapley) par type et niveau de dégradation",
    zero_line=True,  # reference line for zero bias
)

# 3. Correlation between the two columns for every scenario
for kind, severities in scenarios.items():
    for severity in severities:
        for n_players in num_players:
            in_scenario = (
                (df['degradation_type'] == kind)
                & (df['degradation_level'] == severity)
                & (df['I'] == n_players)
            )
            subset = df[in_scenario]
            correlation = subset['DU-Shapley'].corr(subset['True Shapley'])
            print(f"Corrélation entre DU-Shapley et True Shapley ({kind}, niveau {severity}, I={n_players}): {correlation:.3f}")

# 4. Result visualisation (example: impact of the number of players)
_show_boxplot(
    'I', 'Difference', 'degradation_type',
    "Biais de DU-Shapley en fonction du nombre de joueurs",
    zero_line=True,
)

# --- Statistical tests ---

# 1. Paired t-test comparing DU-Shapley and True Shapley for each scenario
for kind, severities in scenarios.items():
    for severity in severities:
        for n_players in num_players:
            in_scenario = (
                (df['degradation_type'] == kind)
                & (df['degradation_level'] == severity)
                & (df['I'] == n_players)
            )
            subset = df[in_scenario]
            t_statistic, p_value = stats.ttest_rel(subset['DU-Shapley'], subset['True Shapley'])
            print(f"Test t pour {kind}, niveau {severity}, I={n_players}:")
            print(f"  Statistique t: {t_statistic:.3f}")
            print(f"  Valeur p: {p_value:.3f}")

# 2. One-way ANOVA testing the effect of the degradation level on the
#    DU-Shapley bias, for each degradation type and player count
for kind in scenarios:
    for n_players in num_players:
        subset = df[(df['degradation_type'] == kind) & (df['I'] == n_players)]
        # One sample of differences per distinct level, in order of first appearance
        samples = [
            subset.loc[subset['degradation_level'] == g, 'Difference']
            for g in subset['degradation_level'].unique()
        ]
        f_statistic, p_value = stats.f_oneway(*samples)
        print(f"ANOVA pour l'effet du niveau de {kind} sur le biais, I={n_players}:")
        print(f"  Statistique F: {f_statistic:.3f}")
        print(f"  Valeur p: {p_value:.3f}")