# Example: n-Shapley Values for a Gradient Boosted Tree on the Folktables Income data set

In [None]:
import xgboost

from folktables import ACSDataSource, ACSIncome

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt
import seaborn as sns

# Global plot appearance for every figure in this notebook:
# white grid background, thicker axes/grid lines and enlarged fonts.
sns.set_style("whitegrid")
sns.set_context(
    "notebook",
    rc={'axes.linewidth': 2, 'grid.linewidth': 1},
    font_scale=1.5,
)

import nshap

### Load the data

In [None]:
# Load the 2016 1-Year ACS person survey for California
# (downloaded into ../data/ if it is not already there).
data_source = ACSDataSource(survey_year='2016',
                            horizon='1-Year',
                            survey='person',
                            root_dir='../data/')
data = data_source.get_data(states=["CA"], download=True)

# Features and labels of the income prediction task as numpy arrays;
# the third return value is not needed here.
X, Y, _ = ACSIncome.df_to_numpy(data)
feature_names = ACSIncome.features

# train-test split
# NOTE: the split is done *before* scaling so that the scaler never sees the
# test observations (avoids leaking test-set statistics into preprocessing).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, random_state=0)

# zero mean and unit variance for all features, estimated on the training set only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# keep X standardized as well, consistent with X_train / X_test
X = scaler.transform(X)

### Train the classifier

In [None]:
# Fit a gradient boosted tree with xgboost's default hyperparameters
# and report its accuracy on the held-out test set.
gbtree = xgboost.XGBClassifier()
gbtree.fit(X_train, Y_train)

test_predictions = gbtree.predict(X_test)
test_accuracy = accuracy_score(Y_test, test_predictions)
print(f'Accuracy: {test_accuracy:0.3f}')

### Define the value function

In [None]:
# Value function that nshap evaluates on feature subsets. It is built from
# the classifier's predicted probabilities with the training data as the
# reference data set.
# NOTE(review): target=0 presumably selects the first column of predict_proba
# (class 0); this matches the index [0] used in the shap comparison further
# down -- confirm against the nshap documentation.
vfunc = nshap.vfunc.interventional_shap(
    gbtree.predict_proba,
    X_train,
    target=0,
)

### Compute n-Shapley Values

In [None]:
%%time
# Compute the n-Shapley Values that explain the prediction for the first
# test observation, using the value function defined above. The cell is
# timed with %%time (the magic has to remain the first line of the cell).
n_shapley_values = nshap.n_shapley_values(X_test[0, :], vfunc)
# Alternative: instead of recomputing, load previously saved results by
# uncommenting the following line.
#n_shapley_values = nshap.load('n-shapley-values.json')

In [None]:
# Visualize the n-Shapley Values, labelling the bars with the feature names.
n_shapley_values.plot(
    feature_names=feature_names,
)

#### From the n-Shapley Values, we can obtain the 3-Shapley Values

In [None]:
# Derive the 3-Shapley Values from the already computed n-Shapley Values
# and plot them.
three_shapley_values = n_shapley_values.k_shapley_values(3)
three_shapley_values.plot(feature_names=feature_names)
plt.show()

#### ... the Shapley Interaction Values (k = 2)

In [None]:
# Derive the order-2 values (k = 2) from the n-Shapley Values and plot them.
shapley_interaction_values = n_shapley_values.k_shapley_values(2)
shapley_interaction_values.plot(feature_names=feature_names)

#### ... and the usual Shapley Values

In [None]:
# The shap package is used below for its force plots and, later, for
# computing Shapley Values with KernelExplainer as a comparison.
import shap
# Set up shap's JavaScript visualizations so that the force plots below
# can be rendered inside the notebook.
shap.initjs()

In [None]:
# Force plot of the ordinary Shapley Values obtained from the n-Shapley Values.
# The base value is the value function evaluated on the empty feature set
# for the explained observation.
x_explained = X_test[0, :]
base_value = vfunc(x_explained, [])
shap.force_plot(base_value, n_shapley_values.shapley_values())

### Let's compare this to the Shapley Values from the shap package

In [None]:
# Summarize the training data with 25 k-means centers and use them as the
# background data set of shap's KernelExplainer.
background_summary = shap.kmeans(X_train, 25)
explainer = shap.KernelExplainer(gbtree.predict_proba, background_summary)

# Shapley Values for the same test observation that was explained with nshap.
shap_values = explainer.shap_values(X_test[0, :])

In [None]:
# Force plot of the shap package's Shapley Values for output 0 (the same
# output index as the expected value used as base value).
# Depending on the installed shap version, shap_values is either a list with
# one array per model output (older releases) or a single array whose last
# axis enumerates the model outputs (newer releases). Handle both so that we
# always pick the per-feature values of output 0.
if isinstance(shap_values, list):
    class0_shap_values = shap_values[0]
else:
    class0_shap_values = shap_values[..., 0]
shap.force_plot(explainer.expected_value[0], class0_shap_values)