In [1]:
from sklearn.model_selection import train_test_split
from sklearn import datasets, svm
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
import numpy as np
from math import sqrt
from shapleycomposition import ShapleyExplainer
from composition_stats import ilr, sbp_basis
from bifurc_tree import create_tree_from_sbp, init_graph, build_graph
import plotly.graph_objects as go

In [2]:
K = 10         #Index of the instance you want to test in the test set
N_class = 4    #Number of class, the dimension of the simplex is therefore N_class-1
N_feat  = 6    #In this example, since the number of feature of the digit dataset is quite large (64), we propose to reduce it with a PCA

#load the dataset, take a subset of N_class classes, scale it and split into a training and testing set
X, Y = datasets.load_digits(return_X_y=True)
subset_i = np.where(Y < N_class)
X = X[subset_i]
Y = Y[subset_i]
X = scale(X)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)

#Reduce the number of feature to N_feat with a PCA
pca = PCA(n_components=N_feat)
X_train = pca.fit_transform(X_train)
X_test  = pca.transform(X_test)


#train an SVM classifier
svc_linear = svm.SVC(kernel='rbf', probability=True)
svc_linear.fit(X_train, Y_train)

SVC(probability=True)

In [3]:
# explain all the predictions in the test set
explainer = ShapleyExplainer(svc_linear.predict_proba, X_train, N_class)
(shapley, base) = explainer.explain_instance(np.array(X_test[K]))

print("True label of the tested instance: ", end="")
print(Y_test[K])

#get the model prediction for the Kth instance of the test partition
pred = svc_linear.predict_proba(X_test[K].reshape(1,-1))
print("Prediction on the simplex: ",end="")
print(pred)
ilr_pred = ilr(pred)
print("Prediction in the ILR space: ",end="")
print(ilr_pred)

#The sum of the base distribution and the shapley composition in the ILR space is equal to the predicted probability distribution
sum_shap_base = np.array(shapley).sum(axis=0)+base
print("Sum of the shapley composition and the base distribution in the ILR space: ", end="")
print(sum_shap_base)

True label of the tested instance: 0
Prediction on the simplex: [[9.95656723e-01 5.01023879e-04 1.96691252e-03 1.87534040e-03]]
Prediction in the ILR space: [5.37012532 1.98382986 1.4440672 ]
Sum of the shapley composition and the base distribution in the ILR space: [5.37012528 1.98382982 1.44406723]


In [4]:
#SUMMARIZE WITH NORM, COSINE AND INNER PRODUCTS

(norm_shapley, proj_shap_class, cos_shap_shap) = explainer.summarize()


List of the features sorted by their Shapley strength (norm of their Shapley composition):
	 feature n.1: 4.4612056
	 feature n.3: 1.7281931
	 feature n.2: 0.923722
	 feature n.4: 0.3892875
	 feature n.5: 0.1527009
	 feature n.6: 0.1167266

Projection of the Shapley compositions on the class vectors:
		feat. n.1 	feat. n.2 	feat. n.3 	feat. n.4 	feat. n.5 	feat. n.6 	
class 1:  	4.2776829	0.4515825	0.9486388	0.3054007	0.1149726	0.0902013	
class 2:  	-1.0946137	-0.9100515	0.0673990	-0.2164928	-0.0981858	-0.0883336	
class 3:  	-2.5849525	0.2444913	0.6237005	0.1257939	0.0552071	-0.0342952	
class 4:  	-0.5981168	0.2139777	-1.6397383	-0.2147018	-0.0719939	0.0324275	

Cosine between each Shapley compositions:
		feat. n.1 	feat. n.2 	feat. n.3 	feat. n.4 	feat. n.5 	feat. n.6 	
feat. n. 1:	1.0000000	0.3945544	0.3261474	0.5815492	0.5500838	0.7947321	
feat. n. 2:	0.3945544	1.0000000	0.0792457	0.6668934	0.7410307	0.8324435	
feat. n. 3:	0.3261474	0.0792457	1.0000000	0.7866459	0.7245269	0.0187845	

In [6]:
#PLOT the 3D ILR SPACE (CORRESPONDING TO THE CHOSEN ILR COMPONENTS LISTED IN BALANCES).
#plot range [-lim, lim]
#If shapley_sum is True, the sum of the shapley vectors are summed from the base distribution to the prediction

fig = explainer.plot_ilr_space(balances=[1, 2, 3], lim=6, figsize=500,
                               names_classes=['0','1','2','3'], 
                               names_features=['1st prin. comp.','2nd prin. comp.','3rd prin. comp.','4th prin. comp.','5th prin. comp.','6th prin. comp.'])

fig = explainer.plot_ilr_space(balances=[1, 2, 3], shapley_sum=True, lim=6, figsize=500,
                               names_classes=['0','1','2','3'], 
                               names_features=['1st prin. comp.','2nd prin. comp.','3rd prin. comp.','4th prin. comp.','5th prin. comp.','6th prin. comp.'])
