In [1]:
import numpy as np
import pandas as pd

# import shap
import sklearn

import scipy.special
import itertools

In [2]:
# Toy dataset: four observations described by three integer features.
df_data = pd.DataFrame(
    {
        "A": [2, 1, 2, 2],
        "B": [2, 2, 1, 1],
        "C": [2, 2, 2, 1],
    }
)
df_data

Unnamed: 0,A,B,C
0,2,2,2
1,1,2,2
2,2,1,2
3,2,1,1


In [3]:
# Each rule is a Python boolean expression, stored as a string, over a single
# variable name (x, y or z). The strings are evaluated with eval() by
# scoring_simul, which binds those variable names to row values.
dict_rules = {
    "rule1": "x>1",
    "rule2": "y>1",
    "rule3": "z>1",
}

In [4]:
def scoring_simul(row, activated_rules):
    """Score a single row against the rules that are currently switched on.

    Rule ``rule1`` is evaluated on ``row['A']`` (as ``x``), ``rule2`` on
    ``row['B']`` (as ``y``) and ``rule3`` on ``row['C']`` (as ``z``). A rule is
    evaluated only when the matching entry of ``activated_rules`` equals 1.

    Parameters
    ----------
    row : mapping indexable by 'A', 'B' and 'C'
        The observation to score.
    activated_rules : sequence indexable at positions 0, 1 and 2
        Activation flag for each of the three rules.

    Returns
    -------
    int
        1 when none of the evaluated rules yields False (which includes the
        case where no rule is activated), 0 otherwise.
    """
    outcomes = []

    # NOTE: eval() looks up x / y / z in this function's local scope, so the
    # local variable names below must match the names used in the rule strings.
    if activated_rules[0] == 1:
        x = row['A']
        outcomes.append(eval(dict_rules['rule1']))

    if activated_rules[1] == 1:
        y = row['B']
        outcomes.append(eval(dict_rules['rule2']))

    if activated_rules[2] == 1:
        z = row['C']
        outcomes.append(eval(dict_rules['rule3']))

    return 0 if False in outcomes else 1

In [5]:
def predict_proba(activated_rules):
    """Return the share of rows of ``df_data`` scored 1 for a rule activation.

    Every row of the module-level ``df_data`` is scored with
    ``scoring_simul`` using the same ``activated_rules``; the result is the
    sum of those 0/1 scores divided by the number of rows.
    """
    def score_row(row):
        return scoring_simul(row, activated_rules)

    labels = df_data.apply(score_row, axis=1).values
    return np.sum(labels) / len(labels)

In [6]:
def proba_combinations(activated_rules_arr):
    """Apply ``predict_proba`` to every row of a 2-D activation array.

    Each row of ``activated_rules_arr`` is one combination of rule activation
    flags; the result holds one ``predict_proba`` value per row.
    """
    return np.apply_along_axis(
        func1d=predict_proba,
        axis=1,
        arr=activated_rules_arr,
    )

**Weights**

From https://christophm.github.io/interpretable-ml-book/shap.html#definition:

"The intuition behind it is: We learn most about individual features if we can study their effects in isolation. If a coalition consists of a single feature, we can learn about this feature’s isolated main effect on the prediction. If a coalition consists of all but one feature, we can learn about this feature’s total effect (main effect plus feature interactions). If a coalition consists of half the features, we learn little about an individual feature’s contribution, as there are many possible coalitions with half of the features."

<br>

WARNING: it seems the research paper and the Medium articles do not refer to the same thing when mentioning weights.


<ins>In the research paper</ins> (implementation below): the SHAP values are the coefficients of the weighted linear regression $\beta = (X^TWX)^{-1}X^TWy$, where $W$ is the diagonal weight matrix given by the "Shapley kernel". The weights are computed as follows:

$$\omega_{z'} = \frac{M-1}{\binom{M}{|z'|}\,|z'|\,(M-|z'|)}$$

where $z'$ is a coalition, $|z'|$ is the number of features it contains, and $M$ is the total number of features.

<ins>In the Medium article</ins>: the weights are computed by looking at the edges of the powerset.

In [7]:
# Implementation from 
# https://shap.readthedocs.io/en/latest/example_notebooks/tabular_examples/model_agnostic/Simple%20Kernel%20SHAP.html

def powerset(iterable):
    """Lazily yield every subset of ``iterable`` as a tuple.

    Subsets come out ordered by increasing size, starting with the empty
    tuple and ending with the tuple holding every element.
    """
    items = list(iterable)
    subsets_by_size = (
        itertools.combinations(items, size) for size in range(len(items) + 1)
    )
    return itertools.chain.from_iterable(subsets_by_size)

def shapley_kernel(M,s):
    """Return the Shapley kernel weight of a coalition of size ``s`` out of ``M``.

    For the empty and the full coalition the formula's denominator is zero,
    so a large constant weight (10000) is returned instead.
    """
    if s == 0 or s == M:
        return 10000

    n_coalitions_of_size = scipy.special.binom(M, s)
    denominator = n_coalitions_of_size * s * (M - s)
    return (M - 1) / denominator

def kernel_shap(f, x, reference, M):
    """Estimate SHAP values by enumerating all coalitions and fitting a
    kernel-weighted linear regression.

    All ``2**M`` coalitions of the ``M`` features are enumerated. For each
    coalition, the features it contains take their value from ``x`` and the
    others take their value from ``reference``. ``f`` is evaluated once on the
    resulting inputs, and its outputs are regressed on the coalition
    indicator columns plus a constant column, using ``shapley_kernel`` weights.

    Parameters
    ----------
    f : callable
        Called once with a ``(2**M, M)`` float array; its result is used as
        the regression target (one value per row).
    x : array-like of length M
        The instance to explain.
    reference : scalar or array-like of length M
        Baseline values used for features absent from a coalition.
    M : int
        Number of features.

    Returns
    -------
    numpy.ndarray
        Regression coefficients of length ``M + 1``: one per feature,
        followed by the coefficient of the constant column (intercept).

    Raises
    ------
    numpy.linalg.LinAlgError
        If the weighted normal matrix is singular.
    """
    # Fancy indexing below (x[s]) needs an array, so accept plain lists too.
    x = np.asarray(x)
    n_coalitions = 2 ** M

    # X: coalition indicator matrix with a trailing column of ones (intercept).
    X = np.zeros((n_coalitions, M + 1))
    X[:, -1] = 1
    weights = np.zeros(n_coalitions)

    # V: model inputs; every row starts as the reference point (broadcast).
    V = np.zeros((n_coalitions, M))
    V[:] = reference

    for i, s in enumerate(powerset(range(M))):
        s = list(s)
        V[i, s] = x[s]
        X[i, s] = 1
        weights[i] = shapley_kernel(M, len(s))
    y = f(V)

    # X.T * weights scales column j of X.T by weights[j], i.e. it equals
    # X.T @ diag(weights) without materialising the 2**M x 2**M diagonal matrix.
    XtW = X.T * weights

    # Solve the weighted normal equations (X^T W X) beta = X^T W y directly
    # rather than forming the explicit inverse.
    return np.linalg.solve(np.dot(XtW, X), np.dot(XtW, y))

In [8]:
# Explain the input where all three rules are switched on, relative to a
# reference where every rule is switched off.
M = 3
reference = np.zeros(M)
phi = kernel_shap(
    f=proba_combinations,
    x=np.array([1, 1, 1]),
    reference=reference,
    M=M,
)
# The trailing coefficient is the regression intercept, not a feature attribution.
shap_values = phi[:-1]

print("shap_values =", shap_values)

shap_values = [-0.25  -0.375 -0.125]
