# Purpose of notebook
This notebook examplifies the replication of the shapleyPermEx and shapleyPermRand functions from the R package 'sensitivity'. I implement the functions to see if we get the same results as the R package in order to consequently apply the methods to the respy model.

In [1]:
# import neccesary packages
import numpy as np
import openturns as ot
import pandas as pd

## Shapley function

In [2]:
# define shapley function as in the R package
def ShapleyPerm(method, m, model, Xall, Xcond, d, Nv, No, Ni=3):
    """
    """
    if (method == 'exact'):
        perms_tool = ot.KPermutations(d, d)
        perms = perms_tool.generate()

        m = perms.getSize() # number of permutation
    else:
        perms = np.zeros((m,d), dtype = np.int64)
        for i in range(m):
            perms[i] = np.random.permutation(d) # random permutation
    
    #------------------------------
    # Creation of the design matrix
    #------------------------------
    
    X = np.zeros((Nv+m*(d-1)*No*Ni, d)) 
    X[:Nv,:] = Xall(Nv)

    for p in range(m):
    
        pi = perms[p]
        pi_sorted = np.argsort(pi)
    
        for j in range(1,d):
        
            Sj = pi[:j] # set of the 0st-(j-1)th elements in pi      
            Sjc = pi[j:] # set of the jth-dth elements in pi
        
            xjcM = Xcond(No,Sjc,None,None)# sampled values of the inputs in Sjc

            for l in range(No):
                
                xjc = xjcM[l,:]
            
                # sample values of inputs in Sj conditional on xjc
                xj = Xcond(Ni, Sj, Sjc, xjc)
                xx = np.concatenate((xj, np.ones((Ni,1))*xjc), axis = 1)
                ind_inner = Nv + p*(d-1)*No*Ni + (j-1)*No*Ni + l*Ni
                X[ind_inner:(ind_inner + Ni),:] = xx[:,pi_sorted]
    
    #-----------------------
    # Calcul of the response
    #-----------------------
    
    y = model(X)

    #-----------------------------------------------------------------
    # Initialize Shapley, main and total Sobol effects for all players
    #-----------------------------------------------------------------
    
    Sh = np.zeros(d)
    Vsob = np.zeros(d)
    Tsob = np.zeros(d)
    
    nV = np.zeros(d) # number of samples used to estimate V1,...,Vd
    nT = np.zeros(d) # number of samples used to estimate T1,...,Td
    
    #----------------
    # Estimate Var[Y]
    #----------------
    
    Y = y[:Nv]
    y = y[Nv:]
    EY = np.mean(Y)
    VarY = np.var(Y)

    #-----------------------------------------------
    # Estimate Shapley, main and total Sobol effects
    #-----------------------------------------------
    
    cVar = np.zeros(No)

    for p in range(m):
    
        pi = perms[p]
        prevC = 0
    
        for j in range(d):
            if (j == (d-1)):
                Chat = VarY
                delta = Chat - prevC
                Vsob[pi[j]] = Vsob[pi[j]] + prevC # first order effect
                nV[pi[j]] = nV[pi[j]] + 1
            else:
                for l in range(No):
                    Y = y[:Ni]
                    y = y[Ni:]
                    cVar[l] = np.var(Y)
                Chat = np.mean(cVar)
                delta = Chat - prevC
      
            Sh[pi[j]] = Sh[pi[j]] + delta
        
            prevC = Chat
        
            if (j == 0):
                Tsob[pi[j]] = Tsob[pi[j]] + Chat # Total effect
                nT[pi[j]] = nT[pi[j]] + 1
    
    Sh = Sh / m / VarY
    
    if (method == 'exact'):
        Vsob = Vsob / (m/d) / VarY # averaging by number of permutations with j=d-1
        Vsob = 1 - Vsob 
        Tsob = Tsob / (m/d) / VarY # averaging by number of permutations with j=1 
    else:
        Vsob = Vsob / nV / VarY # averaging by number of permutations with j=d-1
        Vsob = 1 - Vsob 
        Tsob = Tsob / nT / VarY # averaging by number of permutations with j=1 
    
    col = ['X' + str(i) for i in np.arange(d)+1]
    effects = pd.DataFrame(np.array([Sh,Vsob,Tsob]), index = ['Shapley effects', 'First order Sobol', 'Total Sobol'], columns = col)

    return effects

# Conditional sampling functions
These funcitons estimate the conditional vector for use in the shapley function

In [3]:
# Calculate conditional mean and variance
def condMVN(mean, cov, dependent_ind, given_ind, X_given):
    """ Returns conditional mean and variance of X[dependent.ind] | X[given.ind] = X.given
    where X is multivariateNormal(mean = mean, covariance = cov)"""
    
    cov = np.array(cov)
    
    B = cov.take(dependent_ind, axis = 1)
    B = B[dependent_ind]
    
    C = cov.take(dependent_ind, axis = 1)
    C = C[given_ind]
    
    D = cov.take(given_ind, axis = 1)
    D = D[given_ind]
    
    CDinv = np.dot(np.transpose(C),np.linalg.inv(D))
    
    condMean = mean[dependent_ind] + np.dot(CDinv,(X_given - mean[given_ind]))
    condVar = B - np.dot(CDinv,C)
    condVar = ot.CovarianceMatrix(condVar)
    
    return condMean,condVar

#condMVN(moyenne, cov, dependent_ind = np.array([1,0]), given_ind= np.array([2]), X_given= np.array([10]))

In [4]:
# Generate conditional law
def r_condMVN(n, mean, cov, dependent_ind, given_ind, X_given):
    """ Function to simulate conditional gaussian distribution of X[dependent.ind] | X[given.ind] = X.given
    where X is multivariateNormal(mean = mean, covariance = cov)"""
    
    cond_mean,cond_var = condMVN(mean, cov, dependent_ind, given_ind, X_given)
    distribution = ot.Normal(cond_mean,cond_var)
    return distribution.getSample(n)

## Evaluation Shapley effects on linear test model model

In [5]:
# Ni = 3
def gaussian_model(X):
    return np.sum(X,1)

d = 3
moyenne = np.zeros(3)
cov = np.array([[1.0, 0, 0], [0, 1.0, 1.8], [0, 1.8, 4.0]])
cov = ot.CovarianceMatrix(cov)

def Xall(n):
    distribution = ot.Normal(moyenne,cov)
    return distribution.getSample(n)

def Xcond(n, Sj, Sjc, xjc):
    if Sjc is None:
        cov_int = np.array(cov)
        cov_int = cov_int.take(Sj, axis = 1)
        cov_int = cov_int[Sj]        
        cov_int = ot.CovarianceMatrix(cov_int)
        distribution = ot.Normal(moyenne[Sj],cov_int)
        return distribution.getSample(n)
    else:
        return r_condMVN(n,mean = moyenne, cov = cov, dependent_ind = Sj, given_ind = Sjc, X_given = xjc)

# Exact method
method = 'exact'
m = None
Nv = 10**4
No = 10**3
Ni = 3

index = ShapleyPerm(method,m,gaussian_model, Xall, Xcond, d, Nv, No, Ni)
print('Exact method \n' + str(index) + '\n\n')

# Random method
method = 'random'
m = 6000
Nv = 10**4
No = 1
Ni = 3

index = ShapleyPerm(method,m,gaussian_model, Xall, Xcond, d, Nv, No, Ni)
print('Random method \n' + str(index) + '\n')

Exact method 
                         X1        X2        X3
Shapley effects    0.179902  0.393545  0.426553
First order Sobol  0.399626  0.874549  0.913776
Total Sobol        0.068521  0.013683  0.052918


Random method 
                         X1        X2        X3
Shapley effects    0.187076  0.395449  0.417475
First order Sobol  0.429143  0.877724  0.920397
Total Sobol        0.067125  0.013176  0.051249



In [6]:
# Ni = 100
def gaussian_model(X):
    return np.sum(X,1)

d = 3
moyenne = np.zeros(3)
cov = np.array([[1.0, 0, 0], [0, 1.0, 1.8], [0, 1.8, 4.0]])
cov = ot.CovarianceMatrix(cov)

def Xall(n):
    distribution = ot.Normal(moyenne,cov)
    return distribution.getSample(n)

def Xcond(n, Sj, Sjc, xjc):
    if Sjc is None:
        cov_int = np.array(cov)
        cov_int = cov_int.take(Sj, axis = 1)
        cov_int = cov_int[Sj]        
        cov_int = ot.CovarianceMatrix(cov_int)
        distribution = ot.Normal(moyenne[Sj],cov_int)
        return distribution.getSample(n)
    else:
        return r_condMVN(n,mean = moyenne, cov = cov, dependent_ind = Sj, given_ind = Sjc, X_given = xjc)

# Exact method
method = 'exact'
m = None
Nv = 10**4
No = 10**3
Ni = 100

index = ShapleyPerm(method,m,gaussian_model, Xall, Xcond, d, Nv, No, Ni)
print('Exact method \n' + str(index) + '\n\n')

# Random method
method = 'random'
m = 6000
Nv = 10**4
No = 1
Ni = 100

index = ShapleyPerm(method,m,gaussian_model, Xall, Xcond, d, Nv, No, Ni)
print('Random method \n' + str(index) + '\n')

Exact method 
                         X1        X2        X3
Shapley effects    0.103177  0.419059  0.477764
First order Sobol  0.101765  0.817796  0.876114
Total Sobol        0.104323  0.019857  0.079092


Random method 
                         X1        X2        X3
Shapley effects    0.107001  0.408174  0.484825
First order Sobol  0.114354  0.818659  0.876980
Total Sobol        0.103367  0.019398  0.079125

