In [35]:
import pandas as pd
import numpy as np
import tensorflow.keras as k
import tensorflow as tf

--## import and format data ##--

In [36]:
iris=pd.read_csv("C:\\Users\\nickolas.oddo\\Desktop\\Iris Dataset Integrated Gradients\\iris.csv")
iris_featurenames=list(iris.columns[:-1])
class_lib={0:"Iris-setosa", 1:"Iris-versicolor",2:"Iris-virginica" }
feature_lib={i:item for i,item in enumerate(iris_featurenames)}
X,Y=np.array(iris.iloc[:,:4]),np.array(iris.iloc[:,4])
Y=k.utils.to_categorical(Y)

--### create model and load weights ###--

In [37]:
folder_path='C:\\Users\\nickolas.oddo\\Desktop\\Iris Dataset Integrated Gradients\\model weights\\irismod'
model=k.models.load_model(folder_path)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 4)]               0         
_________________________________________________________________
dense_layer (Dense)          (None, 10)                50        
_________________________________________________________________
output (Dense)               (None, 3)                 33        
Total params: 83
Trainable params: 83
Non-trainable params: 0
_________________________________________________________________


--### define functions for integrated gradients ###--

In [38]:
inputshape=X[np.newaxis,0].shape
outputshape=Y[np.newaxis,0].shape

#Using random baseline- can be used for expected gradients.
def baseline(input,sd=.25):
    noisy_input=input+np.random.normal(scale=sd,size=inputshape)
    noisy_input=noisy_input.reshape(inputshape)
    noisy_input=tf.convert_to_tensor(noisy_input,dtype='float32')
    return(noisy_input)

def convert_to_tensor(X,Y,input_index=0):
    x=np.array(X[input_index]).reshape(inputshape)
    y=np.array(Y[input_index,:]).reshape(outputshape)
#     b=np.array(baseline(x)).reshape((1,4))
    x=tf.convert_to_tensor(x, dtype='float32')
    y=tf.convert_to_tensor(y, dtype='float32')
#     b=tf.convert_to_tensor(b, dtype='float64')
    return(x,y)

#m_steps indicates the step size of the numerical integral to be calculated
m_steps=1000
alphas=tf.linspace(start=0.0,stop=1.0,num=m_steps+1)

def interpolate_features(baseline,features,alphas):
    alphas_x = alphas[:, tf.newaxis]
    alphas_x= tf.convert_to_tensor(alphas_x,dtype='float32')
    baseline_x =baseline
    # input_x = tf.expand_dims(features, axis=0)
    delta = features - baseline_x
    interp_feat = baseline_x +  alphas_x * delta
    return interp_feat

def compute_gradients(x,class_idx):
    with tf.GradientTape() as tape:
        tape.watch(x)
        yhat=model(x)[:,class_idx]
#         loss=tf.losses.categorical_crossentropy(y,yhat)
    grads=tape.gradient(yhat,x)
    return(grads)

def integral_approximation(gradients):
    # riemann_trapezoidal
    grads = (gradients[:-1] + gradients[1:]) / tf.constant(2.0)
    integrated_gradients = tf.math.reduce_mean(gradients, axis=0)
    return integrated_gradients

def integrated_gradients(X,Y,sample_idx,class_idx):
    x,y=convert_to_tensor(X,Y,sample_idx)
    b=baseline(X,sample_idx)
    interp_feat=interpolate_features(b,x,alphas)
    interp_grads=compute_gradients(interp_feat,class_idx)
    integ_gradients=(x-b)*integral_approximation(interp_grads)
    return(integ_gradients)

def calculate_integrated_gradients(input_index,baseline_vector,class_idx=0):
    x,y=convert_to_tensor(X,Y,input_index)
    b=baseline_vector
    interp_feat=interpolate_features(b,x,alphas)
    interp_grads=compute_gradients(interp_feat,class_idx)
    integ_gradients=(x-b)*integral_approximation(interp_grads)
    return(integ_gradients)

def repeated_ig(input_index,class_index,num_reps=10):
    a=calculate_integrated_gradients(input_index=input_index,class_idx=class_index).numpy()
    a=np.array([calculate_integrated_gradients(input_index=input_idx,class_idx=0).numpy() for i in range(1,num_reps)])
    return(np.mean(b,axis=0))

--### calculate integrated gradients ###--
integrated_gradients function:
X=features input
Y=target class
sample_idx= which observation to calculate on (0 to 149 observations) 
class_idx= which classification

In [48]:
input_idx=1 #which example to look at (0 through 150)

x,y=convert_to_tensor(X,Y,input_idx)
class_idx=np.argmax(model(x))
b=baseline(np.mean(X,axis=0)) #use mean of data with N(0,.25) noise
class_idx=np.argmax(model.predict(x))

ig=calculate_integrated_gradients(input_idx,b,class_idx)
tf.reduce_sum(ig).numpy()-(model(x)[class_idx]-model(b)[class_idx])

<tf.Tensor: shape=(3,), dtype=float32, numpy=array([-3.9875507e-04,  1.1775763e+00,  1.2191293e+00], dtype=float32)>

--### Convergence check: 
letting $\phi_i(x)$ be the integrated gradients feature attribution givien to feature $i$:

$\sum_{i=1}^k{\phi_i(x)}=F(x)-F(x{'})$ 

then
$\sum_{i=1}^k{\phi_i(x)}-[F(x)-F(x^\prime)] <\epsilon$  for some sufficiently small epsilon

in this case, the softmax output predicts three class probabilities.
A different feature attribution will be given to each class prediction probability.
showing the 
###--

In [54]:

f_x=model.predict(x)[0][class_idx]
f_b=model.predict(b)[0][class_idx]
ig=calculate_integrated_gradients(input_idx,class_idx)
# ig= repeated_ig(input_idx,class_idx,1)

conv_check=tf.reduce_sum(ig) - (f_x-f_b)
conv_check.numpy() #This should be close to zero
# calculate_integrated_gradients(input_idx,class_idx)

-0.24044162

In [51]:

a=ig
df=pd.DataFrame(a.numpy(), columns=iris_featurenames)
df.style.background_gradient(cmap='Greens', axis=1)

Unnamed: 0,sepal_l,sepal_w,petal_l,petal_w
0,0.854214,0.096945,-0.332911,-0.059522


In [198]:
repeated_ig(input_idx,class_idx,100

array([[-0.00019928,  0.00041836,  0.00290322,  0.01082668]],
      dtype=float32)

In [200]:
np.mean(b,axis=0)

array([[-0.00079678, -0.00023994,  0.00558045,  0.00111783]],
      dtype=float32)

In [178]:
input_idx=2
print(calculate_integrated_gradients(input_index=input_idx,class_idx=0))
print("predicted class: ",class_lib[np.argmax(model.predict(X[np.newaxis,input_idx]))])
print("true class: ", class_lib[np.argmax(Y[input_idx])])


tf.Tensor([[-0.04914541 -0.00057522  0.00023594  0.06285616]], shape=(1, 4), dtype=float32)
predicted class:  Iris-setosa
true class:  Iris-setosa


In [48]:
sum(model.predict(x))
integ_gradients

<tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.04584731, 0.00743598, 0.05995739, 0.03946051]], dtype=float32)>

In [36]:
integ_gradients.numpy

<bound method _EagerTensorBase.numpy of <tf.Tensor: shape=(1, 4), dtype=float32, numpy=array([[0.04584731, 0.00743598, 0.05995739, 0.03946051]], dtype=float32)>>

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([0, 0, 0], dtype=int64)>