Install TreeInterpreter <br>
[Github](https://github.com/andosa/treeinterpreter) <br>
[Blog Explainer](http://blog.datadive.net/interpreting-random-forests/)


In [None]:
!pip install treeinterpreter

Setting up


In [None]:
from treeinterpreter import treeinterpreter as ti
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import numpy as np
 #Load Dataset
from sklearn.datasets import load_boston
# NOTE: load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this notebook needs scikit-learn < 1.2 to run as written.
boston = load_boston()
# Set model type as Random Forest Regressor.
# A fixed random_state makes the forest -- and therefore every prediction,
# bias and contribution printed below -- reproducible across runs.
rf = RandomForestRegressor(random_state=0)

# Calculating bias and feature contributions for two rows of data

In [None]:
# Let's use the first 300 rows of data for training
X = boston.data[:300]    # Input features (arrays are sliced with [], not called)
y = boston.target[:300]  # Target vector
rf.fit(X, y)             # Fit the model on X and y

In [None]:
# Select rows 300 and 309 as the two houses whose predictions we will explain
instances = boston.data[[300, 309]]
first_house, second_house = instances
# predict() expects a 2-D input, so each single row is wrapped in a list
print("Instance 0 prediction:", rf.predict([first_house]))
print("Instance 1 prediction:", rf.predict([second_house]))

In [None]:
# Use TreeInterpreter to calculate the bias and per-feature contributions
prediction, bias, contributions = ti.predict(rf, instances)

# For each instance, report its bias and its feature contributions,
# ordered from largest to smallest absolute contribution
for i, (row_bias, row_contribs) in enumerate(zip(bias, contributions)):
    print("Instance", i)
    print("Bias (trainset mean)", row_bias)
    print("Feature contributions:")
    ranked = sorted(
        zip(row_contribs, boston.feature_names),
        key=lambda pair: abs(pair[0]),
        reverse=True,
    )
    for contri, feature in ranked:
        print(feature, round(contri, 2))
    print("-" * 20)

Verifying the methodology: <br>
Bias + Contributions = Prediction <br>
This holds, as can be seen by comparing the two outputs below with the previously calculated predictions.

In [None]:
# The model's predictions as returned by TreeInterpreter
print(prediction)
# The same predictions rebuilt as bias + sum of each row's feature contributions
row_contribution_totals = np.sum(contributions, axis=1)
print(bias + row_contribution_totals)

# Calculating feature contributions of the whole model


Splitting the rows from index 300 onward into two subsets (~100 rows each)



In [None]:
# Two subsets: rows 300-399 and rows 400 onward
ds1, ds2 = boston.data[300:400], boston.data[400:]

# Average predicted value for each subset
for subset in (ds1, ds2):
    print(np.mean(rf.predict(subset)))

Predicting biases and contributions for both

In [None]:
# Decompose the predictions of each subset (ds1 first, then ds2)
# into prediction, bias and feature contributions
(prediction1, bias1, contributions1), (prediction2, bias2, contributions2) = (
    ti.predict(rf, subset) for subset in (ds1, ds2)
)

Calculating average contributions for each feature

In [None]:
# Average each feature's contribution over the rows (axis 0) of each subset
totalc1, totalc2 = (
    np.mean(subset_contribs, axis=0)
    for subset_contribs in (contributions1, contributions2)
)

In [None]:
# Sum of the per-feature differences in average contribution
contribution_gap = np.sum(totalc1 - totalc2)
print(contribution_gap)
# Difference between the two subsets' average predictions
prediction_gap = np.mean(prediction1) - np.mean(prediction2)
print(prediction_gap)

The sum of the feature contribution differences should be equal to the difference in average prediction

In [None]:
# Pair each feature with its difference in average contribution (ds1 - ds2)
# and list the pairs in descending order of that difference
contribution_diffs = zip(totalc1 - totalc2, boston.feature_names)
for diff, name in sorted(contribution_diffs, reverse=True):
    print(name, round(diff, 2))