In [2]:
!pip install interpret





In [3]:
import pandas as pd             # data manipulation
import numpy as np              # number manipulation/crunching
import matplotlib.pyplot as plt # plotting
# Classification report (per-class precision / recall / F1)
from sklearn.metrics import classification_report 
# Train/test split helper
from sklearn.model_selection import train_test_split
# Random forest classifier
from sklearn.ensemble import RandomForestClassifier

In [4]:
# Read the dataset from the CSV that sits next to the notebook.
# NOTE: the file carries a saved row index, which pandas loads as the
# 'Unnamed: 0' column; the feature selection further down names its columns
# explicitly, so that column is not used as a feature.
diabetes = pd.read_csv(filepath_or_buffer="Data.csv")
# Preview the first five rows.
diabetes.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# Target vector: the 'Outcome' label column.
Y = diabetes['Outcome']
# Feature matrix: the eight named measurement columns.
X = diabetes[['Pregnancies','Glucose','BloodPressure', 'SkinThickness', 'Insulin','BMI','DiabetesPedigreeFunction','Age']]
X_featurenames = X.columns
# Split the data into train and test data (80% / 20%).
# A fixed random_state makes the shuffle deterministic, so the metrics and
# explanations computed from this split are reproducible on every
# Restart & Run All instead of changing with each execution.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, random_state = 0)

In [6]:
# Build the model with the random forest *classifier*:
# 100 trees, each limited to depth 20, seeded for repeatable fits.
rf_params = {"n_estimators": 100, "max_depth": 20, "random_state": 0}
model = RandomForestClassifier(**rf_params)
# fit() returns the estimator, so leaving it as the last expression
# displays the fitted model's repr as the cell output.
model.fit(X_train, Y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=20, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [7]:
# Display names for the two classes, in label order.
class_labels = ["Diabetes -ve", "Diabetes +ve"]

# Predict on the held-out rows and print the per-class report.
y_pred = model.predict(X_test)
report_text = classification_report(Y_test, y_pred, target_names=class_labels)
print(report_text)

              precision    recall  f1-score   support

Diabetes -ve       0.80      0.85      0.82       101
Diabetes +ve       0.67      0.58      0.63        53

    accuracy                           0.76       154
   macro avg       0.74      0.72      0.72       154
weighted avg       0.75      0.76      0.76       154



### Show blackbox model performance 

In [9]:
from interpret import show
from interpret.perf import ROC

# Wrap the classifier's probability function in a ROC performance explainer,
# evaluate it on the held-out split, and render the result.
roc_explainer = ROC(model.predict_proba)
blackbox_perf = roc_explainer.explain_perf(X_test, Y_test, name='Blackbox')
show(blackbox_perf)

### Global Explanations

In [11]:
from interpret.blackbox import MorrisSensitivity

# Build a Morris sensitivity explainer around the classifier's probability
# function, using the training features as its data.
sensitivity = MorrisSensitivity(
    predict_fn=model.predict_proba,
    data=X_train,
)

# Compute the global explanation and render it.
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")
show(sensitivity_global)

In [13]:
from interpret.blackbox import PartialDependence

# Build a partial-dependence explainer around the classifier's probability
# function, using the training features as its data.
pdp = PartialDependence(
    predict_fn=model.predict_proba,
    data=X_train,
)

# Compute the global explanation and render it.
pdp_global = pdp.explain_global(name='Partial Dependence')
show(pdp_global)

### Local explanations

In [14]:
from interpret.blackbox import LimeTabular
from interpret import show

# Blackbox explainers take a predict function and, optionally, a dataset.
lime = LimeTabular(
    predict_fn=model.predict_proba,
    data=X_train,
    random_state=1,
)

# Explain the first five held-out rows; their true labels are passed along
# as well (optional for the explainer).
sample_X = X_test.iloc[:5]
sample_Y = Y_test.iloc[:5]
lime_local = lime.explain_local(sample_X, sample_Y, name='LIME')

show(lime_local)

In [17]:
# Hand all four explanation objects to show() in a single call.
all_explanations = [blackbox_perf, lime_local, sensitivity_global, pdp_global]
show(all_explanations)