```markdown
We are going to use the `lime` explainer from Microsoft's `interpret` library here. It seems to work only for binary classification problems; for multiclass classification it does not appear to work.
```

In [68]:
import time
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score

In [69]:
# Load scikit-learn's bundled breast-cancer dataset and pull out the pieces
# used by the rest of the notebook.
data_set = load_breast_cancer()
data = data_set.data
target = data_set.target
feature_names = data_set.feature_names
print(feature_names)
print(f"Whole Data: {data.shape}")

# Hold out 30% of the samples for testing; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=42,
)

print(f"Trainset: {X_train.shape}")
print(f"Testset: {X_test.shape}")

['mean radius' 'mean texture' 'mean perimeter' 'mean area'
 'mean smoothness' 'mean compactness' 'mean concavity'
 'mean concave points' 'mean symmetry' 'mean fractal dimension'
 'radius error' 'texture error' 'perimeter error' 'area error'
 'smoothness error' 'compactness error' 'concavity error'
 'concave points error' 'symmetry error' 'fractal dimension error'
 'worst radius' 'worst texture' 'worst perimeter' 'worst area'
 'worst smoothness' 'worst compactness' 'worst concavity'
 'worst concave points' 'worst symmetry' 'worst fractal dimension']
Whole Data: (569, 30)
Trainset: (398, 30)
Testset: (171, 30)


In [70]:
# Train the model
# Fits a 40-tree random forest on the training split. The fixed random_state
# seeds the forest so repeated runs build the same model.

print("Training the model...")
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() follows the wall clock and can jump if the
# system time is adjusted. The timer starts after the status message so only
# model construction and fitting are measured.
start = time.perf_counter()
model = RandomForestClassifier(n_estimators=40, random_state=42)
model.fit(X_train, y_train)
print(f"Training time: {time.perf_counter() - start}")

Training the model...
Training time: 0.24277782440185547


In [71]:
# Score the fitted model on the held-out test split.
y_pred = model.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy: ", test_accuracy)

# Macro averaging: F1 is computed per class and the per-class scores are
# averaged without weighting by class frequency.
test_macro_f1 = f1_score(y_true=y_test, y_pred=y_pred, average='macro')
print("F1 score: ", test_macro_f1)


Accuracy:  0.9707602339181286
F1 score:  0.9682592716338123


In [72]:
# Explain the model using interpret
from interpret import show
from interpret.blackbox import LimeTabular

# Wrap the fitted model in interpret's LIME explainer, handing it the
# training split and the dataset's feature names; the seed is fixed so the
# explainer is constructed the same way on every run.
lime = LimeTabular(
    model,
    X_train,
    random_state=42,
    feature_names=feature_names,
)

# Build local explanations for the first few test rows (with their true
# labels), then display the explanation stored under key 0.
n_explained = 5
lime_local = lime.explain_local(X_test[:n_explained], y_test[:n_explained])
show(lime_local, 0)