In [None]:
"""
ANCHOR EXPLANATIONS – IRIS DATASET (TABULAR EXAMPLE)

This script:
1. Loads the Iris dataset.
2. Trains a Random Forest classifier (treated as a black-box model).
3. Builds an AnchorTabular explainer using Alibi.
4. Generates an anchor explanation (IF-rule) for one test instance.
5. Prints a human-readable summary of the explanation.
"""

# ================================
# STEP 0 – Install dependencies
# ================================
# Run this ONCE in a notebook/Colab if these are not installed
# (%pip installs into the environment of the running kernel):
# %pip install alibi scikit-learn


In [12]:
# ================================
# STEP 1 – Import libraries
# Summary:
#   - We import scikit-learn for the dataset and model,
#   - and Alibi for the AnchorTabular explainer.
# ================================

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from alibi.explainers import AnchorTabular


In [13]:
# ================================
# STEP 2 – Load the Iris dataset
# Summary:
#   - X: feature matrix (measurements of iris flowers)
#   - y: target labels (species IDs)
#   - feature_names: names of the input features (columns)
#   - class_names: human-readable class labels (species names)
# ================================

# Bunch object bundling the measurements, the labels, and their names.
iris = load_iris()
X, y = iris.data, iris.target
feature_names, class_names = iris.feature_names, iris.target_names

# Quick sanity check of what was loaded.
print("Features:", feature_names)
print("Classes :", class_names)
print("Data shape (rows, cols):", X.shape)


Features: ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Classes : ['setosa' 'versicolor' 'virginica']
Data shape (rows, cols): (150, 4)


In [14]:
# ================================
# STEP 3 – Train–test split
# Summary:
#   - Split the data into training and test sets.
#   - We'll train the black-box model on the training set
#     and use the test set for both evaluation and explanation.
# ================================

# Hold out 20% of the rows for testing. stratify=y splits by class label,
# and the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Confirm the sizes of the two partitions.
print("Train shape:", X_train.shape)
print("Test shape :", X_test.shape)


Train shape: (120, 4)
Test shape : (30, 4)


In [15]:
# ================================
# STEP 4 – Train the black-box model
# Summary:
#   - We use a RandomForestClassifier as our black-box model.
#   - We will NOT look inside its trees; we only use its predictions.
# ================================

# 200-tree random forest with a fixed random_state; fit() returns the fitted
# estimator, so construction and training are chained.
model = RandomForestClassifier(n_estimators=200, random_state=42).fit(
    X_train, y_train
)

# Mean accuracy on the data the model saw and on the held-out rows.
train_acc, test_acc = model.score(X_train, y_train), model.score(X_test, y_test)

print(f"Train accuracy: {train_acc:.3f}")
print(f"Test accuracy : {test_acc:.3f}")


Train accuracy: 1.000
Test accuracy : 0.900


In [17]:
# ================================
# STEP 5 – Define predict_fn and build Anchor explainer
# Summary:
#   - AnchorTabular needs a prediction function; ours returns CLASS LABELS
#     (Alibi also accepts class probabilities and takes the argmax itself).
#   - We create predict_fn(x) that calls model.predict(x).
#   - Then we configure AnchorTabular with feature names and (empty) categorical info.
#   - Finally, we fit the explainer so it can learn how to discretize features.
# ================================

# Prediction function for the explainer (returns class labels via model.predict)
def predict_fn(x: np.ndarray) -> np.ndarray:
    """
    Black-box prediction hook handed to the Anchor explainer.

    x: 2D numpy array, one row per sample and one column per feature
    returns: the output of model.predict(x), i.e. one class label per row
    """
    labels = model.predict(x)
    return labels

# No categorical features in Iris; everything is numeric, so the
# categorical_names mapping is left empty.
categorical_names = {}

# The anchor search draws random perturbation samples, so without a seed the
# reported rule/precision/coverage can change from run to run. Passing `seed`
# makes a fresh Restart & Run All reproducible.
# NOTE(review): Alibi applies the seed when the explainer is constructed, so
# re-run this cell before re-running explain() if identical numbers are needed.
explainer = AnchorTabular(
    predictor=predict_fn,
    feature_names=feature_names,
    categorical_names=categorical_names,
    seed=42,
)

# disc_perc defines percentiles used to bucket continuous features into bins
explainer.fit(X_train, disc_perc=(25, 50, 75))

print("AnchorTabular explainer is fitted and ready.")

AnchorTabular explainer is fitted and ready.


In [18]:
# ================================
# STEP 6 – Choose a test instance to explain
# Summary:
#   - We pick one flower from the test set.
#   - We show its feature values.
#   - We get the model's predicted class and the true class.
# ================================

# Row of the test set to explain; edit `i` to look at a different flower.
i = 0
# Promote the 1-D row to shape (1, n_features) for model.predict.
instance = np.atleast_2d(X_test[i])

print("Instance feature values:")
for name, value in zip(feature_names, instance[0]):
    print(f"  {name:20s}: {value:.2f}")

# Integer class IDs: what the model says vs. the ground truth for this row.
true_label = y_test[i]
pred_label = model.predict(instance)[0]

print("\nModel predicted class:", class_names[pred_label])
print("True class           :", class_names[true_label])

Instance feature values:
  sepal length (cm)   : 4.40
  sepal width (cm)    : 3.00
  petal length (cm)   : 1.30
  petal width (cm)    : 0.20

Model predicted class: setosa
True class           : setosa


In [19]:
# ================================
# STEP 7 – Generate Anchor explanation
# Summary:
#   - We ask AnchorTabular to find an anchor (IF-rule) for this instance.
#   - threshold = 0.95 means:
#       We want a rule such that, when it holds, the model
#       predicts the same class at least 95% of the time.
#   - The result includes:
#       * explanation.anchor   -> the IF-conditions (list of strings)
#       * explanation.precision -> how often the prediction is stable
#       * explanation.coverage  -> how often the rule applies in the data
# ================================

# Search for an anchor for the single 1-D row, targeting 95% precision.
explanation = explainer.explain(instance[0], threshold=0.95)

# The anchor is a list of condition strings; show it as one conjunction.
print("\nANCHOR IF-RULE:", " AND ".join(explanation.anchor), sep="\n")

print("\nApproximate precision:", explanation.precision)
print("Approximate coverage :", explanation.coverage)



ANCHOR IF-RULE:
petal length (cm) <= 1.60

Approximate precision: 1.0
Approximate coverage : 0.3071


In [20]:
# ================================
# STEP 8 – Human-readable summary
# Summary:
#   - Convert the anchor explanation into simple English.
#   - This is what you might show to a non-technical stakeholder.
# ================================

# Species name for the class ID the model predicted for this instance.
predicted_class_name = class_names[pred_label]

print("\n=== HUMAN-READABLE EXPLANATION ===")
print(f"For this flower, the model predicted: {predicted_class_name}")

if explanation.anchor:
    # Usual case: one bullet per IF-condition in the anchor.
    print("The following conditions form an ANCHOR for this prediction:")

    for cond in explanation.anchor:
        print(f"  - {cond}")

    print(
        f"\nThis means: If these conditions are true, "
        f"the model predicts '{predicted_class_name}' "
        f"with about {explanation.precision * 100:.1f}% precision."
    )
    print(
        f"The rule applies to roughly {explanation.coverage * 100:.1f}% of similar data points."
    )
else:
    # An empty anchor has no conditions to list; say so explicitly instead of
    # printing a header followed by nothing and an "if these conditions" claim.
    print("The anchor is EMPTY: no feature conditions were needed.")
    print(
        f"\nThis means: across the sampled data points, "
        f"the model predicts '{predicted_class_name}' "
        f"with about {explanation.precision * 100:.1f}% precision "
        f"regardless of the feature values."
    )



=== HUMAN-READABLE EXPLANATION ===
For this flower, the model predicted: setosa
The following conditions form an ANCHOR for this prediction:
  - petal length (cm) <= 1.60

This means: If these conditions are true, the model predicts 'setosa' with about 100.0% precision.
The rule applies to roughly 30.7% of similar data points.
