In [None]:
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import shap
import torch
import numpy as np
from lime.lime_text import LimeTextExplainer

# Load the tokenizer and token-classification model from the local checkpoint directory
model_path = "../amharic_ner_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
# nn.Module.eval() returns the module itself, so loading and switching to
# inference mode can be written as one expression
model = AutoModelForTokenClassification.from_pretrained(model_path).eval()

# Build the Hugging Face NER pipeline that the SHAP and LIME wrappers call;
# each returned entity dict carries a 'score' confidence value
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
)

# ✅ Define a wrapper that gives SHAP a classifier-style output: one mean entity-confidence score per text
def shap_wrapper(texts):
    """Score each text with the mean confidence of its detected entities.

    SHAP needs a function that maps a batch of texts to a float array, so the
    structured NER output is collapsed to a single number per text.

    Args:
        texts: Iterable of input strings.

    Returns:
        A NumPy array with one single-element row per text. The element is the
        mean of the 'score' values returned by ``ner_pipeline`` for that text,
        or 0.0 when the pipeline returns no entities.
    """
    def _mean_confidence(text):
        # One pipeline call per text; only the confidence values are kept
        confidences = [entity['score'] for entity in ner_pipeline(text)]
        return np.mean(confidences) if confidences else 0.0

    # Wrap each score in a list so the result has a single output column
    return np.array([[_mean_confidence(text)] for text in texts])

# ✅ Build a SHAP explainer over raw text: the masker uses the model's
# tokenizer, and the scoring function is the wrapper defined above
text_masker = shap.maskers.Text(tokenizer)
explainer = shap.Explainer(shap_wrapper, masker=text_masker)

# The explainer takes a batch (list) of texts, even for a single sentence
sample_texts = ["አቶ መሀመድ አበበ በኢትዮ ማርት የንግድ ባለሞያ ነው"]
shap_values = explainer(sample_texts)

# ✅ Visualize token attributions for the first (only) text in the batch
shap.plots.text(shap_values[0])


LIME

In [None]:
from sklearn.pipeline import make_pipeline

class DummyNER:
    """Adapter exposing an NER pipeline through a ``predict_proba`` method.

    The wrapped callable is expected to return, for one text, a sequence of
    dicts that each contain a 'score' entry.
    """

    def __init__(self, pipeline):
        # Callable invoked once per text in predict_proba
        self.pipeline = pipeline

    def _entity_probability(self, text):
        """Return the mean entity 'score' for ``text``, or 0 if none were found."""
        found = self.pipeline(text)
        if not found:
            return 0
        return np.mean([entity['score'] for entity in found])

    def predict_proba(self, texts):
        """Return a two-column pseudo-probability array for ``texts``.

        Args:
            texts: Iterable of input strings.

        Returns:
            A NumPy array with one row per text: column 1 is the mean entity
            score (0 when no entities were returned) and column 0 is its
            complement, so each row sums to 1.
        """
        rows = []
        for text in texts:
            positive = self._entity_probability(text)
            rows.append([1 - positive, positive])
        return np.array(rows)

# LIME only needs a callable that maps a list of texts to a two-column
# probability array, so the adapter is used directly. It was previously
# wrapped in an sklearn Pipeline that had no transformer steps and was never
# fitted; the adapter has no `fit` method, and recent scikit-learn releases
# warn when an unfitted Pipeline is used for prediction.
# The `lime_pipeline` name is kept so `lime_pipeline.predict_proba` still works.
lime_pipeline = DummyNER(ner_pipeline)
lime_explainer = LimeTextExplainer(class_names=["not-entity", "entity"])

# Explain the sample sentence, reporting the 10 most influential words
exp = lime_explainer.explain_instance(
    "አቶ መሀመድ አበበ በኢትዮ ማርት የንግድ ባለሞያ ነው",
    lime_pipeline.predict_proba,
    num_features=10
)
exp.show_in_notebook(text=True)
