In [None]:
# !pip install --quiet shap==0.39

### SHAP with transformers

This notebook is a quick tutorial on loading a pre-trained language model (PLM) and investigating its explainability with SHAP on a classification task such as sentiment analysis. Any classification task can be used; *ideally*, the model should have been trained on that particular classification task. For more details on SHAP, see the package documentation [here](https://shap.readthedocs.io).




In [None]:
import argparse
import os
import sys
from datetime import datetime
from pathlib import Path

# import bios
import numpy as np
import scipy as sp
import shap
import torch
import transformers
from numpy.lib.histograms import _histogram_dispatcher
from torchnlp.encoders import Encoder
from torchnlp.encoders.text import stack_and_pad_tensors
from torchnlp.encoders.text.text_encoder import TextEncoder
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    TextClassificationPipeline,
)

# from tokenizer import Tokenizer

# add the sys path for models
sys.path.append("../")

from models.transformer_plms.hf_transformer_classifier import IncidentModel

#### Using a trained classifier loaded with transformers `AutoModelForSequenceClassification`

In [None]:
# Path to the checkpoint directory of a model trained on the classification
# task of interest; both the model and the tokenizer are loaded from it below.
model_dir = "./model/"  # replace with your own directory

In [None]:
# Restore the tokenizer and the trained classifier from the same checkpoint
# directory.
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForSequenceClassification.from_pretrained(model_dir)

In [None]:
# Display the model repr to confirm which class/architecture was loaded
model

##### If using `AutoModelForSequenceClassification`, the transformers pipeline can be used

In [None]:
# Inspect the index -> label mapping stored in the model config
model.config.id2label

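The transformers pipeline only works with models that use an unmodified `AutoModel` class (for example `AutoModelForSequenceClassification`); for custom model classes, use the manual approach further below.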

In [None]:
# Wrap the model and tokenizer in a transformers text-classification pipeline
# that returns the scores of all classes (add device=0 if CUDA is wanted).
pipe = transformers.pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    return_all_scores=True,
)

In [None]:
# function


def score_and_visualize(text):
    """
    Explain `text` with SHAP through the transformers pipeline and plot it.

    Builds a ``shap.Explainer`` around the module-level ``pipe``, computes the
    SHAP values for ``text`` and renders them with ``shap.plots.text``.

    Args:
        text: Input handed straight to the explainer (the notebook calls this
            with a list of strings).

    Returns:
        None. The plot is displayed as a side effect.
    """
    pipeline_explainer = shap.Explainer(pipe)
    shap.plots.text(pipeline_explainer(text))

In [None]:
# Short example inputs whose predictions will be explained
example_texts = [
    "patient fell down and broke their leg",
    "severe breathing problems",
    "no idea",
]

# Build a SHAP explainer directly on top of the transformers pipeline
explainer = shap.Explainer(pipe)

In [None]:
# Compute SHAP values for the example texts with the pipeline explainer
shap_values = explainer(example_texts)

Visualize the impact on all the output classes

In the plots below, when you hover your mouse over an output class you get the explanation for that output class. When you click an output class name then that class remains the focus of the explanation visualization until you click another class.

The base value is what the model outputs when the entire input text is masked, while $f_{output class}(inputs)$ is the output of the model for the full original input. The SHAP values explain in an additive way how the impact of unmasking each word changes the model output from the base value (where the entire input is masked) to the final prediction value.

In [None]:
# Render the SHAP text plot for the explained examples
shap.plots.text(shap_values)

In [None]:
# The wrapper can be used instead (it builds its own explainer from `pipe`)
score_and_visualize(example_texts)

### Manual approach without the transformers pipeline
__NOTE__: it is recommended to try the pipeline method above first.

In [None]:
def model_prediction_gpu(x, max_length=10):
    """Score a batch of texts on the GPU for use as a SHAP prediction function.

    Relies on the module-level ``tokenizer`` and ``model``; the model must
    already have been moved to the GPU (``model.cuda()``).

    Args:
        x: Iterable of strings to score.
        max_length: Number of token ids every text is padded or truncated to.
            Tokens beyond this limit are dropped before the model sees them.

    Returns:
        numpy array holding, for each text, the logit of the softmax
        probability of every output class.
    """
    # shap expects this per-text list comprehension - passing the whole batch
    # to the tokenizer in the usual way breaks.
    token_ids = torch.tensor(
        [
            tokenizer.encode(
                v, padding="max_length", max_length=max_length, truncation=True
            )
            for v in x
        ]
    ).cuda()
    # Mark real tokens with 1 and padding with 0, using the tokenizer's own
    # pad id instead of assuming it is 0.
    attention_mask = (token_ids != tokenizer.pad_token_id).type(torch.int64)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # Pass the mask so padded positions are ignored by the model.
        outputs = model(token_ids, attention_mask=attention_mask, return_dict=True)
        scores = torch.nn.Softmax(dim=-1)(outputs.logits)
        val = torch.logit(scores).cpu().numpy()

    return val


def model_prediction_cpu(x, max_length=10):
    """Score a batch of texts on the CPU for use as a SHAP prediction function.

    Relies on the module-level ``tokenizer`` and ``model``; the model must be
    on the CPU (``model.cpu()``).

    Args:
        x: Iterable of strings to score.
        max_length: Number of token ids every text is padded or truncated to.
            Tokens beyond this limit are dropped before the model sees them.

    Returns:
        numpy array holding, for each text, the logit of the softmax
        probability of every output class.
    """
    # shap expects this per-text list comprehension - passing the whole batch
    # to the tokenizer in the usual way breaks.
    token_ids = torch.tensor(
        [
            tokenizer.encode(
                v, padding="max_length", max_length=max_length, truncation=True
            )
            for v in x
        ]
    )
    # Mark real tokens with 1 and padding with 0, using the tokenizer's own
    # pad id instead of assuming it is 0.
    attention_mask = (token_ids != tokenizer.pad_token_id).type(torch.int64)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # Pass the mask so padded positions are ignored by the model.
        outputs = model(token_ids, attention_mask=attention_mask, return_dict=True)
        scores = torch.nn.Softmax(dim=-1)(outputs.logits)
        val = torch.logit(scores).numpy()

    return val

In [None]:
# Sanity-check the manual CPU prediction function on a single toy input
model.cpu()
model_prediction_cpu(["one two three"])

In [None]:
# Sanity-check the manual GPU prediction function, but only where CUDA is
# available so that "Restart & Run All" still works on CPU-only machines.
if torch.cuda.is_available():
    model.cuda()
    gpu_scores = model_prediction_gpu(["one two three"])
else:
    gpu_scores = None  # nothing to display without a GPU
gpu_scores

In [None]:
# Tokenize two toy sentences by hand to inspect the padded token-id tensor
x = ["one two three", "eight nice ten"]
padded_ids = [
    tokenizer.encode(sentence, max_length=512, truncation=True, padding="max_length")
    for sentence in x
]
encoded_inputs = torch.tensor(padded_ids).cpu()

In [None]:
# Show the tensor of padded token ids built above
encoded_inputs

In [None]:
# Example texts for the manual (non-pipeline) explainer; this rebinds the
# `example_texts` defined earlier in the notebook.
example_texts = [
    "patient was left waiting with a very high blood pressure for longer than advise",
    "Patient was left waiting for 10 minutes",
    "Nothing out of the ordinary",
]

In [None]:
# The AutoModelForSequenceClassification loaded earlier has no `class_labels`
# attribute; its class names live in the config's index -> label mapping.
model.config.id2label

In [None]:
# Build a SHAP explainer around the manual CPU prediction function.
model.cpu()
# NOTE(review): these names are hard-coded - confirm they match the number and
# order of the model's output classes.
class_names = ["low", "high"]
cpu_explainer = shap.Explainer(
    model_prediction_cpu,
    tokenizer,
    output_names=class_names,
)

In [None]:
# Explain the example texts with the manual CPU explainer (rebinds `shap_values`)
shap_values = cpu_explainer(example_texts)

In [None]:
# Render the SHAP text plot for the manual explainer's values
shap.plots.text(shap_values)

In [None]:
# [x[0] for x in sorted(model.model.config.label2id.items(), key=lambda x: x[1])]