In [21]:
import datasets
import pandas as pd
import transformers
import torch
import shap
import numpy as np

# load the emotion dataset
dataset = datasets.load_dataset("emotion", split="train")
data = pd.DataFrame({"text": dataset["text"], "emotion": dataset["label"]})

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [22]:
print("Is CUDA enabled?",torch.cuda.is_available())
data.head()

Is CUDA enabled? True


Unnamed: 0,text,emotion
0,i didnt feel humiliated,0
1,i can go from feeling so hopeless to so damned...,0
2,im grabbing a minute to post i feel greedy wrong,3
3,i am ever feeling nostalgic about the fireplac...,2
4,i am feeling grouchy,3


In [23]:
# load the model and tokenizer
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "nateraw/bert-base-uncased-emotion", use_fast=True
)
model = transformers.AutoModelForSequenceClassification.from_pretrained(
    "nateraw/bert-base-uncased-emotion"
).cuda()

# build a pipeline object to do predictions
pred = transformers.pipeline(
    "text-classification",
    model=model,
    tokenizer=tokenizer,
    device=0,
    return_all_scores=True,
)



In [24]:
explainer = shap.Explainer(pred)

In [25]:
shap_values = explainer(data["text"][:3])

                                                   

In [26]:
print(data["text"][:3])

0                              i didnt feel humiliated
1    i can go from feeling so hopeless to so damned...
2     im grabbing a minute to post i feel greedy wrong
Name: text, dtype: object


In [27]:
shap.plots.text(shap_values)

In [28]:

shap.plots.text(shap_values[0][:, "anger"])



In [43]:
shap.plots.bar(shap_values)

TypeError: The clustering provided by the Explanation object does not seem to be a partition tree, which is all shap.plots.bar supports.

In [None]:
shap_values_reshaped = shap_values.reshape(shap_values.shape[0], -1)
shap.plots.bar(shap_values_reshaped[:, :, "joy"].mean(0), order=shap.Explanation.argsort)


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3,) + inhomogeneous part.

In [None]:
# ...or acending order
shap.plots.bar(shap_values[:, :, "joy"].mean(0), order=shap.Explanation.argsort.flip)

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (3,) + inhomogeneous part.

In [None]:
logit_explainer = shap.Explainer(
    shap.models.TransformersPipeline(pred, rescale_to_logits=True)
)

logit_shap_values = logit_explainer(data["text"][:3])
shap.plots.text(logit_shap_values)

                                                   