# Explaining output of the model


References:
- [SHAP](https://github.com/shap/shap#natural-language-example-transformers)
- [refactor: raise error during init if masker is None for PermutationExplainer](https://github.com/shap/shap/pull/3315)
- [BERT, explain yourself!](https://colab.research.google.com/github/ml6team/quick-tips/blob/main/nlp/2021_04_22_shap_for_huggingface_transformers/explainable_transformers_using_shap.ipynb#scrollTo=das1RvNUrsE_)
- [Using SHAP (SHapley Additive exPlanations) to explain the predictions of a zero-shot transformer pipeline for text classification (using Huggingface)?](https://www.reddit.com/r/MLQuestions/comments/qc7uo7/using_shap_shapley_additive_explanations_to/)
- [Positive vs. Negative Sentiment Classification](https://shap.readthedocs.io/en/latest/example_notebooks/text_examples/sentiment_analysis/Positive%20vs.%20Negative%20Sentiment%20Classification.html)
- [text plot](https://shap.readthedocs.io/en/latest/example_notebooks/api_examples/plots/text.html)

In [1]:
# @title Environment running
# Execution-environment flags; later cells read them to choose file paths and setup steps.
running_local = True
if running_local:
    # A local run switches off both hosted-notebook modes.
    running_colab = running_kaggle = False
else:
    running_colab = False  # @param {type:"boolean"}
    # When the run is not local, Kaggle is selected whenever Colab is not.
    running_kaggle = not running_colab  # @param {type:"boolean"}

In [2]:
if running_colab:
    # Imported inside the guard so the import only runs when the Colab flag is set.
    from google.colab import drive

    # Mount Google Drive under /content/drive, the prefix used by this notebook's Colab file paths.
    drive.mount("/content/drive")

In [3]:
if running_colab:
    !pip install shap

## Loading the model

In [4]:
import numpy as np
import pandas as pd
import shap
import torch
import torch.nn as nn

from transformers import BertTokenizer, BertModel

In [5]:
# Notebook-wide configuration.
RANDOM_SEED = 103
TEST_SIZE = 0.2
# Hugging Face checkpoint name used for both the encoder and the tokenizer.
MODEL_PATH = "neuralmind/bert-base-portuguese-cased"
# Every input is padded or truncated to this many tokens.
TOKEN_MAX_LENGTH = 512

# Location of the fine-tuned classifier weights.
if running_colab:
    GLASSDOOR_MODEL_PATH = "/content/drive/MyDrive/UFMT/Gestão e Ciência de Dados/Disciplinas/14 - Seminário e Metodologia da Pesquisa/Projetos/glassdoor-reviews-analysis-nlp/train_model/bertimbau-glassdoor-reviews-epoch_5.bin"
else:
    # Local runs use the file next to the notebook. This branch is also the
    # fallback for Kaggle, so the name is always defined instead of raising a
    # NameError when the model is loaded.
    # NOTE(review): point this at the real Kaggle input path if Kaggle runs are needed.
    GLASSDOOR_MODEL_PATH = "./bertimbau-glassdoor-reviews-epoch_5.bin"

In [6]:
# Use CUDA when PyTorch can see a GPU; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if device.type == "cuda":
    print(f"There are {torch.cuda.device_count()} GPU(s) available.")
    print("Device name:", torch.cuda.get_device_name(0))
else:
    print("No GPU available, using the CPU instead.")

No GPU available, using the CPU instead.


In [7]:
# Choose the annotated-reviews CSV for the current environment, then read it once.
if running_colab:
    dataset_csv_path = "/content/drive/MyDrive/UFMT/Gestão e Ciência de Dados/Disciplinas/14 - Seminário e Metodologia da Pesquisa/Projetos/glassdoor-reviews-analysis-nlp/data_preparation/glassdoor_reviews_annotated.csv"
elif running_kaggle:
    dataset_csv_path = (
        "/kaggle/input/glassdoor-reviews-annotated/glassdoor_reviews_annotated.csv"
    )
else:
    dataset_csv_path = "../data_preparation/glassdoor_reviews_annotated.csv"

dataset = pd.read_csv(dataset_csv_path)

In [8]:
# Keep only the review text and its sentiment label.
filtered_dataset = dataset.filter(["review_text", "sentiment"])

In [9]:
# (rows, columns) of the filtered frame.
filtered_dataset.shape

(2566, 2)

In [10]:
# Class balance using the labels as stored in the CSV.
filtered_dataset["sentiment"].value_counts()

sentiment
 1    1284
-1    1035
 0     247
Name: count, dtype: int64

In [11]:
# Preview the first two rows.
filtered_dataset.head(2)

Unnamed: 0,review_text,sentiment
0,"Companheirismo entre os colegas, oportunidade ...",1
1,Não tive nenhum ponto negativo,0


Replace the negative sentiment label (-1) with 2 to avoid PyTorch errors: class labels must be non-negative indices (0, 1, 2).

In [12]:
# Remap the label -1 to 2; every other label is left as it is.
filtered_dataset["sentiment"] = filtered_dataset["sentiment"].replace({-1: 2})

In [15]:
# Class balance after the remap: the former -1 label should now be counted under 2.
filtered_dataset["sentiment"].value_counts()

sentiment
1    1284
2    1035
0     247
Name: count, dtype: int64

In [16]:
# Number of distinct sentiment labels; sizes the classifier's output layer.
num_labels = filtered_dataset["sentiment"].nunique()

In [17]:
# Show the number of distinct sentiment labels.
num_labels

3

In [20]:
# Display names, looked up by predicted class index and passed to SHAP as output names.
# NOTE(review): the order presumably follows the integer labels 0, 1, 2 (2 being the remapped -1) — confirm against the annotation scheme.
class_names = ["neutral", "positive", "negative"]

## Creating a PyTorch Model

In [18]:
class GlassdoorReviewsClassifier(nn.Module):
    """BERT encoder followed by a small feed-forward classification head.

    The head maps the encoder's hidden size through 300, 100 and 50 units,
    with a ReLU after each of those layers, to ``num_labels`` output scores.
    No softmax is applied here.
    """

    def __init__(self, num_labels):
        """Load the pretrained encoder from ``MODEL_PATH`` and build the head.

        Args:
            num_labels: Output size of the final linear layer.
        """
        super().__init__()

        self.bert = BertModel.from_pretrained(MODEL_PATH)

        # Consecutive layer widths of the head. A ReLU follows every linear
        # layer except the last, so the module order (and therefore the
        # state-dict keys classifier.0 ... classifier.6) stays
        # Linear, ReLU, Linear, ReLU, Linear, ReLU, Linear.
        widths = [self.bert.config.hidden_size, 300, 100, 50, num_labels]
        head_layers = []
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            head_layers.append(nn.Linear(in_features, out_features))
            head_layers.append(nn.ReLU())
        head_layers.pop()  # no activation after the output layer
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, input_ids, attention_mask):
        """Run the encoder and classify from the first token's hidden state.

        Args:
            input_ids: Token ids forwarded to the encoder.
            attention_mask: Attention mask forwarded to the encoder.

        Returns:
            The classification head's output for each sequence.
        """
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Position 0 along the sequence axis is the first token of each input.
        first_token_state = encoded["last_hidden_state"][:, 0, :]
        return self.classifier(first_token_state)

In [19]:
# Build the classifier on the selected device and restore the saved weights.
model = GlassdoorReviewsClassifier(num_labels).to(device)
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location places the stored tensors on `device`.
model.load_state_dict(torch.load(GLASSDOOR_MODEL_PATH, map_location=device))
# Evaluation mode: the Dropout layers are inactive during inference.
model.eval()

GlassdoorReviewsClassifier(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(29794, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el

## Explaining Predictions

In [22]:
# Tokenizer loaded from the same checkpoint name (MODEL_PATH) as the encoder.
tokenizer = BertTokenizer.from_pretrained(MODEL_PATH)

tokenizer_config.json:   0%|          | 0.00/43.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/210k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [25]:
def convert_to_str(input_value):
    """Join a NumPy array of strings with single spaces.

    Any value that is not a ``numpy.ndarray`` is returned unchanged.
    """
    if not isinstance(input_value, np.ndarray):
        return input_value
    return " ".join(input_value)

In [26]:
def predict_sentiment(texts):
    """Return class probabilities for each input text.

    Args:
        texts: Collection of inputs, one per prediction (for example a list or
            a NumPy array of strings). Each item is either a string or a NumPy
            array of string pieces, which is joined with spaces before
            tokenization. A single bare string is treated as one text.

    Returns:
        A NumPy array with one row per input text; each row holds the softmax
        probabilities over the model's outputs.
    """
    if isinstance(texts, str):
        # Iterating a bare string would yield characters, not texts.
        texts = [texts]

    outputs = []
    for txt in texts:
        # Tokenize the current item only. Passing the whole `texts` collection
        # here would give every row the same prediction (and, for a NumPy array
        # input, merge all texts into one string), which makes perturbation-based
        # explanations meaningless.
        encoded_text = tokenizer(
            convert_to_str(txt),
            max_length=TOKEN_MAX_LENGTH,
            add_special_tokens=True,
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors="pt",
        )

        input_ids = encoded_text["input_ids"].to(device)
        attention_mask = encoded_text["attention_mask"].to(device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output = model(input_ids, attention_mask)
            probabilities = torch.nn.functional.softmax(output, dim=1)
            outputs.append(probabilities.cpu().numpy())

    return np.concatenate(outputs, axis=0)

In [27]:
def explain_prediction(text):
    """Print the predicted sentiment for the first text and plot its SHAP explanation.

    Args:
        text: List of review strings. Only the first entry is reported and plotted.
    """
    output_probabilities = predict_sentiment(text)
    # Take the argmax of the first row only. An argmax over the whole 2-D array
    # returns an index into the flattened array, which is no longer a class
    # index as soon as there is more than one row.
    predicted_index = int(np.argmax(output_probabilities[0]))
    predicted_class = class_names[predicted_index]

    print(f"Predicted sentiment: {predicted_class}")

    # The tokenizer is passed as the masker; predict_sentiment is the function
    # SHAP calls on the masked variants of the text.
    explainer = shap.Explainer(
        model=predict_sentiment, masker=tokenizer, output_names=class_names
    )
    shap_values = explainer(text)

    # First explained text, all tokens, the output named after the predicted class.
    shap.plots.text(shap_values[0, :, predicted_class])

In [28]:
# Example review, roughly: "Management gets in the way of development a lot".
explain_prediction(["A gestão atrapalha muito no desenvolvimento"])

Predicted sentiment: negative


In [29]:
# Example review, roughly: "I had no negative points".
explain_prediction(["Não tive nenhum ponto negativo"])

Predicted sentiment: neutral


In [30]:
# Example review, roughly: "Camaraderie among colleagues, opportunity for growth".
explain_prediction(["Companheirismo entre os colegas, oportunidade de crescimento"])

Predicted sentiment: positive


  0%|          | 0/156 [00:00<?, ?it/s]