In [1]:
import os
import logging
import argparse

import random
import numpy as np
import torch

from transformers import pipeline
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification

import torchtext

from captum.attr import TokenReferenceBase
from captum.attr import LayerIntegratedGradients
from captum.attr import visualization

In [2]:
def main():
    use_cuda = False
    if torch.cuda.is_available():
        pass
        # use_cuda = True
    print('CUDA enabled:', use_cuda)

    modelName = 'distilbert-base-uncased-finetuned-sst-2-english'
    model = AutoModelForSequenceClassification.from_pretrained(modelName)
    print(model.config)
    id2label = model.config.to_dict()['id2label']
    if use_cuda:
        model = model.cuda()
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(modelName)
    if tokenizer._pad_token is not None:
        pad_token = tokenizer.pad_token
        pad_token_id = tokenizer.encode(pad_token)
        print(pad_token, pad_token_id)
    else:
        logging.error("Using pad_token, but it is not set yet.")
    # p_special_tokens(tokenizer)
    print(tokenizer.decode(0))
    print(tokenizer.decode(101))
    print(tokenizer.decode(102))
    print(tokenizer.decode(103))

    token_reference = TokenReferenceBase(reference_token_idx=pad_token_id[0])
    # vocab = torchtext.vocab.vocab(tokenizer.get_vocab())

    def custom_forward(input_ids, attention_mask):
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        return outputs.logits
        # print(outputs)
        # probs = torch.softmax(outputs.logits, dim=1)
        # print(probs)
        # label_idx = torch.argmax(probs, dim=1)
        # print(label_idx)
        # return label_idx.unsqueeze(0)

    lig = LayerIntegratedGradients(custom_forward, model.get_input_embeddings())

    samples = [('It was a fantastic performance !', 1), ('Best film ever', 1), ('Such a great show!', 1), ('It was a horrible movie', 0), ('I\'ve never watched something as bad', 0), ('That is a terrible movie.', 0)]
    vis_result = []


    for sentence, label in samples:
        print(sentence, label)
        inputs = tokenizer(sentence,
                           padding=True,
                           truncation=True,
                           max_length=512,
                           return_tensors="pt")
        print(inputs)
        model.zero_grad()
        if use_cuda:
            inputs = inputs.cuda()
        outputs = model(**inputs)
        print(outputs)
        probs = torch.softmax(outputs.logits, dim=1)
        print(probs)
        label_idx = torch.argmax(probs, dim=1)
        print(label_idx)
        print("inputs['input_ids'].shape:", inputs['input_ids'].shape)
        reference_indices = token_reference.generate_reference(inputs['input_ids'].shape[1], device=inputs['input_ids'].device).unsqueeze(0)

        # for key in inputs:
        #     inputs[key] = inputs[key].unsqueeze(0)
        print("inputs['input_ids'].shape:", inputs['input_ids'].shape)
        print('reference_indices.shape:', reference_indices.shape)
        print(inputs['input_ids'].dtype)
        print(reference_indices.dtype)
        # attributions_ig, delta = lig.attribute(inputs=(inputs['input_ids'], inputs['attention_mask']), baselines=reference_indices, target=label, n_steps=500, return_convergence_delta=True)
        attributions_ig, delta = lig.attribute(inputs=inputs['input_ids'],
                                               baselines=reference_indices,
                                               additional_forward_args=inputs['attention_mask'],
                                               target=label,
                                               n_steps=10,
                                               return_convergence_delta=True)

        attributions = attributions_ig.sum(dim=2).squeeze(0)
        attributions = attributions / torch.norm(attributions)
        attributions = attributions.cpu().detach().numpy()

        prob = torch.max(probs).item()
        print(attributions)
        print(probs)
        print(id2label[label_idx.item()])
        print(id2label[label])
        print(pad_token)
        print(attributions.sum())
        print(len(sentence))
        print(delta)

        text = [tokenizer.decode(x) for x in inputs['input_ids']][0].split(' ')
        # storing couple samples in an array for visualization purposes
        vis_result.append(visualization.VisualizationDataRecord(attributions,
                                                                prob,
                                                                id2label[label_idx.item()],
                                                                id2label[label],
                                                                pad_token,
                                                                attributions.sum(),
                                                                text,
                                                                delta))
    _ = visualization.visualize_text(vis_result)


In [3]:
main()

CUDA enabled: False


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.15.0",
  "vocab_size": 30522
}



Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

[PAD] [101, 0, 102]
[PAD]
[CLS]
[SEP]
[MASK]
It was a fantastic performance ! 1
{'input_ids': tensor([[  101,  2009,  2001,  1037, 10392,  2836,   999,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1]])}
SequenceClassifierOutput(loss=None, logits=tensor([[-4.3378,  4.6968]], grad_fn=<AddmmBackward0>), hidden_states=None, attentions=None)
tensor([[1.1919e-04, 9.9988e-01]], grad_fn=<SoftmaxBackward0>)
tensor([1])
inputs['input_ids'].shape: torch.Size([1, 8])
inputs['input_ids'].shape: torch.Size([1, 8])
reference_indices.shape: torch.Size([1, 8])
torch.int64
torch.int64
[ 0.         -0.00144865 -0.0661637   0.06876944  0.71151029  0.03989503
  0.13625756  0.68153239]
tensor([[1.1919e-04, 9.9988e-01]], grad_fn=<SoftmaxBackward0>)
POSITIVE
POSITIVE
[PAD]
1.5703523661532837
32
tensor([2.2318], dtype=torch.float64)
Best film ever 1
{'input_ids': tensor([[ 101, 2190, 2143, 2412,  102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1]])}
SequenceClassifierOutput(loss=None, logits=tensor

True Label,Predicted Label,Attribution Label,Attribution Score,Word Importance
POSITIVE,POSITIVE (1.00),[PAD],1.57,[CLS] it was a fantastic performance! [SEP]
,,,,
POSITIVE,POSITIVE (1.00),[PAD],1.8,[CLS] best film ever [SEP]
,,,,
POSITIVE,POSITIVE (1.00),[PAD],1.43,[CLS] such a great show! [SEP]
,,,,
NEGATIVE,NEGATIVE (1.00),[PAD],0.85,[CLS] it was a horrible movie [SEP]
,,,,
NEGATIVE,POSITIVE (0.97),[PAD],0.32,[CLS] i've never watched something as bad [SEP]
,,,,
