In [1]:
%load_ext autoreload
%autoreload 2

In [22]:
import os
while 'notebooks' in os.getcwd():
    os.chdir("..")

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, AutoModel
from datasets import load_dataset
from src.attention_rollout import AttentionRollout
from src.gradient_rollout import AttentionGradRollout
from torch.utils.data import DataLoader, RandomSampler
from tqdm import tqdm

import seaborn as sns
import numpy as np
import pandas as pd
import wordcloud
from src.text_utils import remove_stopwords, remove_punctuation, get_word_frequencies, lemmatize_text
from copy import deepcopy
from IPython.display import clear_output
import matplotlib.pyplot as plt
import scienceplots
plt.style.use(['science', 'no-latex'])
from src.perturbation import perturb_text

In [3]:
device = 'cuda'

# Attention Rollout

## AG News

In [4]:
train_dataset = load_dataset("SetFit/ag_news", cache_dir='/Data', split = 'train')\
    .train_test_split(test_size=1000, train_size=2000)

test_dataset = load_dataset("SetFit/ag_news", cache_dir='/Data', split = 'test')

In [5]:
n_classes = len(np.bincount(train_dataset['train']['label']))

In [6]:
label_names = {
    0 : "World",
    1 : "Sports",
    2 : "Business",
    3 : "Sci/Tech"
}

In [7]:
model = AutoModelForSequenceClassification.from_pretrained(
    "peulsilva/bert-ag_news", 
    attn_implementation = 'eager', 
    num_labels = n_classes,
    cache_dir = '/Data'
)\
    .to(device)
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


In [8]:
batch_size = 1
train_dataloader = DataLoader(train_dataset['train'], batch_size=batch_size)
val_dataloader = DataLoader(train_dataset['test'], batch_size=batch_size)

In [9]:
metric = AttentionRollout(model, attention_layer_name='attention')

In [15]:

accs = {}

ks = np.arange(0, 1., 0.1)

for k in ks:
    y_pred_test = torch.Tensor([])
    y_true_test = torch.Tensor([])
    for row in tqdm(val_dataloader):
        tokens = tokenizer(
            row['text'], 
            return_tensors='pt',
            padding = 'longest'
        )

        out, attn_matrix = metric(**tokens.to(device), output_attentions = True)

        new_input = perturb_text(tokens, attn_matrix.squeeze(), k, tokenizer)
        # print(new_input['input_ids'].shape)
        
        with torch.no_grad():
            out = model(**new_input.to(device))

        y_pred = out.logits.argmax().to('cpu').unsqueeze(dim= 0 )
        y_true = row['label'].to('cpu')


        y_pred_test = torch.concat([y_pred_test, y_pred])
        y_true_test = torch.concat([y_true_test, y_true])

        acc = (y_true_test == y_pred_test).sum()/len(val_dataloader)

        accs[k] = acc

    print(f"Removed {k} best tokens. Accuracy = {acc}")

100%|██████████| 1000/1000 [00:17<00:00, 56.50it/s]


Removed 0.0 best tokens. Accuracy = 0.8849999904632568


100%|██████████| 1000/1000 [00:18<00:00, 53.15it/s]


Removed 0.1 best tokens. Accuracy = 0.8709999918937683


100%|██████████| 1000/1000 [00:19<00:00, 52.58it/s]


Removed 0.2 best tokens. Accuracy = 0.8270000219345093


100%|██████████| 1000/1000 [00:18<00:00, 52.65it/s]


Removed 0.30000000000000004 best tokens. Accuracy = 0.7649999856948853


100%|██████████| 1000/1000 [00:19<00:00, 52.48it/s]


Removed 0.4 best tokens. Accuracy = 0.6980000138282776


100%|██████████| 1000/1000 [00:18<00:00, 52.79it/s]


Removed 0.5 best tokens. Accuracy = 0.6370000243186951


100%|██████████| 1000/1000 [00:18<00:00, 53.18it/s]


Removed 0.6000000000000001 best tokens. Accuracy = 0.5709999799728394


100%|██████████| 1000/1000 [00:18<00:00, 53.19it/s]


Removed 0.7000000000000001 best tokens. Accuracy = 0.49799999594688416


100%|██████████| 1000/1000 [00:19<00:00, 52.60it/s]


Removed 0.8 best tokens. Accuracy = 0.42500001192092896


100%|██████████| 1000/1000 [00:19<00:00, 52.13it/s]

Removed 0.9 best tokens. Accuracy = 0.3499999940395355





In [19]:
pd.Series(accs).apply(lambda x: x.item())\
    .to_pickle("data/results/attn_rollout_perturbation.pkl")

## Gradient attention rollout

In [23]:
metric = AttentionGradRollout(model, attention_layer_name='attention.self.dropout')

In [24]:

accs = {}

ks = np.arange(0, 1., 0.1)

for k in ks:
    y_pred_test = torch.Tensor([])
    y_true_test = torch.Tensor([])
    for row in tqdm(val_dataloader):
        tokens = tokenizer(
            row['text'], 
            return_tensors='pt',
            padding = 'longest'
        )

        tokens['labels'] = row['label']

        out, attn_matrix = metric(**tokens.to(device), output_attentions = True)

        new_input = perturb_text(tokens, attn_matrix.squeeze(), k, tokenizer)
        # print(new_input['input_ids'].shape)
        
        with torch.no_grad():
            out = model(**new_input.to(device))

        y_pred = out.logits.argmax().to('cpu').unsqueeze(dim= 0 )
        y_true = row['label'].to('cpu')


        y_pred_test = torch.concat([y_pred_test, y_pred])
        y_true_test = torch.concat([y_true_test, y_true])

        acc = (y_true_test == y_pred_test).sum()/len(val_dataloader)

        accs[k] = acc

    print(f"Removed {k} best tokens. Accuracy = {acc}")

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 0/1000 [00:00<?, ?it/s]


KeyError: 'labels'