## Creating a heatmap (ideally we want to pick out the most certain prediction)

In [7]:
import sys; sys.path.append("../../../..") #NOTE: changing from the basic path we use for other experiments
import torch 
from src.experiment import AttentionExperiment
from src.dataset import ExperimentDataset
from src.params import Params

In [8]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# project modules are picked up without restarting the kernel.
# NOTE: re-running this cell in a live kernel prints an "already loaded"
# notice suggesting %reload_ext instead.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [35]:
# Read the experiment configuration from disk and report which layers are
# configured under the "attention" intermediary task.
params = Params.read_params("params.json")
attention_layers = params.intermediary_task["attention"]["layers"]
print("layers = {}".format(attention_layers))

layers = [11]


In [10]:
# Load the dataset used in this experiment.
# Typically this dataset is the small set of ground-truth labels.
dataset = ExperimentDataset.init_dataset(params.dataset)

04/11/2020 12:12:44 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
386it [00:00, 3766.83it/s]


Analyzing the lengths of different sentences

In [11]:
# Split example positions by bias label. The label-0 indices are used further
# down for the epistemological example and the non-zero indices for the
# framing example. Each result keeps the 2-D layout returned by `.nonzero()`
# (one row per matching example), which the later indexing relies on.
bias_labels = dataset.get_val('bias_label')
bias_labels_0_indices = bias_labels.eq(0).nonzero()
# Any non-zero label lands in this group, not only label == 1.
bias_labels_1_indices = bias_labels.ne(0).nonzero()

In [12]:
# Build the dataloader with the batch size configured for attention
# extraction, then construct the attention experiment from the same params.
attention_batch_size = params.intermediary_task['attention']['attention_extraction_batch_size']
attention_dataloader = dataset.return_dataloader(batch_size=attention_batch_size)
attention_experiment = AttentionExperiment.initialize_attention_experiment(
    params.intermediary_task,
    params.dataset,
    verbose=True,
)

04/11/2020 12:13:00 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
04/11/2020 12:13:00 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
04/11/2020 12:13:00 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmp5rgnwj31
04/11/2020 12:13:04 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_d

Instantiated joint model with pretrained weights.
Succesfully loaded in attention experiment!


`extract_attention_scores()` works out of the box because the attention experiment has the config file saved: it knows which BERT model to load, which layers to extract the attention scores from, and which inference function should be used on this particular BERT model.

`attention_scores` is then a list of dictionaries. The keys of each dictionary are the specific layers of the BERT model, and the values are the corresponding attention distributions extracted from that particular layer.

In [13]:
# Run attention extraction over every batch of the dataloader.
# Per the notes above, the result is a list of dicts keyed by layer.
attention_scores = attention_experiment.extract_attention_scores(attention_dataloader)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [29]:
from src.utils.data_utils import get_bias_indices
from src.utils.attention_utils import return_idx_attention_dist, reduce_attention_dist
# Derive one index per example from the pre-tokenisation label ids.
# Presumably this is the position of the bias-labelled token: further down it is
# used to pick a token out of each example's token list. TODO confirm in data_utils.
bias_indices = get_bias_indices(dataset.get_val('pre_tok_label_ids'))

In [36]:
# Select the attention distributions at each example's bias index, then reduce
# them according to the "attention" settings in params.
# NOTE(review): the exact selection/reduction semantics live in
# src.utils.attention_utils and are not visible here.
attention_scores_indexed = return_idx_attention_dist(attention_scores, bias_indices)
reduced_attention = reduce_attention_dist(attention_scores_indexed, params.intermediary_task["attention"])

In [39]:
# Sanity check on the reduced tensor: presumably (num_examples, max_seq_len) -- confirm.
reduced_attention.shape

torch.Size([324, 80])

In [41]:
from src.utils.data_utils import get_id2tok

In [42]:
# Lookup from vocabulary id to token string; used below to decode the
# example sentences back into readable tokens.
id2tok  = get_id2tok(params.dataset)

04/11/2020 12:38:20 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [46]:
# Token ids for the whole dataset; rows are selected below using the
# per-label example indices.
all_ids = dataset.get_val('pre_ids')

Getting Epistemological Example

In [321]:
eps_idx = 52 # hand-picked position within bias_labels_0_indices (not the first instance)

In [322]:
# Select the token ids of the chosen label-0 example. Indexing with the
# one-element index tensor appears to keep a leading dimension of size 1,
# which squeeze() removes so the ids can be iterated one by one.
epistemological_example_ids = all_ids[bias_labels_0_indices[eps_idx], :].squeeze()

In [323]:
# Decode every token id of the example (padding included) into its token string.
epistemological_example_toks = [
    id2tok[tok_id.item()]
    for tok_id in epistemological_example_ids
]

In [324]:
# Show the decoded example as one space-separated string ([PAD] tokens included).
' '.join(epistemological_example_toks)

'alexander attempted to create a genuine yugoslavia . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [325]:
# Reduced attention row for the same example (presumably one weight per token position).
reduced_attention[bias_labels_0_indices[eps_idx], :]

tensor([[0.1385, 0.2321, 0.0210, 0.0540, 0.0313, 0.3561, 0.1324, 0.0346, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])

In [326]:
# Pick the token sitting at this example's bias index -- presumably the word
# labelled as biased (confirm against get_bias_indices).
# NOTE: `bias_token` is reassigned in the framing section below.
bias_token = epistemological_example_toks[bias_indices[bias_labels_0_indices[eps_idx]]]
bias_token

'genuine'

Getting Framing Example

In [148]:
framing_idx = 6 # hand-picked position within bias_labels_1_indices (not the first instance)

In [149]:
# Select the token ids of the chosen non-zero-label example. Indexing with the
# one-element index tensor appears to keep a leading dimension of size 1,
# which squeeze() removes so the ids can be iterated one by one.
framing_example_ids = all_ids[bias_labels_1_indices[framing_idx], :].squeeze()

In [150]:
# Decode every token id of the example (padding included) into its token string.
framing_example_toks = [
    id2tok[tok_id.item()]
    for tok_id in framing_example_ids
]

In [151]:
# Show the decoded example as one space-separated string ([PAD] tokens included).
' '.join(framing_example_toks)

'perhaps the most fascinating collection of archives amongst the charities section are those of the found ##ling hospital . [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]'

In [152]:
# Reduced attention row for the same example (presumably one weight per token position).
reduced_attention[bias_labels_1_indices[framing_idx], :]

tensor([[0.0358, 0.0325, 0.0282, 0.4169, 0.0319, 0.0297, 0.0331, 0.0369, 0.0262,
         0.0351, 0.0328, 0.0383, 0.0305, 0.0340, 0.0287, 0.0367, 0.0336, 0.0315,
         0.0276, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]])

In [153]:
# Pick the token sitting at this example's bias index -- presumably the word
# labelled as biased (confirm against get_bias_indices).
# NOTE: this overwrites the `bias_token` set in the epistemological section.
bias_token = framing_example_toks[bias_indices[bias_labels_1_indices[framing_idx]]]
bias_token

'fascinating'