In [1]:
import os
import pickle
import sys

import torch
import numpy as np
from scipy.stats import entropy

# Extend the module search path so the project-local `src` imports below resolve
# (assumes the directory five levels up contains the `src` package).
sys.path.append("../../../../..")

from src.experiment import AttentionExperiment, ClassificationExperiment
from src.dataset import ExperimentDataset
from src.params import Params
from src.utils.attention_utils import reduce_attention_dist, return_idx_attention_dist, window_attention_dist
from src.utils.classification_utils import run_bootstrapping, average_data
from src.utils.shared_utils import get_bias_predictions

In [2]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the experiment configuration and echo the hyperparameters this run relies on.
params = Params.read_params("gru-params.json")

# Settings of the intermediary attention task.
attention_config = params.intermediary_task["attention"]
print("layers = {}".format(attention_config["layers"]))
print("reducer = {}".format(attention_config["reducer"]))
print("n_components = {}".format(attention_config["n_components"]))

# Settings of the final classification task.
classifier_config = params.final_task
print("dropout = {}".format(classifier_config["dropout"]))
print("hidden_dim = {}".format(classifier_config["hidden_dim"]))

layers = [10, 11]
reducer = avg
n_components = 20
dropout = 0.4
hidden_dim = 120


In [4]:
# Build the dev dataset from the dataset section of the configuration.
dev_dataset = ExperimentDataset.init_dataset(params.dataset)

# The weakly labeled training set is stored as a pickle. Open it in a context
# manager so the file handle is closed as soon as the object has been loaded.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open(params.dataset["weakly_labeled_data"], "rb") as train_file:
    train_dataset = pickle.load(train_file)

04/11/2020 22:32:34 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
100it [00:00, 5575.46it/s]


In [5]:
# Both splits are fed to the attention model with the same configured extraction batch size.
attention_batch_size = params.intermediary_task['attention']['attention_extraction_batch_size']
attention_dataloader_dev = dev_dataset.return_dataloader(batch_size=attention_batch_size)
attention_dataloader_train = train_dataset.return_dataloader(batch_size=attention_batch_size)

In [6]:
# Build the attention experiment from the intermediary-task and dataset configs,
# requesting pretrained weights and verbose output.
attention_init_options = dict(from_pretrained=True, verbose=True)
attention_experiment = AttentionExperiment.initialize_attention_experiment(
    params.intermediary_task, params.dataset, **attention_init_options
)

04/11/2020 22:32:35 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
04/11/2020 22:32:35 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
04/11/2020 22:32:35 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file ./cache/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmp9guni_r8
04/11/2020 22:32:39 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_d

Instantiated joint model with pretrained weights.
Succesfully loaded in attention experiment!


In [7]:
# Extract attention scores for the dev split by running its dataloader through the attention experiment.
attention_scores_dev = attention_experiment.extract_attention_scores(attention_dataloader_dev)

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [8]:
# Cache the train-set attention scores on disk so later runs can skip the extraction step.
# NOTE(review): the cache is reused whenever the file exists; delete it after changing
# the model or the training data, otherwise stale scores are loaded.
attention_weights_file = "model_weights/attention_scores_train.pkl"
if os.path.exists(attention_weights_file):
    print("Loading in existing train attention weights")
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted cache files.
    with open(attention_weights_file, "rb") as cache_file:
        attention_scores_train = pickle.load(cache_file)
else:
    print("Generating new training attention weights ")
    attention_scores_train = attention_experiment.extract_attention_scores(attention_dataloader_train)
    # exist_ok avoids the check-then-create race of a separate isdir()/mkdir() pair.
    os.makedirs(os.path.dirname(attention_weights_file), exist_ok=True)
    # Write to a temporary file and rename it into place, so an interrupted dump
    # never leaves a truncated cache that the next run would try to load.
    partial_weights_file = attention_weights_file + ".tmp"
    with open(partial_weights_file, "wb") as cache_file:
        pickle.dump(attention_scores_train, cache_file)
    os.replace(partial_weights_file, attention_weights_file)

Loading in existing train attention weights


In [9]:
# Collect the "pre_lens" value of every training example as a plain Python int.
lengths_train = [int(example["pre_lens"].numpy()) for example in train_dataset]

# Reduce the raw train attention scores using the configured attention settings
# and the per-example lengths; both names refer to the same reduced result.
attention_dist_train = reduced_attention_train = reduce_attention_dist(
    attention_scores_train, params.intermediary_task["attention"], lengths_train
)

In [10]:
# Validate the feature width BEFORE touching the dataset, so a config mismatch
# fails fast instead of leaving train_dataset with an unusable "attention_dist" field.
assert attention_dist_train.shape[1] == params.final_task['input_dim'], (
    "attention_dist_train has width {} but final_task['input_dim'] is {}".format(
        attention_dist_train.shape[1], params.final_task['input_dim']
    )
)

# Attach the reduced attention distributions to the training examples, then shuffle.
# NOTE(review): re-running this cell adds and shuffles again; no seed is visible
# here, so confirm the shuffle is reproducible.
train_dataset.add_data(attention_dist_train, "attention_dist")
train_dataset.shuffle_data()

In [11]:
# Collect the "pre_lens" value of every dev example as a plain Python int.
lengths_dev = [int(example["pre_lens"].numpy()) for example in dev_dataset]

# Reduce the raw dev attention scores using the configured attention settings
# and the per-example lengths; both names refer to the same reduced result.
attention_dist_dev = reduced_attention_dev = reduce_attention_dist(
    attention_scores_dev, params.intermediary_task["attention"], lengths_dev
)

In [12]:
# Validate the feature width BEFORE touching the dataset, so a config mismatch
# fails fast instead of leaving dev_dataset with an unusable "attention_dist" field.
assert attention_dist_dev.shape[1] == params.final_task['input_dim'], (
    "attention_dist_dev has width {} but final_task['input_dim'] is {}".format(
        attention_dist_dev.shape[1], params.final_task['input_dim']
    )
)

# Attach the reduced attention distributions to the dev examples.
dev_dataset.add_data(attention_dist_dev, "attention_dist")
# Expose the dev 'bias_label' values under 'weak_bias_label' as well, the label
# key that the training call in this notebook uses.
dev_dataset.add_data(dev_dataset.get_val('bias_label'),'weak_bias_label')
# NOTE(review): re-running this cell adds and shuffles again; no seed is visible
# here, so confirm the shuffle is reproducible.
dev_dataset.shuffle_data()

### This is where the classification experiment starts

We create a classification experiment that contains useful methods for classifying bias based on the attention distributions. 

In [13]:
# Training batches use the configured batch size; the dev loader uses a fixed size of 32.
train_batch_size = params.final_task['training_params']['batch_size']
dev_batch_size = 32
train_dataloader = train_dataset.return_dataloader(batch_size=train_batch_size)
dev_dataloader = dev_dataset.return_dataloader(batch_size=dev_batch_size)

In [14]:
# Create the classification experiment from the final-task config plus the
# attention settings of the intermediary task.
classification_init_args = (params.final_task, params.intermediary_task["attention"])
classification_experiment = ClassificationExperiment.init_cls_experiment(*classification_init_args)

In [15]:
# Train the classifier on the attention distributions against the weak labels.
# The dev dataloader is passed alongside the train dataloader; the call returns
# the losses and the evaluation records.
losses, evals = classification_experiment.train_model(
    train_dataloader,
    dev_dataloader,
    input_key="attention_dist",
    label_key="weak_bias_label",
)

# Save the trained model weights to disk.
trained_weights_file = "gru_pretrained_avg_last_2.weights"
classification_experiment.save_model_weights(trained_weights_file)

HBox(children=(FloatProgress(value=0.0, description='epochs', max=20.0, style=ProgressStyle(description_width=…




In [28]:
# Average the evaluation records of every epoch, then display the averaged
# summary with the highest F1 score.
avg_evaluations = list(map(average_data, evals))
max(avg_evaluations, key=lambda epoch_summary: epoch_summary["f1"])

{'num_examples': 81,
 'accuracy': 0.6172839506172839,
 'auc': 0.6502841465804428,
 'precision': 0.6660680882903105,
 'recall': 0.7119341563786008,
 'f1': 0.6856400259909032}

In [19]:
# Run the dev set (default dataloader settings) through the classification
# experiment to extract its attention scores.
gru_attention_dataloader = dev_dataset.return_dataloader()
extraction_keys = dict(
    input_key="attention_dist",
    label_key="bias_label",
    seq_len_key="pre_lens",
    attention_mask_key="masks",
)
batch_attention_scores = classification_experiment.extract_attention_scores(
    gru_attention_dataloader, **extraction_keys
)

# Flatten the per-batch results into one numpy array with one entry per example.
per_example_scores = [
    example_score.numpy()
    for score_batch in batch_attention_scores
    for example_score in score_batch
]
attention_scores = np.array(per_example_scores)

In [20]:
# Compare the classifier's attention for the two bias classes by averaging the
# per-example entropy of the attention scores.
# `entropy` (scipy.stats) is imported with the other dependencies at the top of the notebook.
labels = dev_dataset.get_val('bias_label')

# Label positions are used to index attention_scores directly, so both must
# cover the same number of examples.
# NOTE(review): this also assumes the dataloader yields examples in the same
# order as get_val() returns labels; confirm.
assert len(labels) == len(attention_scores), (
    "labels ({}) and attention_scores ({}) cover a different number of examples".format(
        len(labels), len(attention_scores)
    )
)

# Positions of label 0 and of every non-zero label (label 1 for binary labels).
labels_0_indices = (labels == 0).nonzero()
labels_1_indices = (labels != 0).nonzero()

attention_scores_0 = attention_scores[labels_0_indices].squeeze() # epistemological 
attention_scores_1 = attention_scores[labels_1_indices].squeeze() # framing

# scipy's entropy works along axis 0 and normalizes its input, so transposing
# yields one entropy value per selected example.
entropy_0 = entropy(attention_scores_0.T)
entropy_1 = entropy(attention_scores_1.T)

avg_entropy_0 = np.mean(entropy_0)
avg_entropy_1 = np.mean(entropy_1)

print("Entropy Epistemological: {} \t Entropy Framing {}".format(avg_entropy_0, avg_entropy_1))

Entropy Epistemological: 0.2490522861480713 	 Entropy Framing 0.6114786863327026
