In [None]:
import os
import sys; sys.path.append("../../../../..")
import pickle

import torch
import numpy as np
from scipy.stats import entropy

from src.experiment import AttentionExperiment, ClassificationExperiment
from src.dataset import ExperimentDataset
from src.params import Params
from src.utils.attention_utils import reduce_attention_dist, return_idx_attention_dist, window_attention_dist
from src.utils.classification_utils import run_bootstrapping, average_data
from src.utils.shared_utils import get_bias_predictions

In [None]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Read the experiment configuration and echo the settings this run uses.
params = Params.read_params("gru-params.json")

attention_cfg = params.intermediary_task["attention"]
final_task_cfg = params.final_task

# Attention-extraction settings.
print("layers = {}".format(attention_cfg["layers"]))
print("reducer = {}".format(attention_cfg["reducer"]))
print("n_components = {}".format(attention_cfg["n_components"]))

# Final-task (classifier) settings.
print("dropout = {}".format(final_task_cfg["dropout"]))
print("hidden_dim = {}".format(final_task_cfg["hidden_dim"]))

In [None]:
# The dev dataset is built from the dataset section of the config.
dev_dataset = ExperimentDataset.init_dataset(params.dataset)

# The training dataset is unpickled from the path stored under
# "weakly_labeled_data". The context manager closes the file handle as soon
# as loading finishes instead of leaving it to the garbage collector.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(params.dataset["weakly_labeled_data"], "rb") as weak_data_file:
    train_dataset = pickle.load(weak_data_file)

In [None]:
# Both splits are batched with the same configured extraction batch size.
attention_extraction_batch_size = params.intermediary_task['attention']['attention_extraction_batch_size']

attention_dataloader_dev = dev_dataset.return_dataloader(
    batch_size=attention_extraction_batch_size
)
attention_dataloader_train = train_dataset.return_dataloader(
    batch_size=attention_extraction_batch_size
)

In [None]:
# Build the attention experiment from the intermediary-task and dataset
# sections of the config, with from_pretrained and verbose both switched on.
attention_experiment = AttentionExperiment.initialize_attention_experiment(
    params.intermediary_task, params.dataset, from_pretrained=True, verbose=True
)

In [None]:
# Run the attention experiment over the dev dataloader and keep the returned
# scores; they are reduced to fixed-width features in a later cell.
attention_scores_dev = attention_experiment.extract_attention_scores(attention_dataloader_dev)

In [None]:
# Cache the train-set attention scores on disk so later runs can skip the
# extraction step.
# NOTE: the cache is identified only by its file name. Delete the file when
# the training data or the attention model changes, otherwise stale scores
# are silently reused.
attention_weights_file = "model_weights/attention_scores_train.pkl"
if os.path.exists(attention_weights_file):
    print("Loading in existing train attention weights")
    # NOTE: pickle.load can execute arbitrary code; only load caches you created.
    with open(attention_weights_file, "rb") as cache_file:
        attention_scores_train = pickle.load(cache_file)
else:
    print("Generating new training attention weights ")
    # exist_ok=True replaces the separate isdir() check followed by mkdir().
    os.makedirs(os.path.dirname(attention_weights_file), exist_ok=True)
    attention_scores_train = attention_experiment.extract_attention_scores(attention_dataloader_train)

    # Closing the handle explicitly guarantees the pickle is flushed to disk.
    with open(attention_weights_file, "wb") as cache_file:
        pickle.dump(attention_scores_train, cache_file)

In [None]:
# Collect the "pre_lens" entry of every training example as a plain int.
lengths_train = []
for train_example in train_dataset:
    lengths_train.append(int(train_example["pre_lens"].numpy()))

# Reduce the raw attention scores using the attention config and the lengths.
# Both names are bound to the same result object.
attention_dist_train = reduced_attention_train = reduce_attention_dist(
    attention_scores_train, params.intermediary_task["attention"], lengths_train
)

In [None]:
# Validate the feature width BEFORE touching the dataset, so a mismatch with
# the classifier's configured input size fails fast instead of surfacing only
# after train_dataset has already been extended and shuffled.
assert attention_dist_train.shape[1] == params.final_task['input_dim'], (
    "attention_dist_train has width {} but final_task['input_dim'] is {}".format(
        attention_dist_train.shape[1], params.final_task['input_dim']
    )
)

# Attach the reduced attention distributions under the "attention_dist" key,
# then shuffle the dataset.
# NOTE(review): both calls presumably modify train_dataset in place, so
# re-running this cell repeats them — confirm against ExperimentDataset.
train_dataset.add_data(attention_dist_train, "attention_dist")
train_dataset.shuffle_data()

In [None]:
# Collect the "pre_lens" entry of every dev example as a plain int.
lengths_dev = []
for dev_example in dev_dataset:
    lengths_dev.append(int(dev_example["pre_lens"].numpy()))

# Reduce the raw dev attention scores using the attention config and the
# lengths. Both names are bound to the same result object.
attention_dist_dev = reduced_attention_dev = reduce_attention_dist(
    attention_scores_dev, params.intermediary_task["attention"], lengths_dev
)

In [None]:
# Validate the feature width BEFORE touching the dataset, so a mismatch with
# the classifier's configured input size fails fast instead of surfacing only
# after dev_dataset has already been extended and shuffled.
assert attention_dist_dev.shape[1] == params.final_task['input_dim'], (
    "attention_dist_dev has width {} but final_task['input_dim'] is {}".format(
        attention_dist_dev.shape[1], params.final_task['input_dim']
    )
)

# Attach the reduced attention distributions under the "attention_dist" key.
dev_dataset.add_data(attention_dist_dev, "attention_dist")
# Store the values of 'bias_label' a second time under 'weak_bias_label',
# which is the label key passed to train_model later in this notebook.
dev_dataset.add_data(dev_dataset.get_val('bias_label'),'weak_bias_label')
# NOTE(review): these calls presumably modify dev_dataset in place, so
# re-running this cell repeats them — confirm against ExperimentDataset.
dev_dataset.shuffle_data()

### This is where the classification experiment starts

We create a classification experiment that contains useful methods for classifying bias based on the attention distributions. 

In [None]:
# Training batches use the configured batch size; dev batches use a fixed
# size of 32.
train_batch_size = params.final_task['training_params']['batch_size']

train_dataloader = train_dataset.return_dataloader(batch_size=train_batch_size)
dev_dataloader = dev_dataset.return_dataloader(batch_size=32)

In [None]:
# Build the classification experiment from the final-task config together
# with the attention section of the intermediary-task config.
classification_experiment = ClassificationExperiment.init_cls_experiment(
    params.final_task, params.intermediary_task["attention"]
)

In [None]:
# Train the classifier with the attention distributions as input and the weak
# bias labels as targets, passing the dev dataloader alongside the train one.
losses, evals = classification_experiment.train_model(
    train_dataloader,
    dev_dataloader,
    input_key="attention_dist",
    label_key="weak_bias_label",
)

# Persist the weights of the trained model.
classification_experiment.save_model_weights("gru_pretrained_concat_all.weights")

In [None]:
# Average every entry of `evals`, then display the averaged entry with the
# highest "f1" value (the bare expression is the cell's output).
avg_evaluations = list(map(average_data, evals))
max(avg_evaluations, key=lambda averaged: averaged["f1"])

In [None]:
# Ask the classification experiment for its attention scores over the dev
# dataset, using the dataloader's default settings.
gru_attention_dataloader = dev_dataset.return_dataloader()
batch_attention_scores = classification_experiment.extract_attention_scores(
    gru_attention_dataloader,
    input_key="attention_dist",
    label_key="bias_label",
    seq_len_key="pre_lens",
    attention_mask_key="masks",
)

# Flatten the per-batch results into a single array, one entry per score.
flattened_scores = []
for score_batch in batch_attention_scores:
    for example_score in score_batch:
        flattened_scores.append(example_score.numpy())
attention_scores = np.array(flattened_scores)

In [None]:
# Compare the average entropy of the classifier's attention scores between the
# two bias classes. `entropy` is imported in the notebook's top import cell.
# NOTE(review): this assumes attention_scores is in the same order as the
# values returned by get_val('bias_label') — confirm that return_dataloader()
# does not reorder examples.
labels = dev_dataset.get_val('bias_label')
labels_0_indices = (labels == 0).nonzero()  # positions where the label is 0
labels_1_indices = labels.nonzero()         # positions where the label is non-zero

attention_scores_0 = attention_scores[labels_0_indices].squeeze() # epistemological 
attention_scores_1 = attention_scores[labels_1_indices].squeeze() # framing

# scipy's entropy reduces along axis 0 by default, so the scores are transposed
# before the call.
entropy_0 = entropy(attention_scores_0.T)
entropy_1 = entropy(attention_scores_1.T)

avg_entropy_0 = np.mean(entropy_0)
avg_entropy_1 = np.mean(entropy_1)

print("Entropy Epistemological: {} \t Entropy Framing {}".format(avg_entropy_0, avg_entropy_1))