## BOW -> Classification

This is a baseline in which we convert sentences into a bag-of-words (BOW) matrix and then classify the type of bias present in the entire sentence. The main difference from the other weak labeling functions is that here we feed the entire input into the classifier, with no knowledge of which word is actually biased.

In [1]:
import sys; sys.path.append("../../../../..")
import torch 
from src.experiment import ClassificationExperiment
from src.dataset import ExperimentDataset
from src.params import Params

In [2]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to the project's source files
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the experiment configuration from its JSON file.
params_file = "experiment_params.json"
params = Params.read_params(params_file)

In [4]:
# Load the dataset used in this experiment; typically this is the small
# set of ground-truth labels.
dev_dataset = ExperimentDataset.init_dataset(params.dataset)

# Drop the 'marta_features' entry when it is present.
# NOTE(review): presumably removed so the dev data has the same keys as the
# weakly labeled training data before the two are merged — confirm.
# Only a missing key is tolerated; any other failure now surfaces instead of
# being silently swallowed by a bare `except`.
try:
    del dev_dataset.data['marta_features']
except KeyError:
    pass

04/01/2020 18:20:44 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
100it [00:00, 3167.42it/s]


In [5]:
import pickle

# Load the pickled, weakly labeled training dataset from the path given in the
# experiment parameters. The `with` block guarantees the file handle is closed
# even if unpickling fails.
# NOTE(review): pickle.load can execute arbitrary code while deserialising —
# only point this at files from a trusted source.
with open(params.dataset["weakly_labeled_data"], "rb") as weak_label_file:
    train_dataset = pickle.load(weak_label_file)

In [6]:
# Store the values read from 'weak_bias_label' under the key 'bias_label',
# which is the label key passed to the training call later in this notebook.
weak_labels = train_dataset.get_val('weak_bias_label')
train_dataset.add_data(weak_labels, 'bias_label')

In [7]:
# Merge the weakly labeled training data and the dev data into one dataset;
# the BOW matrix below is computed over this joined dataset.
# NOTE(review): presumably so both splits share a single vocabulary — confirm.
joined_dataset = ExperimentDataset.merge_datasets(train_dataset, dev_dataset)

In [8]:
from src.utils.baseline_utils import get_bow_matrix

In [9]:
# Build the bag-of-words feature matrix for every example in the joined
# (train + dev) dataset, using the dataset parameters from the config.
bow_features = get_bow_matrix(params.dataset, joined_dataset)

04/01/2020 18:20:46 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [10]:
# Attach the BOW matrix to the joined dataset under the key "bow_features",
# which is the input key passed to the training call later in this notebook.
joined_dataset.add_data(bow_features, "bow_features")

In [11]:
# Split the joined dataset back into its train and dev parts, both of which now
# carry the BOW features. A positive count is passed for the train part and a
# negative count for the dev part.
# NOTE(review): assumes split_dataset treats a negative count as "take from the
# end" — confirm against its implementation.
n_train = len(train_dataset)
train_dataset_w_bow = ExperimentDataset.split_dataset(joined_dataset, n_train)
n_dev = len(dev_dataset)
dev_dataset_w_bow = ExperimentDataset.split_dataset(joined_dataset, -n_dev)

In [12]:
# Sanity check: display the training split's length and available keys
# ('bow_features' should be among them).
train_dataset_w_bow

Length: 52275 Keys: dict_keys(['pos_ids', 'post_tok_label_ids', 'index', 'post_in_ids', 'post_out_ids', 'pre_tok_label_ids', 'masks', 'bias_label', 'categories', 'rel_ids', 'pre_lens', 'pre_ids', 'bow_features'])

In [13]:
# Sanity check: display the shape of the training BOW features.
# NOTE(review): expected to be (number of examples, vocabulary size) — confirm.
train_dataset_w_bow.data['bow_features'].shape

torch.Size([52275, 74498])

### Start of Classification

In [19]:
# Re-read the parameter file (the same call is made earlier in the notebook),
# rebinding `params`.
# NOTE(review): presumably done so the classification section starts from the
# on-disk values rather than any in-memory modifications — confirm.
params = Params.read_params("experiment_params.json")

In [20]:
# Size of the classifier's hidden layer for this baseline.
HIDDEN_DIM = 20

# The classifier's input width is the number of columns of the BOW matrix.
num_vocab = bow_features.shape[1]
params.final_task['input_dim'] = num_vocab
params.final_task['hidden_dim'] = HIDDEN_DIM

In [21]:
import random

import numpy as np

# Seed the Python, NumPy and torch RNGs so that the shuffles below, and any
# later torch-based initialisation or training, are repeatable after
# Restart Kernel -> Run All.
# NOTE(review): which RNG shuffle_data() draws from is not visible in this
# notebook, so all three common generators are seeded.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Shuffle both splits before wrapping them in dataloaders.
train_dataset_w_bow.shuffle_data()
dev_dataset_w_bow.shuffle_data()

# The training batch size comes from the experiment parameters; the dev
# dataloader uses a fixed batch size of 32.
train_batch_size = params.final_task['training_params']['batch_size']
train_dataloader = train_dataset_w_bow.return_dataloader(batch_size=train_batch_size)
dev_dataloader = dev_dataset_w_bow.return_dataloader(batch_size=32)

In [22]:
# Build the classification experiment from the final-task parameters, which
# now include the 'input_dim' and 'hidden_dim' values set above.
classification_experiment = ClassificationExperiment.init_cls_experiment(params.final_task)

In [23]:
# Train on the weakly labeled split and evaluate on the dev split, feeding the
# BOW features as input and 'bias_label' as the target. Progress bars stay on.
train_options = {
    "input_key": "bow_features",
    "label_key": "bias_label",
    "model_dtype": torch.float,
    "disable_tqdm": False,
}
losses, evals = classification_experiment.train_model(
    train_dataloader,
    dev_dataloader,
    **train_options,
)

HBox(children=(FloatProgress(value=0.0, description='epochs', max=5.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))




In [24]:
# Display the raw evaluation results: a list with one entry per epoch, each
# entry being a list of metric dicts ('num_examples', 'accuracy', 'auc').
# NOTE(review): this dumps the full nested list; the averaged view further
# down is easier to read.
evals

[[{'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.5, 'auc': 0.75},
  {'num_examples': 4, 'accuracy': 0.25, 'auc': 0.25},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 0.75},
  {'num_examples': 4, 'accuracy': 1.0, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.5, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 0.6666666666666666},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 0.5},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.75, 'auc': 1.0},
  {'num_examples': 4, 'accuracy': 0.5, 'auc': 0.33333333333333337},
  {'num_examples': 4, 'accuracy': 0.25, 'auc': 0.3333333333333333},
  {'num_examples': 4, 'accuracy': 0.5, 'auc': 0.6666666666666666},
  {'num_examples': 4, 'accuracy': 0.75, 

In [25]:
from src.utils.classification_utils import average_data

In [26]:
# Reduce each epoch's list of evaluation dicts to a single summary via
# average_data, giving one entry per epoch.
avg_evaluations = list(map(average_data, evals))

In [27]:
# Display the per-epoch summaries (one dict per epoch).
avg_evaluations

[{'num_examples': 72,
  'accuracy': 0.6527777777777777,
  'auc': 0.7916666666666667},
 {'num_examples': 72,
  'accuracy': 0.6944444444444444,
  'auc': 0.7916666666666667},
 {'num_examples': 72,
  'accuracy': 0.736111111111111,
  'auc': 0.7731481481481481},
 {'num_examples': 72,
  'accuracy': 0.736111111111111,
  'auc': 0.7592592592592593},
 {'num_examples': 72,
  'accuracy': 0.6944444444444444,
  'auc': 0.7592592592592593}]