Snorkel Time! - Evaluating predictions
------
In this experiment we simply evaluate the accuracy of the snorkel labels.

In [15]:
import sys; sys.path.append("../../../../..")
import torch 
from src.experiment import ClassificationExperiment
from src.dataset import ExperimentDataset
from src.params import Params
from metal.label_model import LabelModel # for labeling

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Loading in Datasets

In [16]:
params = Params.read_params("experiment_params.json")

In [23]:
# Loading in the dataset that we are using in this experiments 
# typically this dataset is the small set of ground-truth labels
train_dataset = ExperimentDataset.init_dataset(params.dataset)

03/06/2020 22:33:35 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
285it [00:00, 3516.78it/s]


In [22]:
dev_dataset = ExperimentDataset.init_dataset(params.dataset, data_path=params.dataset['labeled_data_dev'])

03/06/2020 22:33:31 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
100it [00:00, 2915.46it/s]


### Getting Featurizers 

#### Getting Marta Features 

In [34]:
# importing the Featurizer created by Pryzant et al.
from src.utils.weak_labeling_utils import get_marta_featurizer, extract_marta_features

In [35]:
featurizer = get_marta_featurizer(params.dataset)

03/06/2020 22:42:00 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [36]:
marta_features_train = extract_marta_features(train_dataset, featurizer)

HBox(children=(FloatProgress(value=0.0, max=242.0), HTML(value='')))




In [37]:
marta_features_dev = extract_marta_features(dev_dataset, featurizer)

HBox(children=(FloatProgress(value=0.0, max=81.0), HTML(value='')))




#### BERT Embedding features

In [38]:
from src.utils.weak_labeling_utils import get_bert_features

In [39]:
bert_embeddings_train = get_bert_features(train_dataset)

03/06/2020 22:42:05 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /sailhome/rdm/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
03/06/2020 22:42:05 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /sailhome/rdm/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmpssynxkho
03/06/2020 22:42:09 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 

HBox(children=(FloatProgress(value=0.0, max=242.0), HTML(value='')))




In [40]:
bert_embeddings_dev = get_bert_features(dev_dataset)

03/06/2020 22:44:45 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /sailhome/rdm/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
03/06/2020 22:44:45 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /sailhome/rdm/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /tmp/tmparczvwe4
03/06/2020 22:44:48 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "type_vocab_size": 

HBox(children=(FloatProgress(value=0.0, max=81.0), HTML(value='')))




#### POS features

In [41]:
from src.utils.weak_labeling_utils import get_pos_features_multi_dataset

In [42]:
pos_matrices = get_pos_features_multi_dataset(params.dataset, [train_dataset, dev_dataset])

03/06/2020 22:53:28 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at ./cache/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


HBox(children=(FloatProgress(value=0.0, max=242.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=81.0), HTML(value='')))




In [43]:
pos_features_train = pos_matrices[0]
pos_features_dev = pos_matrices[1]

### Snorkel 

First, we create predictions for all of our weak labeling functions which we then combine together. In creating predictions, we run 200 epochs on the training set and get our predicted labels for the entire training set.

In [44]:
train_dataset.add_data(pos_features_train, "pos_features")
train_dataset.add_data(marta_features_train, "marta_features")
train_dataset.add_data(bert_embeddings_train, "bert_embeddings")

In [45]:
train_dataset_dataloader = train_dataset.return_dataloader(batch_size = params.final_task['training_params']['batch_size'])

In [46]:
dev_dataset.add_data(pos_features_dev, "pos_features")
dev_dataset.add_data(marta_features_dev, "marta_features")
dev_dataset.add_data(bert_embeddings_dev, "bert_embeddings")

In [47]:
dev_dataset_dataloader = dev_dataset.return_dataloader(batch_size = params.final_task['training_params']['batch_size'])

Getting POS predictions

In [50]:
num_pos_tags = pos_features_train.shape[1]
params.final_task['input_dim'] = num_pos_tags
params.final_task['hidden_dim'] = num_pos_tags//2

In [51]:
classification_experiment_pos = ClassificationExperiment.init_cls_experiment(params.final_task)

In [52]:
pos_all_losses, pos_all_evaluations = classification_experiment_pos.train_model(train_dataloader=train_dataset_dataloader,
                                          eval_dataloader=train_dataset_dataloader,
                                          input_key="pos_features",
                                          label_key="bias_label")

HBox(children=(FloatProgress(value=0.0, description='epochs', max=200.0, style=ProgressStyle(description_width…




In [53]:
pos_predictions, _ = classification_experiment_pos.run_inference(dev_dataset_dataloader,
                                                                 input_key="pos_features")

Getting Marta Features

In [54]:
marta_features_size = marta_features.shape[1]
params.final_task['input_dim'] = marta_features_size
params.final_task['hidden_dim'] = marta_features_size//2

In [55]:
classification_experiment_marta = ClassificationExperiment.init_cls_experiment(params.final_task)

In [56]:
marta_all_losses, marta_all_evaluations = classification_experiment_marta.train_model(train_dataloader=train_dataset_dataloader,
                                          eval_dataloader=train_dataset_dataloader,
                                          input_key="marta_features",
                                          label_key="bias_label")

HBox(children=(FloatProgress(value=0.0, description='epochs', max=200.0, style=ProgressStyle(description_width…




In [57]:
marta_predictions, _ = classification_experiment_marta.run_inference(dev_dataset_dataloader,
                                                                 input_key="marta_features")

Getting BERT Features

In [58]:
bert_embedding_size = bert_embeddings.shape[1]
params.final_task['input_dim'] = bert_embedding_size
params.final_task['hidden_dim'] = bert_embedding_size//2

In [59]:
classification_experiment_bert = ClassificationExperiment.init_cls_experiment(params.final_task)

In [60]:
bert_all_losses, bert_all_evaluations = classification_experiment_bert.train_model(train_dataloader=train_dataset_dataloader,
                                          eval_dataloader=train_dataset_dataloader,
                                          input_key="bert_embeddings",
                                          label_key="bias_label")

HBox(children=(FloatProgress(value=0.0, description='epochs', max=200.0, style=ProgressStyle(description_width…




In [61]:
bert_predictions, _ = classification_experiment_bert.run_inference(dev_dataset_dataloader,
                                                                 input_key="bert_embeddings")

Running Snorkel labeling function

In [62]:
all_predictions = [pos_predictions, marta_predictions, bert_predictions]

In [63]:
from src.utils.weak_labeling_utils import generate_snorkel_matrix

In [64]:
lf_matrix = generate_snorkel_matrix(all_predictions)

In [65]:
lf_matrix.shape #checking that the dimensions are all as expeted

(81, 3)

In [66]:
label_model = LabelModel(k=2, seed=123)

In [67]:
label_model.train_model(lf_matrix)

Computing O...
Estimating \mu...
[1 epo]: TRAIN:[loss=1.567]
[2 epo]: TRAIN:[loss=1.505]
[3 epo]: TRAIN:[loss=1.388]
[4 epo]: TRAIN:[loss=1.226]
[5 epo]: TRAIN:[loss=1.031]
[6 epo]: TRAIN:[loss=0.816]
[7 epo]: TRAIN:[loss=0.600]
[8 epo]: TRAIN:[loss=0.403]
[9 epo]: TRAIN:[loss=0.245]
[10 epo]: TRAIN:[loss=0.143]
[11 epo]: TRAIN:[loss=0.105]
[12 epo]: TRAIN:[loss=0.125]
[13 epo]: TRAIN:[loss=0.186]
[14 epo]: TRAIN:[loss=0.259]
[15 epo]: TRAIN:[loss=0.316]
[16 epo]: TRAIN:[loss=0.336]
[17 epo]: TRAIN:[loss=0.315]
[18 epo]: TRAIN:[loss=0.259]
[19 epo]: TRAIN:[loss=0.188]
[20 epo]: TRAIN:[loss=0.118]
[21 epo]: TRAIN:[loss=0.063]
[22 epo]: TRAIN:[loss=0.031]
[23 epo]: TRAIN:[loss=0.021]
[24 epo]: TRAIN:[loss=0.027]
[25 epo]: TRAIN:[loss=0.043]
[26 epo]: TRAIN:[loss=0.062]
[27 epo]: TRAIN:[loss=0.079]
[28 epo]: TRAIN:[loss=0.090]
[29 epo]: TRAIN:[loss=0.094]
[30 epo]: TRAIN:[loss=0.089]
[31 epo]: TRAIN:[loss=0.079]
[32 epo]: TRAIN:[loss=0.064]
[33 epo]: TRAIN:[loss=0.048]
[34 epo]: TRAIN:[lo

In [68]:
predictions = label_model.predict(lf_matrix) - 1

### Calculating Accuracy and ROCAUC

In [73]:
gt = dev_dataset.get_val('bias_label').numpy()

In [74]:
from sklearn.metrics import roc_auc_score

In [75]:
roc_auc_score(predictions, gt) #ROC

0.8777439024390244

In [76]:
sum(gt == predictions)/len(gt) #Accuracy

0.8765432098765432