# Experiment: antonym-negation2

Train a model first on the Checklist-style antonym-negation-degree dataset.
Then continue with training on SQuad.

Augment dataset with arbitrary text from Wikipedia.

Does this additional training imrpove performance on Checklist, while hopefully imrpoving performance on Squad as well?


In [1]:
cd ../..

/home/sambeck/code/nlpfp


In [2]:
import datasets
from transformers import AutoTokenizer, AutoModelForSequenceClassification, \
    AutoModelForQuestionAnswering, Trainer, TrainingArguments, HfArgumentParser, pipeline
from helpers import prepare_dataset_nli, prepare_train_dataset_qa, \
    prepare_validation_dataset_qa, QuestionAnsweringTrainer, compute_accuracy
import os
import json

NUM_PREPROCESSING_WORKERS = 2

In [3]:
MODEL='google/electra-small-discriminator'
TASK='qa'
DATASET='squad'
MAX_LENGTH=128
TRAIN_MAX_SAMPLES=None

### Train on Checklist antonym-negation-degree dataset first

(you can make a basic Checklist dataset using the make_checklist_dataset notebook)

In [21]:
KEYS = ['id', 'title', 'context', 'question', 'answers']

In [4]:
dataset = datasets.Dataset.from_file('./experiments/antonym-negation/dataset.arrow')
dataset = dataset.shuffle()
dataset = dataset.train_test_split(test_size=0.1)

Loading cached shuffled indices for dataset at ./experiments/antonym-negation/cache-1cd0bdd2cd15cffb.arrow


In [12]:
sqd = datasets.load_dataset('squad')

Reusing dataset squad (/home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [39]:
lumped_data = {}
for key in KEYS:
    lumped_data[key] = sqd['train'][key]
    lumped_data[key].extend(sqd['validation'][key])
    lumped_data[key].extend(dataset['train'][key])
    

In [40]:
dataset = datasets.Dataset.from_dict(lumped_data)

In [41]:
dataset = dataset.shuffle()
dataset = dataset.train_test_split(test_size=0.1)


In [42]:
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 202933
    })
    test: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 22549
    })
})

### Model

In [29]:
model_class = AutoModelForQuestionAnswering
# Initialize the model and tokenizer from the specified pretrained model/checkpoint
model = model_class.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=True)

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForQuestionAnswering: ['discriminator_predictions.dense_prediction.bias', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.weight']
- This IS expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['qa_outputs.bias', 'qa_outputs.

In [30]:
# model pipeline (includes tokenization)
mp = pipeline("question-answering", tokenizer=tokenizer, model=model, device=0)

In [31]:
pred = mp(question="Which color is the dog?", context="There is a black dog.", truncation=True, )
print(pred)

{'score': 0.026453964412212372, 'start': 0, 'end': 5, 'answer': 'There'}


### Set up dataset for training

In [43]:
train_dataset_featurized = None
eval_dataset_featurized = None
train_dataset = dataset['train']
eval_dataset = dataset['test']

In [44]:
prepare_train_dataset = lambda exs: prepare_train_dataset_qa(exs, tokenizer)
prepare_eval_dataset = lambda exs: prepare_validation_dataset_qa(exs, tokenizer)

if TRAIN_MAX_SAMPLES:
    train_dataset = train_dataset.select(range(TRAIN_MAX_SAMPLES))

print('featurize train...')
train_dataset_featurized = train_dataset.map(
    prepare_train_dataset,
    batched=True,
    num_proc=NUM_PREPROCESSING_WORKERS,
    remove_columns=train_dataset.column_names
)

print('featurize test...')
eval_dataset_featurized = eval_dataset.map(
    prepare_eval_dataset,
    batched=True,
    num_proc=NUM_PREPROCESSING_WORKERS,
    remove_columns=eval_dataset.column_names
)


featurize train...
featurize test...


In [45]:
trainer_class = Trainer
eval_kwargs = {}
# If you want to use custom metrics, you should define your own "compute_metrics" function.
# For an example of a valid compute_metrics function, see compute_accuracy in helpers.py.
compute_metrics = None
# For QA, we need to use a tweaked version of the Trainer (defined in helpers.py)
# to enable the question-answering specific evaluation metrics
trainer_class = QuestionAnsweringTrainer
eval_kwargs['eval_examples'] = eval_dataset
metric = datasets.load_metric('squad')
compute_metrics = lambda eval_preds: metric.compute(
    predictions=eval_preds.predictions, references=eval_preds.label_ids)

In [46]:
# This function wraps the compute_metrics function, storing the model's predictions
# so that they can be dumped along with the computed metrics
eval_predictions = None
def compute_metrics_and_store_predictions(eval_preds):
    global eval_predictions
    eval_predictions = eval_preds
    return compute_metrics(eval_preds)

### Train

In [47]:
# Initialize the Trainer object with the specified arguments and the model and dataset we loaded above
trainer = trainer_class(
    model=model,
    train_dataset=train_dataset_featurized,
    eval_dataset=eval_dataset_featurized,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics_and_store_predictions
)

In [48]:
trainer.train()

***** Running training *****
  Num examples = 203224
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 76209


Step,Training Loss
500,2.9936
1000,2.0852
1500,1.7913
2000,1.5838
2500,1.4813
3000,1.3898
3500,1.3443
4000,1.2813
4500,1.2505
5000,1.1833


Saving model checkpoint to tmp_trainer/checkpoint-500
Configuration saved in tmp_trainer/checkpoint-500/config.json
Model weights saved in tmp_trainer/checkpoint-500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-1000
Configuration saved in tmp_trainer/checkpoint-1000/config.json
Model weights saved in tmp_trainer/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-1000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-1500
Configuration saved in tmp_trainer/checkpoint-1500/config.json
Model weights saved in tmp_trainer/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-1500/tokenizer_config.json
Special tokens file saved in tmp_traine

tokenizer config file saved in tmp_trainer/checkpoint-12000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-12000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-12500
Configuration saved in tmp_trainer/checkpoint-12500/config.json
Model weights saved in tmp_trainer/checkpoint-12500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-12500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-12500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-13000
Configuration saved in tmp_trainer/checkpoint-13000/config.json
Model weights saved in tmp_trainer/checkpoint-13000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-13000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-13000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-13500
Configuration saved in tmp_trainer/checkpoint-13500/config.json
Model we

Special tokens file saved in tmp_trainer/checkpoint-23500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-24000
Configuration saved in tmp_trainer/checkpoint-24000/config.json
Model weights saved in tmp_trainer/checkpoint-24000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-24000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-24000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-24500
Configuration saved in tmp_trainer/checkpoint-24500/config.json
Model weights saved in tmp_trainer/checkpoint-24500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-24500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-24500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-25000
Configuration saved in tmp_trainer/checkpoint-25000/config.json
Model weights saved in tmp_trainer/checkpoint-25000/pytorch_model.bin
tokenizer config fil

Saving model checkpoint to tmp_trainer/checkpoint-35500
Configuration saved in tmp_trainer/checkpoint-35500/config.json
Model weights saved in tmp_trainer/checkpoint-35500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-35500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-35500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-36000
Configuration saved in tmp_trainer/checkpoint-36000/config.json
Model weights saved in tmp_trainer/checkpoint-36000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-36000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-36000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-36500
Configuration saved in tmp_trainer/checkpoint-36500/config.json
Model weights saved in tmp_trainer/checkpoint-36500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-36500/tokenizer_config.json
Special tokens file 

RuntimeError: [enforce fail at inline_container.cc:274] . unexpected pos 4791936 vs 4791824

In [34]:
pred = mp(question="Which color is the dog?", context="There is a black dog.", truncation=True, )
print(pred)

{'score': 0.711025059223175, 'start': 0, 'end': 5, 'answer': 'There'}


In [35]:
pred = mp(question='Who is the most awesome?', context='William is awesome, but John is more awesome', truncation=True, )
print(pred)

{'score': 0.9997568130493164, 'start': 24, 'end': 28, 'answer': 'John'}


In [36]:
pred = mp(question="Which thing is hot?", context="There is a cold gopher, a polar bear, and a hot snake.", truncation=True, )
print(pred)

{'score': 0.3963162302970886, 'start': 26, 'end': 36, 'answer': 'polar bear'}


In [37]:
pred = mp(question="Which thing is least hot?", context="There is a cold gopher, a polar bear, and a hot snake.", truncation=True, )
print(pred)

{'score': 0.26628661155700684, 'start': 16, 'end': 22, 'answer': 'gopher'}


In [38]:
pred = mp(question="Who is most hot?", context="John is not cold. Bob is cold.", truncation=True, )
print(pred)

{'score': 0.9961118102073669, 'start': 18, 'end': 21, 'answer': 'Bob'}


# retrain on Squad