# Baseline: standard SQuAD


In [1]:
cd ../..

/home/sambeck/code/nlpfp


In [2]:
import json
import os

import datasets
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, \
    AutoModelForQuestionAnswering, Trainer, TrainingArguments, HfArgumentParser, pipeline, \
    set_seed

from helpers import prepare_dataset_nli, prepare_train_dataset_qa, \
    prepare_validation_dataset_qa, QuestionAnsweringTrainer, compute_accuracy

# Number of worker processes passed as `num_proc` to `Dataset.map` when featurizing the data.
NUM_PREPROCESSING_WORKERS = 2

In [3]:
# --- Experiment configuration -------------------------------------------------
# Pretrained checkpoint that both the model and the tokenizer are loaded from.
MODEL = 'google/electra-small-discriminator'
# NOTE(review): TASK and MAX_LENGTH are not referenced by any cell visible in this
# notebook — confirm whether they are still needed.
TASK = 'qa'
# Hugging Face dataset name.
DATASET = 'squad'
MAX_LENGTH = 128
# Optional cap on the number of training examples; a falsy value (None) keeps the full split.
TRAIN_MAX_SAMPLES = None

In [4]:
cd experiments/stock_squad/

/home/sambeck/code/nlpfp/experiments/stock_squad


### Load the standard SQuAD dataset

(This baseline trains on SQuAD only; it does not first train on the Checklist antonym-negation-degree dataset. You can make a basic Checklist dataset using the make_checklist_dataset notebook.)

In [5]:
# Load the dataset named by the DATASET constant from the configuration cell ('squad'),
# rather than repeating the literal here, so the config cell is the single place to change it.
dataset = datasets.load_dataset(DATASET)

Reusing dataset squad (/home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [6]:
# Display the loaded DatasetDict (its splits, feature names and row counts).
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 87599
    })
    validation: Dataset({
        features: ['id', 'title', 'context', 'question', 'answers'],
        num_rows: 10570
    })
})

### Model

In [7]:
# Seed the Python/NumPy/PyTorch RNGs *before* building the model: the QA head (`qa_outputs`)
# is not part of the pretrained checkpoint and is randomly initialised, so without a seed
# every fresh kernel starts fine-tuning from different weights.
# 42 is the default value of `TrainingArguments.seed`.
set_seed(42)

model_class = AutoModelForQuestionAnswering
# Initialize the model and tokenizer from the specified pretrained model/checkpoint
model = model_class.from_pretrained(MODEL)
tokenizer = AutoTokenizer.from_pretrained(MODEL, use_fast=True)

Some weights of the model checkpoint at google/electra-small-discriminator were not used when initializing ElectraForQuestionAnswering: ['discriminator_predictions.dense_prediction.weight', 'discriminator_predictions.dense.bias', 'discriminator_predictions.dense.weight', 'discriminator_predictions.dense_prediction.bias']
- This IS expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing ElectraForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of ElectraForQuestionAnswering were not initialized from the model checkpoint at google/electra-small-discriminator and are newly initialized: ['qa_outputs.bias', 'qa_outputs.

In [8]:
# model pipeline (includes tokenization)
# The pipeline wraps the very same `model` object that is fine-tuned later, so calling `mp`
# after training reflects the trained weights.
# Use the first GPU when CUDA is available and fall back to CPU (device=-1) otherwise,
# instead of failing on machines without a GPU.
mp = pipeline(
    "question-answering",
    tokenizer=tokenizer,
    model=model,
    device=0 if torch.cuda.is_available() else -1,
)

In [9]:
# Sanity probe run before fine-tuning, while the QA head is still freshly initialised.
pred = mp(
    question="Which color is the dog?",
    context="There is a black dog.",
    truncation=True,
)
print(pred)

{'score': 0.02378532849252224, 'start': 9, 'end': 10, 'answer': 'a'}


### Set up dataset for training

In [10]:
# Raw (un-featurized) splits taken from the loaded DatasetDict.
train_dataset, eval_dataset = dataset['train'], dataset['validation']
# Placeholders for the featurized versions of the two splits.
train_dataset_featurized = eval_dataset_featurized = None

In [11]:
def prepare_train_dataset(exs):
    """Apply `prepare_train_dataset_qa` (from helpers.py) to a batch using the notebook's tokenizer."""
    return prepare_train_dataset_qa(exs, tokenizer)


def prepare_eval_dataset(exs):
    """Apply `prepare_validation_dataset_qa` (from helpers.py) to a batch using the notebook's tokenizer."""
    return prepare_validation_dataset_qa(exs, tokenizer)


# Optionally shrink the training split to its first TRAIN_MAX_SAMPLES rows.
if TRAIN_MAX_SAMPLES:
    train_dataset = train_dataset.select(range(TRAIN_MAX_SAMPLES))

# Options shared by both `Dataset.map` passes.
map_options = dict(batched=True, num_proc=NUM_PREPROCESSING_WORKERS)

print('featurize train...')
# The original columns are dropped so that only the prepared columns remain.
train_dataset_featurized = train_dataset.map(
    prepare_train_dataset,
    remove_columns=train_dataset.column_names,
    **map_options,
)

print('featurize test...')
eval_dataset_featurized = eval_dataset.map(
    prepare_eval_dataset,
    remove_columns=eval_dataset.column_names,
    **map_options,
)


featurize train...


Loading cached processed dataset at /home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-442457d121db22ce.arrow
Loading cached processed dataset at /home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-7e5255bd8606d15a.arrow


featurize test...


Loading cached processed dataset at /home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-c34bc99166e32dad.arrow
Loading cached processed dataset at /home/sambeck/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453/cache-49b59acbfe130203.arrow


In [12]:
# For QA, we need to use a tweaked version of the Trainer (defined in helpers.py)
# to enable the question-answering specific evaluation metrics
trainer_class = QuestionAnsweringTrainer

# Extra keyword arguments holding the un-featurized validation examples.
# NOTE(review): presumably meant for `trainer.evaluate(**eval_kwargs)`; no cell visible in
# this notebook passes them on — confirm.
eval_kwargs = {'eval_examples': eval_dataset}

metric = datasets.load_metric('squad')


def compute_metrics(eval_preds):
    """Score `eval_preds` with the 'squad' metric loaded above.

    If you want to use custom metrics, define your own function in place of this one;
    for an example of a valid compute_metrics function, see compute_accuracy in helpers.py.
    """
    return metric.compute(
        predictions=eval_preds.predictions,
        references=eval_preds.label_ids,
    )

In [13]:
# Holder for the most recent evaluation predictions, so that they can be
# dumped along with the metrics computed from them.
eval_predictions = None


def compute_metrics_and_store_predictions(eval_preds):
    """Record `eval_preds` in the module-level `eval_predictions`, then score them.

    Returns whatever `compute_metrics(eval_preds)` returns. The predictions are stored
    before scoring, so they stay available even if scoring raises.
    """
    global eval_predictions
    eval_predictions = eval_preds
    scores = compute_metrics(eval_preds)
    return scores

### Train

In [14]:
# Spell out the training configuration instead of relying on the Trainer's implicit defaults
# (without `args`, the Trainer silently builds default TrainingArguments writing to
# 'tmp_trainer'). The values below are those defaults: 3 epochs, batch size 8 per device,
# logging and checkpointing every 500 steps, seed 42.
training_args = TrainingArguments(
    output_dir='tmp_trainer',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    logging_steps=500,
    save_steps=500,
    seed=42,
    # Keep only the two most recent checkpoints; a full run otherwise leaves one complete
    # checkpoint on disk for every 500 of its ~33k optimization steps.
    save_total_limit=2,
)

# Initialize the Trainer object with the specified arguments and the model and dataset we loaded above
trainer = trainer_class(
    model=model,
    args=training_args,
    train_dataset=train_dataset_featurized,
    eval_dataset=eval_dataset_featurized,
    tokenizer=tokenizer,
    # Wrapper that also stashes the predictions in `eval_predictions`.
    compute_metrics=compute_metrics_and_store_predictions
)

In [15]:
# Run fine-tuning and keep the returned TrainOutput; it is displayed as the cell's result.
train_output = trainer.train()
train_output

***** Running training *****
  Num examples = 87714
  Num Epochs = 3
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 32895


Step,Training Loss
500,3.216
1000,2.0935
1500,1.7854
2000,1.5805
2500,1.5404
3000,1.4753
3500,1.3928
4000,1.3721
4500,1.3914
5000,1.3583


Saving model checkpoint to tmp_trainer/checkpoint-500
Configuration saved in tmp_trainer/checkpoint-500/config.json
Model weights saved in tmp_trainer/checkpoint-500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-1000
Configuration saved in tmp_trainer/checkpoint-1000/config.json
Model weights saved in tmp_trainer/checkpoint-1000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-1000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-1000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-1500
Configuration saved in tmp_trainer/checkpoint-1500/config.json
Model weights saved in tmp_trainer/checkpoint-1500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-1500/tokenizer_config.json
Special tokens file saved in tmp_traine

tokenizer config file saved in tmp_trainer/checkpoint-12000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-12000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-12500
Configuration saved in tmp_trainer/checkpoint-12500/config.json
Model weights saved in tmp_trainer/checkpoint-12500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-12500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-12500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-13000
Configuration saved in tmp_trainer/checkpoint-13000/config.json
Model weights saved in tmp_trainer/checkpoint-13000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-13000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-13000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-13500
Configuration saved in tmp_trainer/checkpoint-13500/config.json
Model we

Special tokens file saved in tmp_trainer/checkpoint-23500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-24000
Configuration saved in tmp_trainer/checkpoint-24000/config.json
Model weights saved in tmp_trainer/checkpoint-24000/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-24000/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-24000/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-24500
Configuration saved in tmp_trainer/checkpoint-24500/config.json
Model weights saved in tmp_trainer/checkpoint-24500/pytorch_model.bin
tokenizer config file saved in tmp_trainer/checkpoint-24500/tokenizer_config.json
Special tokens file saved in tmp_trainer/checkpoint-24500/special_tokens_map.json
Saving model checkpoint to tmp_trainer/checkpoint-25000
Configuration saved in tmp_trainer/checkpoint-25000/config.json
Model weights saved in tmp_trainer/checkpoint-25000/pytorch_model.bin
tokenizer config fil

TrainOutput(global_step=32895, training_loss=1.0727906191887184, metrics={'train_runtime': 9725.6734, 'train_samples_per_second': 27.056, 'train_steps_per_second': 3.382, 'total_flos': 7688358702452736.0, 'train_loss': 1.0727906191887184, 'epoch': 3.0})

In [16]:
# Same probe as before training, now answered by the fine-tuned model.
pred = mp(
    question="Which color is the dog?",
    context="There is a black dog.",
    truncation=True,
)
print(pred)

{'score': 0.9531130194664001, 'start': 11, 'end': 16, 'answer': 'black'}


In [17]:
# Probe: superlative question over a comparative statement.
pred = mp(
    question='Who is the most awesome?',
    context='William is awesome, but John is more awesome',
    truncation=True,
)
print(pred)

{'score': 0.5969194173812866, 'start': 24, 'end': 28, 'answer': 'John'}


In [18]:
# Probe: pick the item described with the adjective used in the question.
pred = mp(
    question="Which thing is hot?",
    context="There is a cold gopher, a polar bear, and a hot snake.",
    truncation=True,
)
print(pred)

{'score': 0.7536969780921936, 'start': 48, 'end': 53, 'answer': 'snake'}


In [19]:
# Probe: same context, but the question asks for the opposite degree ("least hot").
pred = mp(
    question="Which thing is least hot?",
    context="There is a cold gopher, a polar bear, and a hot snake.",
    truncation=True,
)
print(pred)

{'score': 0.5181098580360413, 'start': 48, 'end': 53, 'answer': 'snake'}


In [20]:
# Probe: negated antonym ("not cold") versus the plain antonym ("cold").
pred = mp(
    question="Who is most hot?",
    context="John is not cold. Bob is cold.",
    truncation=True,
)
print(pred)

{'score': 0.48150238394737244, 'start': 18, 'end': 21, 'answer': 'Bob'}


### Save

In [21]:
# Persist the fine-tuned model via the Trainer; the path is relative to the current
# working directory.
output_dir = './standard_squad_model/'
trainer.save_model(output_dir)

Saving model checkpoint to ./standard_squad_model/
Configuration saved in ./standard_squad_model/config.json
Model weights saved in ./standard_squad_model/pytorch_model.bin
tokenizer config file saved in ./standard_squad_model/tokenizer_config.json
Special tokens file saved in ./standard_squad_model/special_tokens_map.json
