In [31]:
from datasets import load_dataset

squad = load_dataset("squad_v2")

In [22]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2")

def preprocess_function(examples):
    questions = [q.strip() for q in examples["question"]]
    contexts = [c.strip() for c in examples["context"]]
    return tokenizer(questions, contexts, truncation=True, padding="max_length", max_length=384)

tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=['id', 'title', 'context', 'question', 'answers'])

In [23]:
import torch
from torch.utils.data import DataLoader
from transformers import DataCollatorWithPadding

eval_dataset = tokenized_squad["validation"]
data_collator = DataCollatorWithPadding(tokenizer)

eval_dataloader = DataLoader(eval_dataset, batch_size=16, collate_fn=data_collator)

In [24]:
from transformers import AutoModelForQuestionAnswering

model = AutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2")

In [26]:
def extract_answers(start_logits, end_logits, batch, tokenizer):
    answers = []
    for i in range(len(start_logits)):
        start_idx = torch.argmax(start_logits[i]).item()
        end_idx = torch.argmax(end_logits[i]).item()
        if start_idx <= end_idx:
            answer = tokenizer.decode(batch['input_ids'][i][start_idx:end_idx+1], skip_special_tokens=True)
        else:
            answer = ""
        answers.append(answer)
    return answers

In [27]:
model.eval()
all_predictions = []
example_ids = squad["validation"]["id"]
for batch in eval_dataloader:
    with torch.no_grad():
        inputs = {k: v.to(model.device) for k, v in batch.items() if k in tokenizer.model_input_names}
        outputs = model(**inputs)
        start_logits = outputs.start_logits
        end_logits = outputs.end_logits

        # Extract answers
        batch_predictions = extract_answers(start_logits, end_logits, batch, tokenizer)
        all_predictions.extend(batch_predictions)


In [30]:
import json

predictions = {id_: answer for id_, answer in zip(example_ids, all_predictions)}
with open("predictions.json", "w") as f:
    json.dump(predictions, f)

# Download the SQuAD evaluation script if you haven't already
# wget https://raw.githubusercontent.com/allenai/bi-att-flow/master/squad/evaluate-v2.0.py

# Run the evaluation script
import subprocess
subprocess.run(["python", "evaluate-v2.0.py", "squad/dev-v2.0.json", "predictions.json"])

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Traceback (most recent call last):
  File "/Users/tiril/Documents/IndividualProject/nuclear_repo/evaluate-v2.0.py", line 276, in <module>
    main()
  File "/Users/tiril/Documents/IndividualProject/nuclear_repo/evaluate-v2.0.py", line 232, in main
    with open(OPTS.data_file) as f:
FileNotFoundError: [Errno 2] No such file or directory: 'squad/dev-v2.0.json'


CompletedProcess(args=['python', 'evaluate-v2.0.py', 'squad/dev-v2.0.json', 'predictions.json'], returncode=1)