In [None]:
!pip install transformers datasets evaluate

In [None]:
import inspect

import evaluate
import numpy as np
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TrainingArguments, Trainer

In [None]:
# Load the SQuAD training split.
# TRAIN_PERCENT is the leading share of the split to load; lower it
# (e.g. 10) when GPU time or memory is limited.
TRAIN_PERCENT = 100
dataset = load_dataset("squad", split=f"train[:{TRAIN_PERCENT}%]")

In [None]:
# Hugging Face Hub checkpoint names to compare. Each name is used below to load
# both a question-answering model and its matching tokenizer.
# NOTE(review): the first name suggests a checkpoint that was already distilled
# on SQuAD, unlike the others — confirm this is intended for the comparison.
model_names = [
    "distilbert-base-uncased-distilled-squad",
    "bert-base-uncased",
    "albert/albert-base-v1",
    "huawei-noah/TinyBERT_General_4L_312D",
    "microsoft/MiniLM-L12-H384-uncased"
]

In [None]:
# Instantiate one question-answering model and one tokenizer per checkpoint;
# both dictionaries are keyed by the checkpoint name from model_names.
models = {
    checkpoint: AutoModelForQuestionAnswering.from_pretrained(checkpoint)
    for checkpoint in model_names
}
tokenizers = {
    checkpoint: AutoTokenizer.from_pretrained(checkpoint)
    for checkpoint in model_names
}


In [None]:
def preprocess_function(examples, tokenizer, max_length=384):
    """Tokenize a batch of SQuAD examples and label the answer token span.

    Args:
        examples: Batch mapping with "question", "context" and "answers"
            columns. Each answers entry is a dict with parallel "text" and
            "answer_start" lists; only the first answer is used.
        tokenizer: Callable tokenizer that accepts ``return_offsets_mapping``
            and whose output provides ``sequence_ids(i)``.
        max_length: Length every question/context pair is truncated and
            padded to. Only the context (second sequence) is truncated.

    Returns:
        The tokenizer output with "offset_mapping" removed and
        "start_positions" / "end_positions" added. Both positions are 0 when
        the example has no answer, has no context tokens, or the answer is
        not fully contained in the (possibly truncated) context.
    """
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        max_length=max_length,
        truncation="only_second",
        return_offsets_mapping=True,
        padding="max_length",
    )

    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions, end_positions = [], []

    for i, offsets in enumerate(offset_mapping):
        answer = answers[i]
        # No annotated answer: fall back to the (0, 0) "no span" label.
        if not answer["answer_start"] or not answer["text"]:
            start_positions.append(0)
            end_positions.append(0)
            continue

        start_char = answer["answer_start"][0]
        end_char = start_char + len(answer["text"][0])

        # Tokens with sequence id 1 belong to the context (second sequence).
        sequence_ids = inputs.sequence_ids(i)
        context_tokens = [idx for idx, seq in enumerate(sequence_ids) if seq == 1]
        if not context_tokens:
            start_positions.append(0)
            end_positions.append(0)
            continue
        context_start, context_end = context_tokens[0], context_tokens[-1]

        # The answer must lie entirely inside the surviving context; a span
        # that was cut off by truncation cannot be labelled.
        if offsets[context_start][0] > start_char or offsets[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
            continue

        # Start token: the last context token that begins at or before start_char.
        token_start = context_start
        while token_start <= context_end and offsets[token_start][0] <= start_char:
            token_start += 1
        token_start -= 1

        # End token: the first context token that ends at or after end_char.
        token_end = context_end
        while token_end >= context_start and offsets[token_end][1] >= end_char:
            token_end -= 1
        token_end += 1

        start_positions.append(token_start)
        end_positions.append(token_end)

    inputs["start_positions"], inputs["end_positions"] = start_positions, end_positions
    return inputs

In [None]:
# Build one tokenized copy of the dataset per checkpoint, in model_names
# order, so tokenized_squad[i] pairs with model_names[i]. The raw columns are
# dropped so only the tokenizer output and the span labels remain.
tokenized_squad = [
    dataset.map(
        lambda batch: preprocess_function(batch, tokenizers[model_name]),
        batched=True,
        remove_columns=dataset.column_names,
    )
    for model_name in model_names
]

In [None]:
# Load the "squad" metric through the evaluate library.
# NOTE(review): compute_metrics in this notebook does not reference `metric` —
# confirm whether it is still needed.
metric = evaluate.load("squad")

In [None]:
def compute_metrics(eval_pred):
    """Score predicted answer boundaries against the labelled positions.

    Args:
        eval_pred: Pair ``(predictions, labels)`` where predictions unpacks to
            ``(start_logits, end_logits)`` and labels unpacks to
            ``(start_positions, end_positions)``.

    Returns:
        Dict with "start_accuracy" and "end_accuracy": the fraction of rows
        whose arg-max logit index (along axis 1) equals the labelled position.
    """
    (start_logits, end_logits), (start_positions, end_positions) = eval_pred

    # Boolean hit masks: True where the highest-scoring index matches the label.
    hits = {
        "start_accuracy": np.argmax(start_logits, axis=1) == start_positions,
        "end_accuracy": np.argmax(end_logits, axis=1) == end_positions,
    }
    return {name: np.mean(mask) for name, mask in hits.items()}

In [None]:
# Fine-tune every checkpoint on the tokenized training data and score it on
# the held-out SQuAD validation split (not on the data it was trained on).

# transformers renamed two keyword arguments across releases
# (evaluation_strategy -> eval_strategy, tokenizer -> processing_class).
# Pick whichever name the installed version accepts so the cell runs on both.
training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
trainer_arg_names = inspect.signature(Trainer.__init__).parameters
eval_strategy_key = "eval_strategy" if "eval_strategy" in training_arg_names else "evaluation_strategy"
tokenizer_key = "processing_class" if "processing_class" in trainer_arg_names else "tokenizer"

validation_raw = load_dataset("squad", split="validation")

# Final evaluation metrics per checkpoint name.
eval_results = {}

for i, model_name in enumerate(model_names):
    # Each checkpoint has its own vocabulary, so the validation split must be
    # tokenized with the matching tokenizer.
    tokenized_validation = validation_raw.map(
        preprocess_function,
        batched=True,
        remove_columns=validation_raw.column_names,
        fn_kwargs={"tokenizer": tokenizers[model_name]},
    )

    training_args = TrainingArguments(
        output_dir=f"./results_model_{i+1}",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        weight_decay=0.01,
        push_to_hub=False,
        **{eval_strategy_key: "epoch"},
    )

    trainer = Trainer(
        model=models[model_name],
        args=training_args,
        train_dataset=tokenized_squad[i],
        eval_dataset=tokenized_validation,
        compute_metrics=compute_metrics,
        **{tokenizer_key: tokenizers[model_name]},
    )

    trainer.train()
    # Keep the metrics instead of discarding them so the models can be compared.
    eval_results[model_name] = trainer.evaluate()
    print(model_name, eval_results[model_name])

eval_results