In [1]:
import os
from google.colab import drive

# Mount Google Drive under /content/gdrive (force_remount=True asks for a fresh
# mount even if one is already present) and switch the working directory to the
# project folder, so relative paths used afterwards resolve inside it.
drive.mount('/content/gdrive', force_remount=True)
os.chdir('/content/gdrive/MyDrive/colab/anadea_test')

Mounted at /content/gdrive


In [2]:
# Install the libraries imported by the prediction script.
# `%pip` installs into the environment of the running kernel, and `-q` keeps the
# log short while still surfacing installation errors (nothing is discarded).
%pip install -q datasets transformers

In [3]:
# Download the SQuAD v2.0 dev and train JSON files and the evaluation script.
# --fail makes curl exit with an error on an HTTP failure instead of saving the
# server's error page under the target file name; --location follows redirects.
!curl --fail --location https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -o dev-v2.0.json
!curl --fail --location https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -o train-v2.0.json
!curl --fail --location https://worksheets.codalab.org/rest/bundles/0x6b567e1cf2e041ec80d7098f031c5c9e/contents/blob/ -o evaluate-v2.0.py

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4268k  100 4268k    0     0  8271k      0 --:--:-- --:--:-- --:--:-- 8271k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 40.1M  100 40.1M    0     0  13.7M      0  0:00:02  0:00:02 --:--:-- 13.7M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 10547    0 10547    0     0   7268      0 --:--:--  0:00:01 --:--:--  7273


In [12]:
%%writefile make_predictions_pretrained.py
import argparse
import json
import os
from pathlib import Path
from typing import Optional

import torch
import datasets
import transformers.pipelines.base
from datasets import load_dataset, load_from_disk
from transformers import pipeline


def get_predictions_with_pipeline(
        pipe: transformers.pipelines.base.Pipeline,
        dataset: datasets.arrow_dataset.Dataset,
        batch_size: int = 8,
        no_answer_threshold: Optional[float] = None
):
    """Run ``pipe`` over ``dataset`` and collect its answers keyed by example id.

    Args:
        pipe: Callable pipeline; each of its outputs is read through the
            ``'answer'`` and ``'score'`` keys.
        dataset: Dataset providing an ``'id'`` column plus ``'context'`` and
            ``'question'``; every other column is dropped before inference.
        batch_size: Forwarded to ``pipe`` as ``batch_size``.
        no_answer_threshold: When given, an example whose no-answer
            probability (``1 - score``) is not below this value gets the empty
            string as its prediction, and the stored probability is
            binarised to ``0.0`` / ``1.0``. When ``None``, the raw answer and
            raw ``1 - score`` are stored.

    Returns:
        A ``(predictions, na_probs)`` pair of dicts, both keyed by example id.
    """
    example_ids = dataset['id']
    # Only the two fields the pipeline consumes are kept.
    unused_columns = [name for name in dataset.column_names
                      if name not in ['context', 'question']]
    model_inputs = dataset.remove_columns(unused_columns)
    results = pipe(model_inputs, batch_size=batch_size)

    predictions, na_probs = {}, {}
    for example_id, result in zip(example_ids, results):
        if no_answer_threshold is None:
            # No threshold: report the pipeline's answer and raw probability.
            predictions[example_id] = result['answer']
            na_probs[example_id] = 1 - result['score']
            continue
        # Threshold given: blank the answer unless the no-answer probability
        # is below it, and store a hard 0/1 decision.
        predictions[example_id] = (
            result['answer'] if 1 - result['score'] < no_answer_threshold else ''
        )
        na_probs[example_id] = float(1 - result['score'] >= no_answer_threshold)
    return predictions, na_probs


def get_predictions_pretrained(
        model_name: str,
        dataset: datasets.arrow_dataset.Dataset,
        batch_size: int = 8,
        device: Optional[torch.device] = None,
        no_answer_threshold: Optional[float] = None
):
    """Build a question-answering pipeline for ``model_name`` and predict on ``dataset``.

    Args:
        model_name: Identifier passed as both ``model`` and ``tokenizer`` when
            constructing the pipeline.
        dataset: Dataset handed on to ``get_predictions_with_pipeline``.
        batch_size: Inference batch size.
        device: Device for the pipeline. When ``None``, ``cuda:0`` is used if
            CUDA is available, otherwise ``cpu``.
        no_answer_threshold: Optional threshold forwarded unchanged to
            ``get_predictions_with_pipeline``.

    Returns:
        Whatever ``get_predictions_with_pipeline`` returns for these inputs.
    """
    if device is None:
        # Prefer the first GPU when one is visible to torch.
        device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
        device = torch.device(device_name)

    pipeline_kwargs = {
        'model': model_name,
        'tokenizer': model_name,
        'device': device,
    }
    reader = pipeline('question-answering', **pipeline_kwargs)

    return get_predictions_with_pipeline(
        pipe=reader,
        dataset=dataset,
        batch_size=batch_size,
        no_answer_threshold=no_answer_threshold,
    )


def main():
    """Command-line entry point: predict with a pretrained model and save the results.

    Command-line arguments:
        --model_name: Required; model identifier passed to
            ``get_predictions_pretrained``.
        --data_path: Optional; when given, the dataset is read with
            ``load_from_disk``. Otherwise the ``'validation'`` split of
            ``load_dataset('squad_v2')`` is used.
        --no_answer_threshold: Optional float forwarded to the prediction code.
        --batch_size: Inference batch size (default 8).
        --pred_path: Output JSON file for the predictions (default ``pred.json``).
        --na_prob_path: Output JSON file for the no-answer probabilities
            (default ``na_prob.json``).

    The parent directory of each output file is created if it does not exist.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_name', type=str, required=True)
    parser.add_argument('--data_path', type=str)
    parser.add_argument('--no_answer_threshold', type=float)
    parser.add_argument('--batch_size', type=int, default=8)
    parser.add_argument('--pred_path', type=str, default='pred.json')
    parser.add_argument('--na_prob_path', type=str, default='na_prob.json')

    parameters = parser.parse_args()
    if parameters.data_path is not None:
        dataset = load_from_disk(parameters.data_path)
    else:
        dataset = load_dataset('squad_v2')['validation']

    predictions, na_probs = get_predictions_pretrained(
        model_name=parameters.model_name,
        dataset=dataset,
        batch_size=parameters.batch_size,
        no_answer_threshold=parameters.no_answer_threshold,
    )

    # Each output gets its OWN parent directory created: the two paths are
    # independent options and may point into different folders.
    outputs = (
        (parameters.pred_path, predictions),
        (parameters.na_prob_path, na_probs),
    )
    for out_path, payload in outputs:
        os.makedirs(Path(out_path).parent.resolve(), exist_ok=True)
        with open(out_path, 'w') as f:
            json.dump(payload, f)


# Run the command-line entry point only when this file is executed as a
# script; importing it as a module does not trigger a prediction run.
if __name__ == '__main__':
    main()


Overwriting make_predictions_pretrained.py


In [13]:
# Produce predictions and no-answer probabilities with the pretrained
# deepset/tinyroberta-squad2 model. No --data_path is passed, so the script
# falls back to the 'validation' split of load_dataset('squad_v2'); no
# --no_answer_threshold is passed either, so raw 1 - score values are written.
!python make_predictions_pretrained.py \
--model_name deepset/tinyroberta-squad2 \
--pred_path pretrained/pred.json \
--na_prob_path pretrained/na_prob.json \
--batch_size 32

100% 2/2 [00:00<00:00, 347.20it/s]


In [16]:
# Score the predictions against the dev set with the downloaded evaluation
# script; metrics go to pretrained/metrics.json.
# NOTE(review): -t 0.81 is presumably the no-answer probability threshold used
# by the script, and -p the output directory for plots - confirm against
# evaluate-v2.0.py.
# Every continuation backslash is preceded by a space so the arguments stay
# separated even if this command is run by a plain shell.
!python evaluate-v2.0.py dev-v2.0.json pretrained/pred.json \
-o pretrained/metrics.json \
-n pretrained/na_prob.json \
-t 0.81 \
-p pretrained/out_images

In [17]:
import json
# Read the metrics file produced by the evaluation run and show it as the
# cell's output.
with open('pretrained/metrics.json') as metrics_file:
    pretrained_metrics = json.load(metrics_file)
pretrained_metrics

{'exact': 78.75852775204245,
 'f1': 81.3786550477157,
 'total': 11873,
 'HasAns_exact': 74.89878542510121,
 'HasAns_f1': 80.146553876776,
 'HasAns_total': 5928,
 'NoAns_exact': 82.60723296888142,
 'NoAns_f1': 82.60723296888142,
 'NoAns_total': 5945,
 'best_exact': 78.80064010780762,
 'best_exact_thresh': 0.7986318022012711,
 'best_f1': 81.46484977047996,
 'best_f1_thresh': 0.8249981105327606,
 'pr_exact_ap': 68.38289018410978,
 'pr_f1_ap': 75.96575700913677,
 'pr_oracle_ap': 85.97572482524812}