# Performance 

Question-answering performance of a few Hugging Face models, evaluated on a 1,000-example slice of the SQuAD validation set.

In [1]:
from datasets import load_dataset
from evaluate import evaluator

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Evaluation set: rows 9000-9999 of the SQuAD validation split (1000 records).
data = load_dataset(
    "squad",
    split="validation[9000:10000]",
)
data

Dataset({
    features: ['id', 'title', 'context', 'question', 'answers'],
    num_rows: 1000
})

In [3]:
# Evaluator for the question-answering task; reused for every model below.
task_evaluator = evaluator(task="question-answering")

In [4]:
# Hugging Face Hub model ids to compare, evaluated in this order.
models = [
    "distilbert-base-uncased-distilled-squad",
    "huggingface-course/bert-finetuned-squad",
]

In [5]:
# Evaluate each model on the 1000-record slice. In the recorded run this took
# ~34 s and ~61 s per model (see total_time_in_seconds in the output below);
# timings will vary with hardware.
results_by_model = {}  # model id -> metrics dict, so no model's results are lost
for m in models:
    # squad_v2_format=False: the data is scored with the "squad" metric
    # in SQuAD v1 format.
    results = task_evaluator.compute(
        model_or_pipeline=m,
        data=data,
        metric="squad",
        squad_v2_format=False
    )
    # Keep the metrics per model; `results` alone is overwritten each iteration.
    results_by_model[m] = results
    print(m)
    print(results)



distilbert-base-uncased-distilled-squad
{'exact_match': 80.0, 'f1': 87.86786542528225, 'total_time_in_seconds': 34.002620374998514, 'samples_per_second': 29.40949812018844, 'latency_in_seconds': 0.034002620374998516}
huggingface-course/bert-finetuned-squad
{'exact_match': 82.7, 'f1': 89.8188956890165, 'total_time_in_seconds': 61.45898283299903, 'samples_per_second': 16.271014486479142, 'latency_in_seconds': 0.06145898283299902}
