In [None]:
!pip install transformers datasets evaluate

Collecting datasets
  Downloading datasets-2.18.0-py3-none-any.whl (510 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.5/510.5 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.1-py3-none-any.whl (84 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m8.1 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━

In [None]:
from datasets import load_dataset

squad = load_dataset("squad")

Downloading readme:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [None]:
train = squad['train']
val = squad['validation']

print('Features:', train.column_names)
print('No of train rows:', train.num_rows)
print('No of validation rows:', val.num_rows)

Features: ['id', 'title', 'context', 'question', 'answers']
No of train rows: 87599
No of validation rows: 10570


We don't have the test set for SQuAD v1.1, as it has not been released. So we are going to split the train set into train and test sets:

In [None]:
# Always split the untouched squad['train'] rather than `train` itself, so
# that re-running this cell reproduces the same 80/20 split instead of
# splitting an already reduced training set a second time.
train_test = squad['train'].train_test_split(test_size=0.2, seed=42)
train = train_test['train']
test = train_test['test']
print('No of train rows:', train.num_rows)
print('No of test rows:', test.num_rows)

No of train rows: 70079
No of test rows: 17520


Example row:

In [None]:
train[0]

{'id': '57319266497a881900249052',
 'title': 'Muammar_Gaddafi',
 'context': 'Gaddafi remained a controversial and divisive figure on the world stage throughout his life and after death. Supporters praised Gaddafi\'s administration for the creation of an almost classless society through domestic reform. They stress the regime\'s achievements in combating homelessness and ensuring access to food and safe drinking water. Highlighting that under Gaddafi, all Libyans enjoyed free education to a university level, they point to the dramatic rise in literacy rates after the 1969 revolution. Supporters have also applauded achievements in medical care, praising the universal free healthcare provided under the Gaddafist administration, with diseases like cholera and typhoid being contained and life expectancy raised. Biographers Blundy and Lycett believed that under the first decade of Gaddafi\'s leadership, life for most Libyans "undoubtedly changed for the better" as material conditions and wea

In [None]:
import numpy as np

# The statistics are computed over all three splits together (train, test, val order).
all_splits = (train, test, val)

# Character lengths of every context and every question.
context_lengths = np.array([len(context) for split in all_splits for context in split['context']])
question_lengths = np.array([len(question) for split in all_splits for question in split['question']])

# Each `answers` entry holds a *list* of reference answers under 'text', so
# len(answers['text']) would count the references, not measure their length.
# Use the character length of the first reference answer, which is also the
# one the preprocessing below trains on.
answer_lengths = np.array([len(answers['text'][0]) for split in all_splits for answers in split['answers']])

In [None]:
# Print mean / median / min / max for each length array, one heading per field.
for heading, lengths in (
    ('Context length:', context_lengths),
    ('Question length:', question_lengths),
    ('Answer length:', answer_lengths),
):
    print(heading)
    print(f'Average: {lengths.mean()}, Median: {np.median(lengths)}, Min: {lengths.min()}, Max: {lengths.max()}')

Context length:
Average: 757.0149945502144, Median: 694.0, Min: 151, Max: 4063
Question length:
Average: 59.618790045737455, Median: 56.0, Min: 1, Max: 25651
Answer length:
Average: 1.246065458545977, Median: 1.0, Min: 1, Max: 6


Preprocessing:
1. Some examples in a dataset may have a very long `context` that exceeds the maximum input length of the model. To deal with longer sequences, truncate only the `context` by setting `truncation="only_second"`.
2. Next, map the start and end positions of the answer to the original `context` by setting
   `return_offsets_mapping=True`.
3. With the mapping in hand, now you can find the start and end tokens of the answer. Use the [sequence_ids](https://huggingface.co/docs/tokenizers/main/en/api/encoding#tokenizers.Encoding.sequence_ids) method to
   find which part of the offset corresponds to the `question` and which corresponds to the `context`.

One example of the preprocessing involved:

In [None]:
example = train[10:12]
example

{'id': ['570b92846b8089140040f99b', '572767ee5951b619008f897f'],
 'title': ['Infrared', 'Carnival'],
 'context': ['The discovery of infrared radiation is ascribed to William Herschel, the astronomer, in the early 19th century. Herschel published his results in 1800 before the Royal Society of London. Herschel used a prism to refract light from the sun and detected the infrared, beyond the red part of the spectrum, through an increase in the temperature recorded on a thermometer. He was surprised at the result and called them "Calorific Rays". The term \'Infrared\' did not appear until late in the 19th century.',
  'Some of the best-known traditions, including carnal parades and masquerade balls, were first recorded in medieval Italy. The carnival of Venice was, for a long time, the most famous carnival (although Napoleon abolished it in 1797 and only in 1979 was the tradition restored). From Italy, Carnival traditions spread to Spain, Portugal and France and from France to New France i

In [None]:
print(example["context"][0])
print(example["context"][1])

The discovery of infrared radiation is ascribed to William Herschel, the astronomer, in the early 19th century. Herschel published his results in 1800 before the Royal Society of London. Herschel used a prism to refract light from the sun and detected the infrared, beyond the red part of the spectrum, through an increase in the temperature recorded on a thermometer. He was surprised at the result and called them "Calorific Rays". The term 'Infrared' did not appear until late in the 19th century.
Some of the best-known traditions, including carnal parades and masquerade balls, were first recorded in medieval Italy. The carnival of Venice was, for a long time, the most famous carnival (although Napoleon abolished it in 1797 and only in 1979 was the tradition restored). From Italy, Carnival traditions spread to Spain, Portugal and France and from France to New France in North America. From Spain and Portugal it spread with colonization to the Caribbean and Latin America. In the early 19th

In [None]:
questions = [q.strip() for q in example["question"]]
questions

['In what year did Herschel publish his work on infrared radiation?',
 'Who dismissed the petition of the Jewish community to stop the abuse of them?']

Using the DistilBERT tokenizer:

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

In [None]:
print('Max no of input tokens:', tokenizer.model_max_length)

Max no of input tokens: 512


Why `truncation="only_second"`?  
It truncates to the maximum length given by the `max_length` argument, or to the maximum input length the model accepts if that argument is not provided. When a pair of sequences (or a batch of pairs) is passed, only the second sequence of each pair is truncated.  
Here the question is the first sequence and the context is the second, so only the context is truncated.

In [None]:
inputs = tokenizer(
    questions,
    example["context"],
    truncation="only_second",
    return_offsets_mapping=True,
    padding="max_length"
)

In [None]:
inputs.keys()

dict_keys(['input_ids', 'attention_mask', 'offset_mapping'])

What is an attention mask?  
The attention mask is a binary tensor that marks the positions of the padded indices so that the model does not attend to them. For the BertTokenizer, 1 indicates a value that should be attended to, while 0 indicates a padded value.

In [None]:
inputs['attention_mask'][0]

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


What is offset mapping?                  
Offset mapping is a mapping between the tokens generated by the tokenizer and their corresponding character positions in the original sentence. It essentially creates a link between the tokenized output and the original text.         
Offset mapping is typically returned as a list of tuples. Each tuple represents a single token and contains two elements:

- Start Offset: This is the character position where the token begins in the original sentence.
- End Offset: This is the character position where the token ends in the original sentence (excluding the character at the end position itself).


An offset mapping of (0, 0) indicates that the token does not exist in the original sentence, as is the case for special tokens such as `[CLS]` and `[SEP]` and for padding.

In [None]:
inputs['offset_mapping'][0]

[(0, 0),
 (0, 2),
 (3, 7),
 (8, 12),
 (13, 16),
 (17, 21),
 (21, 25),
 (26, 33),
 (34, 37),
 (38, 42),
 (43, 45),
 (46, 54),
 (55, 64),
 (64, 65),
 (0, 0),
 (0, 3),
 (4, 13),
 (14, 16),
 (17, 25),
 (26, 35),
 (36, 38),
 (39, 47),
 (48, 50),
 (51, 58),
 (59, 63),
 (63, 67),
 (67, 68),
 (69, 72),
 (73, 83),
 (83, 84),
 (85, 87),
 (88, 91),
 (92, 97),
 (98, 102),
 (103, 110),
 (110, 111),
 (112, 116),
 (116, 120),
 (121, 130),
 (131, 134),
 (135, 142),
 (143, 145),
 (146, 150),
 (151, 157),
 (158, 161),
 (162, 167),
 (168, 175),
 (176, 178),
 (179, 185),
 (185, 186),
 (187, 191),
 (191, 195),
 (196, 200),
 (201, 202),
 (203, 208),
 (209, 211),
 (212, 215),
 (215, 218),
 (218, 219),
 (220, 225),
 (226, 230),
 (231, 234),
 (235, 238),
 (239, 242),
 (243, 251),
 (252, 255),
 (256, 264),
 (264, 265),
 (266, 272),
 (273, 276),
 (277, 280),
 (281, 285),
 (286, 288),
 (289, 292),
 (293, 301),
 (301, 302),
 (303, 310),
 (311, 313),
 (314, 322),
 (323, 325),
 (326, 329),
 (330, 341),
 (342, 350),


In [None]:
print(len(inputs['offset_mapping'][0]), len(inputs['attention_mask'][0]))

512 512


Now using offset mapping to convert start and end position of answers in original context to start and end tokens:

In [None]:
answers = example["answers"]
answers

[{'text': ['1800'], 'answer_start': [146]},
 {'text': ['Pope Gregory XVI'], 'answer_start': [928]}]

What is `inputs.sequence_ids`?  
`inputs.sequence_ids(i)` returns a list that maps each token of the `i`-th encoded example to the id of the sequence it came from:

None for special tokens added around or between sequences,
0 for tokens corresponding to words in the first sequence,
1 for tokens corresponding to words in the second sequence when a pair of sequences was jointly encoded.


In [None]:
# The character offsets are only needed to build the labels, so they are
# removed from the model inputs here.
offset_mapping = inputs.pop("offset_mapping")
start_positions = []
end_positions = []

for i, offset in enumerate(offset_mapping):
    answer = answers[i]
    # Character span [start_char, end_char) of the first reference answer.
    start_char = answer["answer_start"][0]
    end_char = start_char + len(answer["text"][0])
    sequence_ids = inputs.sequence_ids(i)

    # Find the start and end of the context (tokens whose sequence id is 1)
    idx = 0
    while sequence_ids[idx] != 1:
        idx += 1
    context_start = idx
    while idx < len(sequence_ids) and sequence_ids[idx] == 1:
        idx += 1
    context_end = idx - 1

    # If the answer is not fully inside the context, label it (0, 0).
    # Both ends of the answer are compared against the context span: an answer
    # that starts inside the kept context but is cut off by truncation must be
    # rejected too, otherwise its end position lands on the token after the context.
    if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
        start_positions.append(0)
        end_positions.append(0)
    else:
        # Otherwise it's the start and end token positions.
        # Start: last context token that begins at or before start_char.
        idx = context_start
        while idx <= context_end and offset[idx][0] <= start_char:
            idx += 1
        start_positions.append(idx - 1)

        # End: first context token (scanning backwards) that ends at or after end_char.
        idx = context_end
        while idx >= context_start and offset[idx][1] >= end_char:
            idx -= 1
        end_positions.append(idx + 1)

inputs["start_positions"] = start_positions
inputs["end_positions"] = end_positions

This is what a single tokenized encoded input to the model looks like:

In [None]:
inputs[0]

Encoding(num_tokens=512, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing])

In [None]:
# tokens
inputs[0].tokens

['[CLS]',
 'in',
 'what',
 'year',
 'did',
 'hers',
 '##chel',
 'publish',
 'his',
 'work',
 'on',
 'infrared',
 'radiation',
 '?',
 '[SEP]',
 'the',
 'discovery',
 'of',
 'infrared',
 'radiation',
 'is',
 'ascribed',
 'to',
 'william',
 'hers',
 '##chel',
 ',',
 'the',
 'astronomer',
 ',',
 'in',
 'the',
 'early',
 '19th',
 'century',
 '.',
 'hers',
 '##chel',
 'published',
 'his',
 'results',
 'in',
 '1800',
 'before',
 'the',
 'royal',
 'society',
 'of',
 'london',
 '.',
 'hers',
 '##chel',
 'used',
 'a',
 'prism',
 'to',
 'ref',
 '##rac',
 '##t',
 'light',
 'from',
 'the',
 'sun',
 'and',
 'detected',
 'the',
 'infrared',
 ',',
 'beyond',
 'the',
 'red',
 'part',
 'of',
 'the',
 'spectrum',
 ',',
 'through',
 'an',
 'increase',
 'in',
 'the',
 'temperature',
 'recorded',
 'on',
 'a',
 'the',
 '##rm',
 '##ometer',
 '.',
 'he',
 'was',
 'surprised',
 'at',
 'the',
 'result',
 'and',
 'called',
 'them',
 '"',
 'cal',
 '##ori',
 '##fi',
 '##c',
 'rays',
 '"',
 '.',
 'the',
 'term',
 "'

In [None]:
print('Index of start token of answer:', inputs['start_positions'][0])
print('Index of end token of answer:', inputs['end_positions'][0])

Index of start token of answer: 42
Index of end token of answer: 42


In [None]:
print('Context:', example['context'][0])
print('Question:', example['question'][0])
print('Answer tokens:', inputs[0].tokens[inputs['start_positions'][0]: inputs['end_positions'][0] + 1])

Context: The discovery of infrared radiation is ascribed to William Herschel, the astronomer, in the early 19th century. Herschel published his results in 1800 before the Royal Society of London. Herschel used a prism to refract light from the sun and detected the infrared, beyond the red part of the spectrum, through an increase in the temperature recorded on a thermometer. He was surprised at the result and called them "Calorific Rays". The term 'Infrared' did not appear until late in the 19th century.
Question: In what year did Herschel publish his work on infrared radiation?
Answer tokens: ['1800']


Creating a function for this preprocessing task:

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of SQuAD rows and attach answer token positions.

    Each question is paired with its context; only the context is truncated
    and every example is padded to the tokenizer's maximum length. The first
    reference answer of each row is converted from a character span in the
    context into start/end token indices.

    Args:
        examples: A batch (dict of lists) with the keys "question",
            "context" and "answers"; every "answers" entry provides the
            lists "text" and "answer_start".

    Returns:
        The tokenizer output without "offset_mapping" and with the added keys
        "start_positions" and "end_positions" (one int per example). Examples
        whose answer is not fully contained in the kept part of the context
        are labelled (0, 0).
    """
    questions = [q.strip() for q in examples["question"]]
    inputs = tokenizer(
        questions,
        examples["context"],
        truncation="only_second",
        return_offsets_mapping=True,
        padding="max_length",
    )

    # The character offsets are only needed to build the labels.
    offset_mapping = inputs.pop("offset_mapping")
    answers = examples["answers"]
    start_positions = []
    end_positions = []

    for i, offset in enumerate(offset_mapping):
        answer = answers[i]
        # Character span [start_char, end_char) of the first reference answer.
        start_char = answer["answer_start"][0]
        end_char = start_char + len(answer["text"][0])
        sequence_ids = inputs.sequence_ids(i)

        # Find the start and end of the context (tokens whose sequence id is 1)
        idx = 0
        while sequence_ids[idx] != 1:
            idx += 1
        context_start = idx
        while idx < len(sequence_ids) and sequence_ids[idx] == 1:
            idx += 1
        context_end = idx - 1

        # If the answer is not fully inside the context, label it (0, 0).
        # Both ends are checked: an answer that begins inside the kept context
        # but is cut off by truncation must be rejected as well, otherwise its
        # end position would point at the token following the context.
        if offset[context_start][0] > start_char or offset[context_end][1] < end_char:
            start_positions.append(0)
            end_positions.append(0)
        else:
            # Start: last context token that begins at or before start_char.
            idx = context_start
            while idx <= context_end and offset[idx][0] <= start_char:
                idx += 1
            start_positions.append(idx - 1)

            # End: first context token (scanning backwards) that ends at or after end_char.
            idx = context_end
            while idx >= context_start and offset[idx][1] >= end_char:
                idx -= 1
            end_positions.append(idx + 1)

    inputs["start_positions"] = start_positions
    inputs["end_positions"] = end_positions
    return inputs

Applying the preprocessing function to the dataset:

In [None]:
tokenized_train = train.map(preprocess_function, batched=True, remove_columns=train.column_names)

Map:   0%|          | 0/70079 [00:00<?, ? examples/s]

In [None]:
tokenized_test = test.map(preprocess_function, batched=True, remove_columns=test.column_names)

Map:   0%|          | 0/17520 [00:00<?, ? examples/s]

In [None]:
tokenized_val = val.map(preprocess_function, batched=True, remove_columns=val.column_names)

Map:   0%|          | 0/10570 [00:00<?, ? examples/s]

In [None]:
from transformers import DefaultDataCollator

data_collator = DefaultDataCollator()

In [None]:
from transformers import AutoModelForQuestionAnswering, TrainingArguments, Trainer

model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased")

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForQuestionAnswering: ['vocab_projector.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_transform.bias']
- This IS expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForQuestionAnswering were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to

In [None]:
training_args = TrainingArguments(
    output_dir="model1",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=1,
    weight_decay=0.01,
)

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

In [None]:
trainer.train()



Epoch,Training Loss,Validation Loss
1,1.2855,1.168892


TrainOutput(global_step=4380, training_loss=1.45898916862871, metrics={'train_runtime': 3643.6927, 'train_samples_per_second': 19.233, 'train_steps_per_second': 1.202, 'total_flos': 9156038597142528.0, 'train_loss': 1.45898916862871, 'epoch': 1.0})

Saving the model:

In [None]:
model.save_pretrained('model-nlp')

In [None]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
model.push_to_hub('nlp-exp-910')



pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/saadbelgi/nlp-exp-910/commit/e2c73e0da282480d479f805379dd563acaaeebcb', commit_message='Upload DistilBertForQuestionAnswering', commit_description='', oid='e2c73e0da282480d479f805379dd563acaaeebcb', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
tokenizer.push_to_hub('nlp-exp-910')



CommitInfo(commit_url='https://huggingface.co/saadbelgi/nlp-exp-910/commit/fe3372ae24d52ebf9ba201c4534286682db3d1e6', commit_message='Upload tokenizer', commit_description='', oid='fe3372ae24d52ebf9ba201c4534286682db3d1e6', pr_url=None, pr_revision=None, pr_num=None)

Evaluation:

In [None]:
from evaluate import load
metric = load("squad")

Downloading builder script:   0%|          | 0.00/4.53k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.32k [00:00<?, ?B/s]

In [None]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("saadbelgi/nlp-exp-910")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/712k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

In [None]:
from transformers import AutoModelForQuestionAnswering
model = AutoModelForQuestionAnswering.from_pretrained("saadbelgi/nlp-exp-910")

config.json:   0%|          | 0.00/561 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/265M [00:00<?, ?B/s]

In [None]:
def predict(question, context):
  """Return the answer span the fine-tuned model extracts for one question.

  Args:
    question: The question text.
    context: The passage in which the answer is searched.

  Returns:
    The decoded text of the tokens between the highest-scoring start token
    and the highest-scoring end token (inclusive). The string is empty when
    the predicted end lies before the predicted start.
  """
  import torch  # local import: the notebook only imports torch in a later cell

  # Truncate over-long contexts instead of failing on inputs longer than the
  # model accepts, and put the tensors on whatever device the model lives on.
  inputs = tokenizer(
      question,
      context,
      truncation="only_second",
      return_tensors="pt",
  ).to(model.device)

  # Inference only: no gradients are needed.
  with torch.no_grad():
    outputs = model(**inputs)

  answer_start_index = int(outputs.start_logits.argmax())
  answer_end_index = int(outputs.end_logits.argmax())
  predict_answer_tokens = inputs["input_ids"][0, answer_start_index : answer_end_index + 1]
  return tokenizer.decode(predict_answer_tokens)

In [None]:
test[0]

{'id': '573173d8497a881900248f0c',
 'title': 'Egypt',
 'context': 'The Pew Forum on Religion & Public Life ranks Egypt as the fifth worst country in the world for religious freedom. The United States Commission on International Religious Freedom, a bipartisan independent agency of the US government, has placed Egypt on its watch list of countries that require close monitoring due to the nature and extent of violations of religious freedom engaged in or tolerated by the government. According to a 2010 Pew Global Attitudes survey, 84% of Egyptians polled supported the death penalty for those who leave Islam; 77% supported whippings and cutting off of hands for theft and robbery; and 82% support stoning a person who commits adultery.',
 'question': 'What percentage of Egyptians polled support death penalty for those leaving Islam?',
 'answers': {'text': ['84%'], 'answer_start': [468]}}

In [None]:
predict(test[0]['question'], test[0]['context'])

'84 %'

In [None]:
import torch

# Run on the GPU when one is available, otherwise stay on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
model.eval()

predictions = []      # decoded answer text for every successfully processed test row
references = []       # gold answers of the same rows, kept aligned with `predictions`
omitted_indices = []  # positions in `test` for which prediction raised an error

for row_idx, row in enumerate(test):
  try:
    # Truncate over-long contexts: feeding more tokens than the model accepts
    # raises an indexing error, which on CUDA can break every later call too.
    inputs = tokenizer(
        row['question'],
        row['context'],
        truncation="only_second",
        return_tensors="pt",
    ).to(device)
    with torch.no_grad():  # inference only, no gradients needed
      outputs = model(**inputs)
    answer_start_index = int(outputs.start_logits.argmax())
    answer_end_index = int(outputs.end_logits.argmax())
    predict_answer_tokens = inputs.input_ids[0, answer_start_index : answer_end_index + 1]
    answer_text = tokenizer.decode(predict_answer_tokens)
  except Exception:
    # Skip only the failing row (and remember it) instead of aborting the whole run.
    omitted_indices.append(row_idx)
    continue

  predictions.append(answer_text)
  # NOTE(review): id + answers is presumably what the `squad` metric needs as a
  # reference entry; confirm against the cell that calls metric.compute.
  references.append({'id': row['id'], 'answers': row['answers']})

In [None]:
predictions

['84 %',
 'booksellers and books',
 'the executive',
 'anjiro',
 'loops',
 '2. 2 billion',
 'military governor',
 'the brown men',
 'resources',
 'honey ants',
 'the cossacks',
 'verdigris',
 '1970s',
 'sun jiadong',
 'his house master',
 'nine',
 'echiurans and sipunculan',
 'religion',
 'paralyzes muscles',
 'cardinal',
 'cbs television city in los angeles',
 '1720 and 1734',
 '1, 032, 949',
 'local fraternities of stonemasons',
 'north zhejiang',
 'over 70',
 'german football federation',
 'sony announced dualshock 3 ( trademarked dualshock 3 ), a playstation 3 controller with the same function and design as sixaxis, but with vibration capability included. hands - on accounts describe the controller as being noticeably heavier than the standard sixaxis controller and capable of vibration forces comparable to dualshock 2. it was released in japan',
 '150 to 300 mm',
 '2003',
 'because process theologians are so diverse and transdisciplinary',
 'popularity and skepticism',
 '2 %',
 '1