In [1]:
from transformers import BertTokenizer, BertForQuestionAnswering
import torch
# Single checkpoint name shared by the tokenizer and the model, so the two
# are always loaded from the same pretrained weights/vocabulary.
MODEL_NAME = 'bert-large-uncased-whole-word-masking-finetuned-squad'
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU rather
# than crashing on a hard-coded 'cuda' device.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForQuestionAnswering.from_pretrained(MODEL_NAME).to(DEVICE)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.weight', 'bert.pooler.dense.bias']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [2]:
# Sample inputs for extractive question answering: each entry pairs a
# "question" with the "context" passage in which the answer should be found.
question_context_pairs = [
    dict(
        question="Who developed the theory of relativity?",
        context="The theory of relativity, developed by Albert Einstein, revolutionized theoretical physics and astronomy during the 20th century.",
    ),
    dict(
        question="What is the capital of France?",
        context="France is a country whose territory consists of metropolitan France in Western Europe, as well as several overseas regions and territories. The capital of France is Paris.",
    ),
    dict(
        question="When was the Python programming language released?",
        context="Python is a high-level, interpreted programming language. Its design philosophy emphasizes code readability. Python was created by Guido van Rossum and first released in 1991.",
    ),
    dict(
        question="What causes COVID-19?",
        context="COVID-19 is an infectious disease caused by the most recently discovered coronavirus. This new virus and disease were unknown before the outbreak began in Wuhan, China, in December 2019.",
    ),
    dict(
        question="Where is the Great Barrier Reef located?",
        context="The Great Barrier Reef is the world's largest coral reef system composed of over 2,900 individual reefs and 900 islands stretching for over 2,300 kilometers. The reef is located in the Coral Sea, off the coast of Queensland, Australia.",
    ),
]

# Pull the question and its context out of one pair (index 1, the
# "capital of France" example) and show both, one per line.
question, context = (
    question_context_pairs[1][field] for field in ("question", "context")
)
print(question, context, sep="\n")

What is the capital of France?
France is a country whose territory consists of metropolitan France in Western Europe, as well as several overseas regions and territories. The capital of France is Paris.


In [17]:
# Encode the (question, context) pair as one sequence-pair input.
# Calling the tokenizer directly replaces the deprecated `encode_plus`;
# special tokens are added by default, so the encoding is unchanged.
# The tensors are moved to whatever device the model lives on instead of a
# hard-coded 'cuda', so the model and its inputs always share a device.
inputs = tokenizer(question, context, return_tensors="pt").to(model.device)
print(inputs)
print(inputs['input_ids'].shape)

{'input_ids': tensor([[ 101, 2054, 2003, 1996, 3007, 1997, 2605, 1029,  102, 2605, 2003, 1037,
         2406, 3005, 3700, 3774, 1997, 4956, 2605, 1999, 2530, 2885, 1010, 2004,
         2092, 2004, 2195, 6931, 4655, 1998, 6500, 1012, 1996, 3007, 1997, 2605,
         2003, 3000, 1012,  102]], device='cuda:0'), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]], device='cuda:0')}
torch.Size([1, 40])


In [18]:
# Run the forward pass for inference only. Disabling autograd avoids building
# a computation graph (the logits previously carried a grad_fn), which saves
# memory and time when no backward pass is needed.
with torch.no_grad():
    outputs = model(**inputs)
print(outputs)

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[-6.7909, -4.1555, -8.0475, -6.8801, -6.5031, -7.0851, -6.4636, -9.0271,
         -6.7907, -1.9230, -7.1330, -6.6507, -5.7740, -7.6736, -6.1460, -6.8851,
         -7.1528, -4.3491, -4.3381, -7.0451, -5.2822, -4.8498, -6.9500, -7.3055,
         -7.7678, -7.9914, -6.0956, -6.1501, -6.7638, -7.9117, -6.2149, -6.7906,
          2.9316,  0.8565, -2.8644,  0.1962, -3.5073,  6.8515, -5.4005, -6.7913]],
       device='cuda:0', grad_fn=<CloneBackward0>), end_logits=tensor([[-1.5065, -3.0832, -6.0236, -6.7441, -5.3194, -5.3763, -3.5639, -5.3751,
         -1.5063, -1.0769, -4.8500, -6.1928, -4.7044, -6.1315, -5.5946, -6.0360,
         -6.2464, -4.3485, -1.7541, -5.8366, -4.8620, -2.3913, -2.5722, -6.0619,
         -5.9968, -6.2176, -6.2443, -5.2960, -4.7480, -6.8405, -3.7108, -1.5061,
         -2.6234, -1.5669, -3.6511,  0.2896, -3.3590,  7.7160,  3.3112, -1.5077]],
       device='cuda:0', grad_fn=<CloneBackward0>), hidden_states=None, 

In [19]:
# Position of the largest start logit (argmax over the flattened tensor).
print(torch.argmax(outputs.start_logits))

tensor(37, device='cuda:0')


In [20]:
# Position of the largest end logit (argmax over the flattened tensor).
print(torch.argmax(outputs.end_logits))

tensor(37, device='cuda:0')


In [23]:
# Decode the token span from the highest-scoring start position through the
# highest-scoring end position (the +1 makes the end index inclusive).
# argmax without a dim works on the flattened logits, so this relies on the
# batch holding a single example.
answer_start = torch.argmax(outputs.start_logits)
answer_stop = torch.argmax(outputs.end_logits) + 1
tokenizer.decode(inputs['input_ids'].squeeze()[answer_start:answer_stop])

'paris'