In [1]:
import pandas as pd
import torch
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

In [2]:
QA_input = [{'question': 'How many programming languages does BLOOM support?',
             'context' : 'BLOOM has 176 billion parameters, BLOOM is able to generate text in 46 natural languages and 13 programming languages.'},
            {'question': 'Why is model conversion important?',
             'context': 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'},
            {'question': 'What are the key responsabilities?',
             'context': '''
             Senior Embedded Software Engineer sought after by an expert in developing cutting-edge RF, microwave, and mmWave technologies.

              As a Senior Embedded Software Engineer, you'll have the opportunity to develop intricate solutions for captivating endeavours that encompass sectors like Telecommunications Infrastructure, Critical Communications and Aerospace/Space.

              If you are a Senior Embedded Software Engineer and sense that your creative potential is constrained, then this is the ideal opportunity for you. Here, you'll have the chance to engage in a diverse array of projects thanks to the multitude of global clients they serve.

              Key Responsibilities:

              Embedded C/C++
              Bare-metal / Bootloader / U-Boot
              RTOS/FreeRTOS

              For security purposes, you must be eligible for SC Clearance

              If you're interested in learning more about this exceptional opportunity, reach out to Danny Beecroft at IC Resources.'''}]

In [3]:
model_name = 'deepset/roberta-base-squad2'

Method 1

In [4]:
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [5]:
inputs0 = tokenizer(QA_input[0]['question'], QA_input[0]['context'], return_tensors='pt')
output0 = model(**inputs0)

In [6]:
inputs1 = tokenizer(QA_input[1]['question'], QA_input[1]['context'], return_tensors='pt')
output1 = model(**inputs1)

In [7]:
inputs2 = tokenizer(QA_input[2]['question'], QA_input[2]['context'], return_tensors='pt')
output2 = model(**inputs2)

In [8]:
output0

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[  2.1341,  -9.2678,  -9.0902,  -9.5148,  -9.9579,  -9.6713,  -9.5384,
          -9.8904, -10.0625,  -9.6074,  -9.9357,  -9.7291,  -9.7478,  -5.3833,
          -7.8231,  -8.5859,  -7.7111,  -2.4021,  -7.0713,  -6.8286,  -8.0475,
          -3.7740,  -7.1794,  -8.2997,  -7.4724,  -6.7086,  -7.4467,  -6.3232,
          -6.3538,  -5.9786,  -0.1657,  -5.1392,  -6.4773,  -5.0963,   7.0305,
          -4.4435,  -4.7365,  -4.7513,  -9.5651]], grad_fn=<CloneBackward0>), end_logits=tensor([[ 2.6057, -8.2150, -7.7487, -8.0617, -7.2629, -7.9822, -8.2034, -7.7669,
         -7.1772, -7.5019, -7.2047, -7.7352, -7.7530, -8.6603, -8.6695, -6.5531,
         -8.8167, -3.1745, -6.0737, -5.6397, -6.1613, -7.8476, -8.0833, -5.3424,
         -8.6863, -8.5810, -8.6701, -8.3157, -6.4040, -6.9607, -0.1846, -5.2936,
         -2.9321, -6.6288,  7.4622, -0.8284,  2.5859,  0.1496, -8.0390]],
       grad_fn=<CloneBackward0>), hidden_states=None, attentions=

In [9]:
answer_start_idx = torch.argmax(output0.start_logits)
answer_end_idx = torch.argmax(output0.end_logits)

answer_tokens = inputs0.input_ids[0, answer_start_idx: answer_end_idx + 1]
answer = tokenizer.decode(answer_tokens)
print("question: {}\nanswer: {}".format(QA_input[0]['question'], answer))

question: How many programming languages does BLOOM support?
answer:  13


In [10]:
output2

QuestionAnsweringModelOutput(loss=None, start_logits=tensor([[ 2.4696, -8.7165, -9.3362, -9.1561, -8.6623, -8.5286, -9.6264, -9.8025,
         -9.2953, -9.0738, -2.7862, -4.4684, -5.0018, -5.6218, -5.6525, -5.6242,
         -5.5909, -5.7339, -5.9586, -6.0620, -6.0880, -6.0728, -5.7667, -3.7222,
         -6.7972, -8.4619, -8.1601, -8.2489, -7.1260, -9.0762, -8.4882, -7.6637,
         -7.2542, -8.1110, -5.3226, -4.2663, -8.1566, -7.5099, -5.3129, -8.6934,
         -6.9070, -9.0393, -8.5723, -6.6231, -8.3778, -6.8358, -7.2430, -1.4065,
         -3.9166, -4.6505, -5.6852, -5.7012, -5.6472, -5.6742, -5.8375, -5.9592,
         -6.1104, -6.2380, -6.2929, -6.0424, -5.3851, -5.1946, -7.6683, -5.6413,
         -7.5875, -8.6912, -8.4220, -8.6689, -9.0514, -5.7001, -8.3736, -7.1103,
         -6.1159, -5.6111, -6.4596, -4.4880, -5.1615, -5.3911, -7.5417, -5.6260,
         -8.0467, -6.9243, -8.3828, -8.4066, -7.8817, -5.8914, -8.8078, -4.5961,
         -7.5314, -8.3544, -5.9047, -8.1234, -8.3838, -6

In [11]:
answer2_start_idx = torch.argmax(output2.start_logits)
answer2_end_idx = torch.argmax(output2.end_logits)

answer2_tokens = inputs2.input_ids[0, answer2_start_idx: answer2_end_idx + 1]
answer2 = tokenizer.decode(answer2_tokens)
print("question: {}\nanswer: {}".format(QA_input[2]['question'], answer2))

question: What are the key responsabilities?
answer: 


Method 2

In [12]:
qa = pipeline('question-answering', model=model_name, tokenizer=model_name)

In [13]:
output_0 = qa(QA_input[0]['question'], QA_input[0]['context'])
print(output_0)

{'score': 0.9752431511878967, 'start': 93, 'end': 95, 'answer': '13'}


In [14]:
output_1 = qa(QA_input[1]['question'], QA_input[1]['context'])
print(output_1)

{'score': 0.21171429753303528, 'start': 59, 'end': 84, 'answer': 'gives freedom to the user'}
