In [1]:
# Requires the `transformers` package. If it is not available in your
# environment, uncomment the next line and run it once:
#!pip install transformers
import re

import numpy as np
import torch
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer

# Tokenizer that matches the SQuAD-finetuned BERT checkpoint used below
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased-whole-word-masking-finetuned-squad')


In [2]:
# Load the BERT question-answering model from the checkpoint fine-tuned on
# the SQuAD question/answer dataset
model = BertForQuestionAnswering.from_pretrained(
    'bert-large-uncased-whole-word-masking-finetuned-squad'
)

In [6]:
def bert_answering_machine(question, passage, max_len=512):
    ''' Find the answer to a question inside a passage with the SQuAD-finetuned BERT model.

        Relies on the notebook-level `tokenizer` and `model` objects.

        Args:
            question: Question text; encoded as the first segment.
            passage:  Text searched for the answer; encoded as the second segment.
            max_len:  Maximum number of tokens for question + passage + special tokens.
                      Longer inputs are truncated. The value is capped at the number of
                      positions the model supports (512 for this checkpoint).

        Returns:
            Tuple (answer_start_index, answer_end_index, start_score, end_score, answer):
            the token indices of the highest-scoring start and end positions, the scores
            at those positions rounded to 2 decimals, and the answer text rebuilt from the
            tokens in that span. `answer` is "NO ANSWER FROM BERT" when the prediction
            does not describe a usable span inside the passage.
            The scores can be used to rank answers when the same question is asked
            against several passages.
    '''
    # Never request more positions than the model has position embeddings for
    if max_len is not None:
        max_len = min(max_len, model.config.max_position_embeddings)

    # Encode the pair as: [CLS] question [SEP] passage [SEP]
    input_ids = tokenizer.encode(question, passage, max_length=max_len, truncation=True)

    # The first [SEP] closes the question segment; everything after it is the passage
    sep_index = input_ids.index(tokenizer.sep_token_id)
    len_question = sep_index + 1                 # [CLS] + question tokens + [SEP]
    len_answer = len(input_ids) - len_question   # passage tokens + final [SEP]

    # Segment ids tell BERT which tokens belong to the question (0) and the passage (1)
    segment_ids = [0] * len_question + [1] * len_answer

    # Token strings, used to rebuild the answer text
    tokens = tokenizer.convert_ids_to_tokens(input_ids)

    # Inference only, so no autograd graph is needed
    with torch.no_grad():
        outputs = model(torch.tensor([input_ids]), token_type_ids=torch.tensor([segment_ids]))

    # Positional indexing works both when the model returns a plain tuple and
    # when it returns a ModelOutput (start logits first, end logits second)
    start_scores = outputs[0].detach().numpy().flatten()
    end_scores = outputs[1].detach().numpy().flatten()

    # Most likely start and end positions of the answer span
    answer_start_index = np.argmax(start_scores)
    answer_end_index = np.argmax(end_scores)

    # Scores of the chosen positions, rounded for readability
    start_score = np.round(start_scores[answer_start_index], 2)
    end_score = np.round(end_scores[answer_end_index], 2)

    # Re-join WordPiece continuations (tokens prefixed with '##') into full words;
    # the tokenizer splits words that are not in its vocabulary
    answer = tokens[answer_start_index]
    for i in range(answer_start_index + 1, answer_end_index + 1):
        if tokens[i][0:2] == '##':
            answer += tokens[i][2:]
        else:
            answer += ' ' + tokens[i]

    # Patterns indicating BERT found no answer in the passage: the span starts
    # at [CLS] or inside the question, the start score is negative, the span is
    # just a separator, or the end precedes the start
    if (
        answer_start_index < len_question
        or start_score < 0
        or answer == '[SEP]'
        or answer_end_index < answer_start_index
    ):
        answer = "NO ANSWER FROM BERT"

    return (answer_start_index, answer_end_index, start_score, end_score, answer)


# Quick sanity check of the function on a short passage
bert_answering_machine(
    question='Which state john lives',
    passage='My name is John. I live in San Jose, California. I am learning BERT',
)


(17, 17, 5.72, 6.34, 'california')

In [8]:
passage="John is a 10 years old boy. He is the son of Robert Smith.  Elizabeth Davis is Robert's wife. Sophia Smith is Elizabeth's daughter. She studies at UC Davis"

print('Passage:\n', passage )

question1 ="Who is John's sister"
question2 ="Which college does John's sister attend"

# Ask every question against the same passage
for question in (question1, question2):
    print ('\nQuestion:\n', question)
    # Only the answer text (last tuple element) is needed. Index it instead of
    # unpacking into `_`, which would overwrite IPython's last-output variable.
    ans = bert_answering_machine ( question, passage)[-1]
    print('\nAnswer from BERT: ', ans ,  '\n')

Passage:
 John is a 10 years old boy. He is the son of Robert Smith.  Elizabeth Davis is Robert's wife. Sophia Smith is Elizabeth's daughter. She studies at UC Davis

Question:
 Who is John's sister

Answer from BERT:  sophia smith 


Question:
 Which college does John's sister attend

Answer from BERT:  uc davis 



In [9]:
passage = " The Miami Marlins' home opening game against the Baltimore Orioles tonight has been canceled due to a number of players and staff testing \
positive for Covid-19, according to multiple reports. According to ESPN's Jeff Passan, eight players and two coaches have tested positive for coronavirus, \
bringing the total cases on the team to at least 14.Miami played their first three games of the shortened 60-game season in Philadelphia this past weekend, \
winning two of three.The Marlins did not travel back to Miami after the three-game series ended Sunday as they remained overnight for more testing. \
This is the first game to be canceled this season. CNN has reached out to Major League Baseball and the Marlins for comment."

question1 ="Which 2 teams are part of the game"
question2 ="How many got infected with Coronavirus"

# Ask every question against the same passage
for question in (question1, question2):
    print ('\nQuestion:\n', question)
    # Only the answer text (last tuple element) is needed. Index it instead of
    # unpacking into `_`, which would overwrite IPython's last-output variable.
    ans = bert_answering_machine ( question, passage)[-1]
    print('\nAnswer from BERT: ', ans ,  '\n')


Question:
 Which 2 teams are part of the game

Answer from BERT:  miami marlins ' home opening game against the baltimore orioles 


Question:
 How many got infected with Coronavirus

Answer from BERT:  at least 14 



In [10]:
passage = " Apple has told employees it'll provide them with paid time off to vote in the US presidential election on Nov. 3, according to a report. \
Workers, who wish to vote that Tuesday will be given up to four hours of pay, Bloomberg reported Friday citing an internal Apple memo. \
It follows Twitter in June making Election Day a paid holiday for US employees. For retail team members and hourly workers across the company, \
if you are scheduled to work this Election Day, we will be providing up to four hours of paid time off if you need it to get to the polls, \
said Deirdre O'Brien, Apple's senior vice president of retail and people, in the reported memo. Teams can also use this time to volunteer as an \
election worker at one of your local polling stations. Apple didn't immediately respond to a request for comment. \
Since Election Day in the US falls on a Tuesday, it can be difficult for people to find time outside of work to visit a polling place and vote "

# The first question has no answer in the passage; the second one does
question1 ="When we have Independence Day"
question2 ="What's issue in voting for Apple employees"

# Ask every question against the same passage
for question in (question1, question2):
    print ('\nQuestion:\n', question)
    # Only the answer text (last tuple element) is needed. Index it instead of
    # unpacking into `_`, which would overwrite IPython's last-output variable.
    ans = bert_answering_machine ( question, passage)[-1]
    print('\nAnswer from BERT: ', ans ,  '\n')




Question:
 When we have Independence Day

Answer from BERT:  NO ANSWER FROM BERT 


Question:
 What's issue in voting for Apple employees

Answer from BERT:  it can be difficult for people to find time outside of work to visit a polling place and vote 



In [11]:
passage = "Indian actor Sushant Singh Rajput was found dead in his Mumbai residence on Sunday, according to a statement from Mumbai police. He was 34. \
Mumbai police say he took his own life, and an investigation is underway.\
The actor made his debut in Hindi cinema in 2013 in Kai Po Che!, according to the film site IMDb. He was awarded best actor (male) at the Indian Film Festival of Melbourne in 2017 for his movie - M.S. Dhoni: The Untold Story, in which he played Indian cricket star Mahendra Singh Dhoni.\
He also played the role of Manav in the TV series Pavitra Rishta, or Sacred Ties, from 2009 to 2014.\
Rajput's public relations team released a statement on behalf of the family after his death.\
It pains us to share that Sushant Singh Rajput is no longer with us. We request his fans to keep him in their thoughts and celebrate his life, and his work like they have done so far. We request the media to help us maintain privacy at this moment of grief, the statement said. \
Indian Prime Minister Narendra Modi tweeted his condolences on Sunday. \
 Singh Rajput...a bright young actor gone too soon. He excelled on TV and in films. His rise in the world of entertainment inspired many and he leaves behind several memorable performances. Shocked by his passing away. My thoughts are with his family and fans. Om Shanti. "

question1 ="Which is first film of Sushant"
question2 ="Which film Sushant got award"
question3 ="Who supported Sushant"

# Ask every question against the same passage
for question in (question1, question2, question3):
    print ('\nQuestion:\n', question)
    # Only the answer text (last tuple element) is needed. Index it instead of
    # unpacking into `_`, which would overwrite IPython's last-output variable.
    ans = bert_answering_machine ( question, passage)[-1]
    print('\nAnswer from BERT: ', ans ,  '\n')



Question:
 Which is first film of Sushant

Answer from BERT:  kai po che ! 


Question:
 Which film Sushant got award

Answer from BERT:  m . s . dhoni : the untold story 


Question:
 Who supported Sushant

Answer from BERT:  indian prime minister narendra modi 

