## HuggingFace Transformers for QA

### Installing dependences

In [2]:
#!python -m pip install --upgrade pip
#!pip install tensorflow

In [3]:
#!pip install transformers[tf-cpu]

In [4]:
#!pip install torch

In [5]:
#!pip install transformers[sentencepiece]

### Imports

In [4]:
from transformers import pipeline

In [5]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
import torch as pt

### Simple QA

In [9]:
text = r"""
    🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
    architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
    Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
    TensorFlow 2.0 and PyTorch.
    """

In [10]:
questions = [
    "How many pretrained models are available in 🤗 Transformers?",
    "How many languages are available in 🤗 Transformers?",
    "What does 🤗 Transformers provide?",
     "🤗 Transformers provides interoperability between which frameworks?",
]

In [5]:
qanlp = pipeline('question-answering')

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

In [6]:
result = qanlp(question=questions, context=text)
for res in result:
    print(f"Answer: '{res['answer']}', score: {round(res['score'], 4)}, start: {res['start']}, end: {res['end']}")

Answer: 'over 32+', score: 0.5266, start: 268, end: 276
Answer: '100+', score: 0.5444, start: 298, end: 302
Answer: 'general-purpose
    architectures', score: 0.9541, start: 98, end: 131
Answer: 'TensorFlow 2.0 and PyTorch', score: 0.8424, start: 351, end: 377


#### Selected model (example)

In [7]:
MODEL = 'csarron/bert-base-uncased-squad-v1'
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL)

In [8]:
for question in questions:
    inputs = tokenizer(question, text, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].tolist()[0]

    text_tokens = tokenizer.convert_ids_to_tokens(input_ids)
    outputs = model(**inputs)
    answer_start_scores = outputs.start_logits
    answer_end_scores = outputs.end_logits
    
    answer_start = pt.argmax(answer_start_scores)  # Get the most likely beginning of answer with the argmax of the score
    answer_end = pt.argmax(answer_end_scores) + 1  # Get the most likely end of answer with the argmax of the score
    
    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end]))
    start_predictions = pt.softmax(answer_start_scores, dim=1).tolist()[0]
    end_predictions = pt.softmax(answer_end_scores, dim=1).tolist()[0]

    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print(f"Start Confidence: {start_predictions[answer_start]}")
    print(f"End Confidence: {end_predictions[answer_end]}")

Question: How many pretrained models are available in 🤗 Transformers?
Answer: 32 +
Start Confidence: 0.6249579191207886
End Confidence: 0.0002813107566908002
Question: How many languages are available in 🤗 Transformers?
Answer: 100 +
Start Confidence: 0.8915950059890747
End Confidence: 0.17126747965812683
Question: What does 🤗 Transformers provide?
Answer: general - purpose architectures
Start Confidence: 0.9812468886375427
End Confidence: 7.347790233325213e-05
Question: 🤗 Transformers provides interoperability between which frameworks?
Answer: tensorflow 2. 0 and pytorch
Start Confidence: 0.9922741055488586
End Confidence: 0.09956756979227066


### QA

https://huggingface.co/transformers/task_summary.html#question-answering

In [None]:
# BERT

    # https://huggingface.co/bert-large-uncased-whole-word-masking-finetuned-squad

    #https://huggingface.co/csarron/bert-base-uncased-squad-v1
#MODEL = 'csarron/bert-base-uncased-squad-v1'

    #https://huggingface.co/phiyodr/bert-base-finetuned-squad2
#MODEL = 'phiyodr/bert-base-finetuned-squad2'

    #https://huggingface.co/phiyodr/bert-large-finetuned-squad2
#MODEL = 'phiyodr/bert-large-finetuned-squad2'


In [1]:
# ROBERTA

    #https://huggingface.co/csarron/roberta-base-squad-v1
#MODEL = 'csarron/roberta-base-squad-v1'

    #https://huggingface.co/phiyodr/roberta-large-finetuned-squad2
#MODEL = 'phiyodr/roberta-large-finetuned-squad2'

    #https://huggingface.co/deepset/roberta-base-squad2
#MODEL = 'deepset/roberta-base-squad2'

In [3]:
MODELS = [ 'phiyodr/roberta-large-finetuned-squad2', 'deepset/roberta-base-squad2',
          'phiyodr/bert-large-finetuned-squad2', 'phiyodr/bert-base-finetuned-squad2', 
          'valhalla/t5-base-squad',
          'tli8hf/unqover-bert-base-uncased-newsqa', 'tli8hf/unqover-roberta-base-newsqa',
          'tli8hf/unqover-bert-large-uncased-newsqa', 'tli8hf/unqover-roberta-large-newsqa']

#'bert-large-uncased-whole-word-masking-finetuned-squad',
#'csarron/roberta-base-squad-v1', 'csarron/bert-base-uncased-squad-v1',

In [49]:
MODEL = 'phiyodr/bert-base-finetuned-squad2'

In [50]:
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForQuestionAnswering.from_pretrained(MODEL)

In [51]:
# 'ConvBertForQuestionAnswering', 'LEDForQuestionAnswering', 'DistilBertForQuestionAnswering', 'AlbertForQuestionAnswering', 'CamembertForQuestionAnswering', 'BartForQuestionAnswering', 'MBartForQuestionAnswering', 'LongformerForQuestionAnswering', 'XLMRobertaForQuestionAnswering', 'SqueezeBertForQuestionAnswering', 
# 'FlaubertForQuestionAnsweringSimple', 'MobileBertForQuestionAnswering', 'XLMForQuestionAnsweringSimple', 'ElectraForQuestionAnswering', 'ReformerForQuestionAnswering', 'FunnelForQuestionAnswering', 'LxmertForQuestionAnswering', 'MPNetForQuestionAnswering', 'DebertaForQuestionAnswering'
# 'RobertaForQuestionAnswering', 'BertForQuestionAnswering', 'XLNetForQuestionAnsweringSimple', 
qa_specific = pipeline('question-answering', model=model, tokenizer=tokenizer)

In [52]:
beyonce_question = 'Jay Z and Beyonce attended which event together in August of 2011?'
amazon_question = 'Which name is also used to describe the Amazon rainforest in English?'
question_example = 'When was the Tower Theatre built?'
#response_example = '1939'
context_example = 'The popular neighborhood known as the Tower District is centered around the historic Tower Theatre, which is included on the National List of Historic Places. The theater was built in 1939 and is at Olive and Wishon Avenues in the heart of the Tower District. (The name of the theater refers to a well-known landmark water tower, which is actually in another nearby area). The Tower District neighborhood is just north of downtown Fresno proper, and one-half mile south of Fresno City College. Although the neighborhood was known as a residential area prior, the early commercial establishments of the Tower District began with small shops and services that flocked to the area shortly after World War II. The character of small local businesses largely remains today. To some extent, the businesses of the Tower District were developed due to the proximity of the original Fresno Normal School, (later renamed California State University at Fresno). In 1916 the college moved to what is now the site of Fresno City College one-half mile north of the Tower District.'
amazon_context_example= "The Amazon rainforest (Portuguese: Floresta Amazônica or Amazônia; Spanish: Selva Amazónica, Amazonía or usually Amazonia; French: Forêt amazonienne; Dutch: Amazoneregenwoud), also known in English as Amazonia or the Amazon Jungle, is a moist broadleaf forest that covers most of the Amazon basin of South America. This basin encompasses 7,000,000 square kilometres (2,700,000 sq mi), of which 5,500,000 square kilometres (2,100,000 sq mi) are covered by the rainforest. This region includes territory belonging to nine nations. The majority of the forest is contained within Brazil, with 60% of the rainforest, followed by Peru with 13%, Colombia with 10%, and with minor amounts in Venezuela, Ecuador, Bolivia, Guyana, Suriname and French Guiana. States or departments in four nations contain 'Amazonas' in their names. The Amazon represents over half of the planet's remaining rainforests, and comprises the largest and most biodiverse tract of tropical rainforest in the world, with an estimated 390 billion individual trees divided into 16,000 species."
beyonce_context= 'In August, the couple attended the 2011 MTV Video Music Awards, at which Beyoncé performed "Love on Top" and started the performance saying "Tonight I want you to stand up on your feet, I want you to feel the love that\'s growing inside of me". At the end of the performance, she dropped her microphone, unbuttoned her blazer and rubbed her stomach, confirming her pregnancy she had alluded to earlier in the evening. Her appearance helped that year\'s MTV Video Music Awards become the most-watched broadcast in MTV history, pulling in 12.4 million viewers; the announcement was listed in Guinness World Records for "most tweets per second recorded for a single event" on Twitter, receiving 8,868 tweets per second and "Beyonce pregnant" was the most Googled term the week of August 29, 2011.'

In [53]:
print(qa_specific(question=question_example, context=context_example))
print(qa_specific(question=amazon_question, context=amazon_context_example))
print(qa_specific(question=beyonce_question, context=beyonce_context))

{'score': 0.9739360213279724, 'start': 184, 'end': 188, 'answer': '1939'}
{'score': 0.6646409034729004, 'start': 201, 'end': 230, 'answer': 'Amazonia or the Amazon Jungle'}
{'score': 0.5714967846870422, 'start': 40, 'end': 62, 'answer': 'MTV Video Music Awards'}


In [11]:
result = qa_specific(question=questions, context=text)
for res in result:
    print(f"Answer: '{res['answer']}', score: {round(res['score'], 4)}, start: {res['start']}, end: {res['end']}")

Answer: 'over 32+', score: 0.6992, start: 268, end: 276
Answer: '100+', score: 0.4942, start: 298, end: 302
Answer: 'general-purpose
    architectures', score: 0.2448, start: 98, end: 131
Answer: 'TensorFlow 2.0 and PyTorch.', score: 0.9836, start: 351, end: 378


In [5]:
# XLNET - No funciona

#https://huggingface.co/jkgrad/xlnet-base-squadv2
#MODEL = 'jkgrad/xlnet-base-squadv2'

#MODEL = 'jkgrad/xlnet-base-cased-squad-quoref'

# https://huggingface.co/saburbutt/xlnet_large_tweetqa
#MODEL = 'saburbutt/xlnet_large_tweetqa'

#from transformers import XLNetTokenizerFast, XLNetForQuestionAnsweringSimple

#tokenizer = XLNetTokenizerFast.from_pretrained(MODEL)
#model = XLNetForQuestionAnsweringSimple.from_pretrained(MODEL)

In [20]:
qa_specific = pipeline('question-answering', model=model, tokenizer=tokenizer)

In [21]:
result = qa_specific(question=questions, context=text)
for res in result:
    print(f"Answer: '{res['answer']}', score: {round(res['score'], 4)}, start: {res['start']}, end: {res['end']}")

Answer: 'over 32+', score: 0.6992, start: 268, end: 276
Answer: '100+', score: 0.4942, start: 298, end: 302
Answer: 'general-purpose
    architectures', score: 0.2448, start: 98, end: 131
Answer: 'TensorFlow 2.0 and PyTorch.', score: 0.9836, start: 351, end: 378


In [74]:
# T5
# https://huggingface.co/ozcangundes/T5-base-for-BioQA
MODEL = 'ozcangundes/T5-base-for-BioQA'

In [138]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained(MODEL)
model = T5ForConditionalGeneration.from_pretrained(MODEL)

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [76]:
def get_answer(question,context):
    source_encoding=tokenizer(
        question,
        context,
        max_length=512,
        padding="max_length",
        truncation="only_second",
        return_attention_mask=True,
        add_special_tokens=True,
        return_tensors="pt")
  
    generated_ids=model.generate(
          input_ids=source_encoding["input_ids"],
          attention_mask=source_encoding["attention_mask"])

    preds=[tokenizer.decode(gen_id, skip_special_tokens=True, clean_up_tokenization_spaces=True) for gen_id in generated_ids]

    return {'answer': "".join(preds)}

In [77]:
for query in questions:
    result = get_answer(query, text)
    print(result)

{'answer': '32'}
{'answer': '100+'}
{'answer': 'Transformers provides general-purpose architectures (BERT, GPT-2, Ro'}
{'answer': 'TensorFlow 2.0'}


In [139]:
# https://huggingface.co/valhalla/t5-base-squad
MODEL = "valhalla/t5-base-squad"

In [140]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL)

The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.
The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.


In [141]:
def get_answer(question, context):
    input_text = "question: %s  context: %s </s>" % (question, context)
    features = tokenizer([input_text], return_tensors='pt')
    
    out = model.generate(input_ids=features['input_ids'], attention_mask=features['attention_mask'])
    return tokenizer.decode(out[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)

In [142]:
for query in questions:
    result = get_answer(query, text)
    print(result)

NameError: name 'questions' is not defined

### Load dataset

In [1]:
import pandas as pd
import json

In [3]:
MODELS = [ 'phiyodr/roberta-large-finetuned-squad2', 'deepset/roberta-base-squad2',
          'phiyodr/bert-large-finetuned-squad2', 'phiyodr/bert-base-finetuned-squad2', 
          'valhalla/t5-base-squad',
          'tli8hf/unqover-bert-base-uncased-newsqa', 'tli8hf/unqover-roberta-base-newsqa',
          'tli8hf/unqover-bert-large-uncased-newsqa', 'tli8hf/unqover-roberta-large-newsqa']

#'bert-large-uncased-whole-word-masking-finetuned-squad',
#'csarron/roberta-base-squad-v1', 'csarron/bert-base-uncased-squad-v1',

In [8]:
#SQUAD 2.0 DEV

In [9]:
#squad_dev = pd.read_excel('BERT-NER-POS-SQUAD-dev-queries.xlsx') #'NER-POS-SQUAD-dev-queries.xlsx')

In [153]:
squad_dev = pd.read_excel('BERT-NER-POS-SQUAD-train.xlsx') #'NER-POS-SQUAD-train-queries.xlsx')

In [154]:
#squad_dev = pd.read_excel('BERT-NER-POS-F-NEWSQA-ftunNEWSQA-ftunSQUAD.xlsx')

In [155]:
squad_dev.columns

Index(['title', 'id', 'query', 'answer', 'impossible', 'plausible', 'dataset',
       'context', 'query_ner', 'context_ner', 'answer_ner', 'query_pos',
       'answer_pos', 'plausible_pos', 'context_pos',
       'phiyodr/roberta-large-finetuned-squad2', 'deepset/roberta-base-squad2',
       'phiyodr/bert-large-finetuned-squad2',
       'phiyodr/bert-base-finetuned-squad2', 'valhalla/t5-base-squad'],
      dtype='object')

In [132]:
#MODEL = #'phiyodr/bert-base-finetuned-squad2'
          ##'phiyodr/bert-large-finetuned-squad2' 

In [134]:
#squad_dev[MODEL] = None

In [156]:
sum(squad_dev[MODEL].isna())

4353

In [137]:
#tokenizer = AutoTokenizer.from_pretrained(MODEL)
#model = AutoModelForQuestionAnswering.from_pretrained(MODEL)

In [144]:
# 'ConvBertForQuestionAnswering', 'LEDForQuestionAnswering', 'DistilBertForQuestionAnswering', 'AlbertForQuestionAnswering', 'CamembertForQuestionAnswering', 'BartForQuestionAnswering', 'MBartForQuestionAnswering', 'LongformerForQuestionAnswering', 'XLMRobertaForQuestionAnswering', 'SqueezeBertForQuestionAnswering', 
# 'FlaubertForQuestionAnsweringSimple', 'MobileBertForQuestionAnswering', 'XLMForQuestionAnsweringSimple', 'ElectraForQuestionAnswering', 'ReformerForQuestionAnswering', 'FunnelForQuestionAnswering', 'LxmertForQuestionAnswering', 'MPNetForQuestionAnswering', 'DebertaForQuestionAnswering'
# 'RobertaForQuestionAnswering', 'BertForQuestionAnswering', 'XLNetForQuestionAnsweringSimple', 
#qa_specific = pipeline('question-answering', model=model, tokenizer=tokenizer)

In [117]:
print(len(squad_dev.loc[squad_dev[MODEL]=='{}']))

0


In [167]:
# & (squad_dev['dataset'] == 'test')
context_restantes = squad_dev.loc[(squad_dev[MODEL].isna()), 'context']
print(len(context_restantes))

0


In [1]:
#context_restantes.values[0]

In [128]:
for context in context_restantes.unique():
    queries = squad_dev.loc[squad_dev['context'] == context, 'query'].values.tolist()
    queries = [element for element in queries if len(element) > 0 and str(element) != 'nan']
    if len(queries) > 0 and len(context) > 0:
        answers = qa_specific(question=queries, context=context)
        if len(answers) > 0:
            for query in queries:
                try:
                    squad_dev.loc[(squad_dev['context'] == context) & (squad_dev['query'] == query), 
                        MODEL] = str(answers[queries.index(query)])
                except:
                    squad_dev.loc[(squad_dev['context'] == context) & (squad_dev['query'] == query), 
                        MODEL] = '{}'

In [48]:
#squad_dev.to_excel('BERT-NER-POS-F-NEWSQA-ftunNEWSQA-ftunSQUAD-act.xlsx', index=False)

In [170]:
squad_dev.to_excel('BERT-NER-POS-SQUAD-train.xlsx', index=False)

In [62]:
# BERT-NER-POS-F-NEWSQA-ftunNEWSQA-ftunSQUAD.xlsx
#newsqa_fn.to_excel('BERT-NER-POS-F-NEWSQA-ftunNEWSQA-ftunSQUAD.xlsx')

In [162]:
print(sum(squad_dev[MODEL].isna()))

3435


In [163]:
for context in context_restantes.unique():
    queries = squad_dev.loc[squad_dev['context'] == context, 'query'].values.tolist()
    for query in queries:
        answers = get_answer(query, context)
        squad_dev.loc[(squad_dev['context'] == context) & (squad_dev['query'] == query), MODEL] = str(answers)
#    print(sum(squad_dev[MODEL].isna()))

In [54]:
squad_dev.head()

Unnamed: 0,title,id,query,answer,impossible,plausible,dataset,context,query_ner,context_ner,...,context_pos,bert-large-uncased-whole-word-masking-finetuned-squad,csarron/roberta-base-squad-v1,csarron/bert-base-uncased-squad-v1,phiyodr/roberta-large-finetuned-squad2,deepset/roberta-base-squad2,phiyodr/bert-base-finetuned-squad2,phiyodr/bert-large-finetuned-squad2,ozcangundes/T5-base-for-BioQA,valhalla/t5-base-squad
0,Normans,56ddde6b9a695914005b9628,In what country is Normandy located?,France,False,,dev,The Normans (Norman: Nourmands; French: Norman...,{'LOCATION': ['Normandy']},"{'MISC': ['Normans Norman Nourmands', 'Carolin...",...,"[Tag(word='The', pos='DT', lemma='the'), Tag(w...","{'score': 0.9912782907485962, 'start': 159, 'e...","{'score': 0.9869462251663208, 'start': 159, 'e...","{'score': 0.9820709824562073, 'start': 159, 'e...","{'score': 0.9877960681915283, 'start': 159, 'e...","{'score': 0.9942156076431274, 'start': 159, 'e...","{'score': 0.9931656122207642, 'start': 159, 'e...","{'score': 0.991849422454834, 'start': 159, 'en...",{'answer': 'France'},France
1,Normans,56ddde6b9a695914005b9629,When were the Normans in Normandy?,10th and 11th centuries,False,,dev,The Normans (Norman: Nourmands; French: Norman...,{'LOCATION': ['Normandy']},"{'MISC': ['Normans Norman Nourmands', 'Carolin...",...,"[Tag(word='The', pos='DT', lemma='the'), Tag(w...","{'score': 0.7390105724334717, 'start': 94, 'en...","{'score': 0.6863948702812195, 'start': 94, 'en...","{'score': 0.6613354086875916, 'start': 94, 'en...","{'score': 0.6423813700675964, 'start': 94, 'en...","{'score': 0.6264273524284363, 'start': 94, 'en...","{'score': 0.7914076447486877, 'start': 94, 'en...","{'score': 0.852700412273407, 'start': 94, 'end...",{'answer': '10th and 11th centuries'},10th and 11th centuries
2,Normans,56ddde6b9a695914005b962a,From which countries did the Norse originate?,"Denmark, Iceland and Norway",False,,dev,The Normans (Norman: Nourmands; French: Norman...,{},"{'MISC': ['Normans Norman Nourmands', 'Carolin...",...,"[Tag(word='The', pos='DT', lemma='the'), Tag(w...","{'score': 0.993444561958313, 'start': 256, 'en...","{'score': 0.9788983464241028, 'start': 256, 'e...","{'score': 0.985059916973114, 'start': 256, 'en...","{'score': 0.9993707537651062, 'start': 256, 'e...","{'score': 0.9779964685440063, 'start': 256, 'e...","{'score': 0.47998306155204773, 'start': 256, '...","{'score': 0.9851764440536499, 'start': 256, 'e...","{'answer': 'Denmark, Iceland and Norway'}","Denmark, Iceland and Norway"
3,Normans,56ddde6b9a695914005b962b,Who was the Norse leader?,Rollo,False,,dev,The Normans (Norman: Nourmands; French: Norman...,"{'MISC': ['Norse'], 'TITLE': ['leader']}","{'MISC': ['Normans Norman Nourmands', 'Carolin...",...,"[Tag(word='The', pos='DT', lemma='the'), Tag(w...","{'score': 0.9917401671409607, 'start': 308, 'e...","{'score': 0.9590807557106018, 'start': 308, 'e...","{'score': 0.9961839914321899, 'start': 308, 'e...","{'score': 0.9961422085762024, 'start': 308, 'e...","{'score': 0.9769473075866699, 'start': 308, 'e...","{'score': 0.9280317425727844, 'start': 308, 'e...","{'score': 0.9988728761672974, 'start': 308, 'e...",{'answer': 'Rollo'},Rollo
4,Normans,56ddde6b9a695914005b962c,What century did the Normans first gain their ...,10th century,False,,dev,The Normans (Norman: Nourmands; French: Norman...,"{'DURATION': ['century'], 'ORDINAL': ['first']}","{'MISC': ['Normans Norman Nourmands', 'Carolin...",...,"[Tag(word='The', pos='DT', lemma='the'), Tag(w...","{'score': 0.44821697473526, 'start': 671, 'end...","{'score': 0.5641764998435974, 'start': 671, 'e...","{'score': 0.5815108418464661, 'start': 671, 'e...","{'score': 0.8355527520179749, 'start': 671, 'e...","{'score': 0.6819514632225037, 'start': 671, 'e...","{'score': 0.5060123205184937, 'start': 671, 'e...","{'score': 0.8878768086433411, 'start': 671, 'e...",{'answer': '10th century'},10th


In [159]:
squad_dev[MODEL].value_counts()

{'score': 0.39727702736854553, 'start': 569, 'end': 586, 'answer': '1,230 km (760 mi)'}                                                                           2
{'score': 0.5906504988670349, 'start': 22, 'end': 56, 'answer': 'a body of treaties and legislation'}                                                             2
{'score': 0.38328251242637634, 'start': 55, 'end': 59, 'answer': '1700'}                                                                                          2
{'score': 0.6890050768852234, 'start': 27, 'end': 32, 'answer': 'three'}                                                                                          2
{'score': 0.09566855430603027, 'start': 1304, 'end': 1390, 'answer': 'ensure that in the interpretation and application of the Treaties the law is observed"'}    2
                                                                                                                                                                 ..
{'score': 0.3989

## Other pre-trained models for QA with TF

In [35]:
from transformers import AutoTokenizer, TFAutoModelForQuestionAnswering, TFBertForMaskedLM, TFBertForQuestionAnswering
import tensorflow as tf

In [36]:
def get_pretrained_squad_model(model_name):
    model, tokenizer = None, None
    if model_name == "distilbertsquad1":
        tokenizer = AutoTokenizer.from_pretrained("distilbert-base-cased-distilled-squad",use_fast=True)
        model = TFBertForQuestionAnswering.from_pretrained("distilbert-base-cased-distilled-squad", from_pt=True)
    elif model_name == "distilbertsquad2": 
        tokenizer = AutoTokenizer.from_pretrained("twmkn9/distilbert-base-uncased-squad2",use_fast=True)
        model = TFAutoModelForQuestionAnswering.from_pretrained("twmkn9/distilbert-base-uncased-squad2", from_pt=True)
    elif model_name == "bertsquad2": 
        tokenizer = AutoTokenizer.from_pretrained("deepset/bert-base-cased-squad2",use_fast=True)
        model = TFBertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2", from_pt=True)
    elif model_name == "bertlargesquad2": 
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased",use_fast=True)
        model = TFBertForQuestionAnswering.from_pretrained("deepset/bert-large-uncased-whole-word-masking-squad2", from_pt=True)
    elif model_name == "albertbasesquad2": 
        tokenizer = AutoTokenizer.from_pretrained("twmkn9/albert-base-v2-squad2",use_fast=True)
        model = TFBertForQuestionAnswering.from_pretrained("twmkn9/albert-base-v2-squad2", from_pt=True)
    elif model_name == "distilrobertasquad2": 
        tokenizer = AutoTokenizer.from_pretrained("twmkn9/distilroberta-base-squad2",use_fast=True)
        model = TFBertForQuestionAnswering.from_pretrained("twmkn9/distilroberta-base-squad2", from_pt=True)
    elif model_name == "robertasquad2": 
        tokenizer = AutoTokenizer.from_pretrained("deepset/roberta-base-squad2",use_fast=True)
        model = TFAutoModelForQuestionAnswering.from_pretrained("deepset/roberta-base-squad2", from_pt=True)
    elif model_name == "bertlm":
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased",use_fast=True)
        model = TFBertForMaskedLM.from_pretrained("bert-base-uncased", from_pt=True)

    return model, tokenizer

In [34]:
# Choose from any set of HuggingFace models to use!
bqa_model, bqa_tokenizer = get_pretrained_squad_model("bertsquad2")

Downloading:   0%|          | 0.00/433M [00:00<?, ?B/s]

All PyTorch model weights were used when initializing TFBertForQuestionAnswering.

All the weights of TFBertForQuestionAnswering were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForQuestionAnswering for predictions without further training.
