<a href="https://colab.research.google.com/github/mclabs74/inference_nbs/blob/dev/huggingface/question_answering_inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from pathlib import Path

def create_requirements_file():
    """Write a default ``requirements.txt`` unless one is already present.

    The file is created in the current working directory with one package
    name per line. An existing ``requirements.txt`` is left untouched.
    """
    target = Path("requirements.txt")
    if target.exists():
        # Respect whatever requirements the user already supplied.
        return

    packages = ["transformers"]
    target.write_text("".join(f"{package}\n" for package in packages))
    
# Make sure requirements.txt exists before the following cell runs
# `pip install -r requirements.txt`.
create_requirements_file()

In [None]:
!pip install -q -r requirements.txt

In [None]:
!wget -q https://ml-inference.s3-us-west-2.amazonaws.com/hugging_face_question_answering_training.zip

In [None]:
!unzip hugging_face_question_answering_training.zip

In [None]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, AutoConfig
import torch
from pathlib import Path

# Directory holding the saved model files (presumably extracted from the zip
# archive downloaded earlier in the notebook — confirm the archive layout).
model_path = Path("model")

# The tokenizer is loaded from the "tokenizer" subdirectory, with the config
# read from the model directory; the model itself is loaded from model_path.
tokenizer_new = AutoTokenizer.from_pretrained(str(model_path/"tokenizer"), config=AutoConfig.from_pretrained(model_path))
model_new = AutoModelForQuestionAnswering.from_pretrained(model_path)
# Place the model on the CPU. The trailing ";" is the notebook idiom that
# keeps the cell from echoing the return value of the call.
model_new.cpu();

In [None]:
!pip install -q git+https://github.com/matthewchung74/inference_params.git

In [None]:
from inference_params.inference_params import inference_test, FieldType, inference_predict

# Input schema handed to the `inference_predict` decorator on `predict`: one
# text field per key. Per the original note, each key ("question", "context")
# serves as the label of the corresponding app input.
# NOTE(review): `input` shadows the Python builtin of the same name; it is
# kept because the decorator call references it by this name.
input = {"question": FieldType.Text, "context": FieldType.Text}
# Output schema: a single text field whose key ("result") serves as the label
# of the app output and matches the key of the dict returned by `predict`.
output = {"result": FieldType.Text}

@inference_predict(input=input, output=output)
def predict(inputs):
    """Answer a question by extracting a span of text from a context passage.

    Args:
        inputs: Mapping with the keys "question" and "context" (the fields
            declared in the module-level ``input`` schema); both values are
            expected to be text.

    Returns:
        dict: ``{"result": answer}`` where ``answer`` is the string decoded
        from the tokens between the highest-scoring start position and the
        highest-scoring end position (inclusive). The string is empty when
        the predicted end falls before the predicted start.
    """
    question = inputs["question"]
    context = inputs["context"]

    # Call the tokenizer directly: this is the supported replacement for the
    # legacy `encode_plus` entry point and returns the same encoding.
    encoded = tokenizer_new(question, context, add_special_tokens=True, return_tensors="pt")
    input_ids = encoded["input_ids"].tolist()[0]

    # Inference only: skip autograd bookkeeping so no graph is built or kept
    # alive for the forward pass.
    with torch.no_grad():
        model_output = model_new(**encoded)

    # Most likely start token, and one past the most likely end token so the
    # slice below includes the end token itself.
    answer_start = int(torch.argmax(model_output.start_logits))
    answer_end = int(torch.argmax(model_output.end_logits)) + 1

    answer_tokens = tokenizer_new.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    answer = tokenizer_new.convert_tokens_to_string(answer_tokens)
    return {"result": answer}

In [None]:
from inference_params.inference_params import in_colab

# Smoke-test the predictor with a sample question; this only runs when
# in_colab() returns a truthy value.
if in_colab():
    # Sample passage to search for the answer. Because this is a triple-quoted
    # string, the leading newline and the indentation of each line are part of
    # the context passed to the predictor.
    context = r"""
    🤗 Transformers (formerly known as pytorch-transformers and pytorch-pretrained-bert) provides general-purpose
    architectures (BERT, GPT-2, RoBERTa, XLM, DistilBert, XLNet…) for Natural Language Understanding (NLU) and Natural
    Language Generation (NLG) with over 32+ pretrained models in 100+ languages and deep interoperability between
    TensorFlow 2.0 and PyTorch.
    """

    # Keys match the fields `predict` reads from its argument.
    params = {"question":"How many pretrained models are available in Transformers?",
            "context":context}

    # Hand the predictor and the sample parameters to the inference_params
    # test helper (presumably it invokes predict_func with params — confirm
    # against the inference_params package).
    inference_test(predict_func=predict, params=params)