# Question Answering with PyTorch Transformers: Part 3

Read the full article: https://medium.com/@patonw/question-answering-with-pytorch-transformers-part-3-d67ac06a23b7

> Welcome back! This is the third part of an on-going series about building a question answering service using the Transformers library. The prior article looked at using scikit-learn to build an indexing service for fetching relevant articles to feed into Transformers.
>
> This time we’ll really start working with the library in more depth. In this article we’re going to peel back a layer to examine the inner workings of the Transformers question answering pipeline. Then we’ll use the model API to build our own pipeline. Finally we’ll wrap it all up in a simple Flask service that can be accessed over a network.

In [None]:
# Prepare for Paperspace. Manage these via conda or pipenv on your own machine
!pip --quiet install flask torch transformers sklearn pyarrow seaborn spacy[cuda92]

In [None]:
import os
import random
import pandas as pd
import json
import sklearn
import spacy

import numpy as np
import torch
import torch.nn.functional as F
from itertools import islice
from tqdm import tqdm
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from transformers import *
%matplotlib inline

In [None]:
from constants import *

In [None]:
# Skip this if you've already run Part 2
# NOTE(review): presumably Part 2 is what writes cache/question_context.feather,
# which a later cell loads — confirm before skipping on a fresh machine.
%run "Questionable - Part 2.ipynb"

In [None]:
# from_pretrained() downloads and caches the weights on first use, so expect a
# delay the first time this cell runs.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = BertForQuestionAnswering.from_pretrained(
    'bert-large-uncased-whole-word-masking-finetuned-squad'
).to(device)

In [None]:
# Load the cached question/context table (the "question" and "context" columns
# are read by the cells below) and display it.
question_df = pd.read_feather("cache/question_context.feather")
question_df

In [None]:
# Take the second row (position 1) as the worked example and show it.
question, context = question_df.iloc[1][["question", "context"]]
question, context

In [None]:
# Example from https://huggingface.co/transformers/model_doc/bert.html#bertforquestionanswering
# The "[CLS] question [SEP] context [SEP]" layout is written out by hand, so the
# tokenizer is told not to add its own special tokens.
input_text = "[CLS] " + question + " [SEP] " + context + " [SEP]"
input_ids = tokenizer.encode(input_text, add_special_tokens=False)

# Segment ids: 0 for everything up to and including the first [SEP] (the question),
# 1 for the remainder (the context). The separator is located once instead of being
# re-searched for every position, and its id comes from the tokenizer rather than a
# hard-coded 102.
sep_index = input_ids.index(tokenizer.sep_token_id)
token_type_ids = [0] * (sep_index + 1) + [1] * (len(input_ids) - sep_index - 1)
input_ids[:10], token_type_ids[:20]

In [None]:
# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    start_scores, end_scores = model(
        torch.tensor([input_ids], device=device),
        token_type_ids=torch.tensor([token_type_ids], device=device),
    )
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)

# The answer runs from the best-scoring start token through the best-scoring end
# token (inclusive).
print(' '.join(all_tokens[start_scores.argmax() : end_scores.argmax() + 1]))
print(f'score: {start_scores.max()}')

In [None]:
# Histogram (KDE disabled) plus rug plot of the start scores; .cpu() copies the
# tensor to host memory first so it can be plotted.
# NOTE(review): distplot is deprecated in recent seaborn releases — confirm the
# installed version still provides it.
sns.distplot(start_scores.cpu(), kde=False, rug=True)

In [None]:
# Resolve the best answer span once, as plain ints, so the slice bounds below can be
# clamped and are not recomputed for every slice.
start_idx = int(torch.argmax(start_scores))
end_idx = int(torch.argmax(end_scores))

# Clamp the start of the leading window at 0: a negative slice start would wrap
# around to the end of the sequence and produce the wrong prefix whenever the
# answer begins within the first 8 tokens.
prefix = tokenizer.decode(input_ids[max(start_idx - 8, 0):start_idx])
answer = tokenizer.decode(input_ids[start_idx:end_idx + 1])
suffix = tokenizer.decode(input_ids[end_idx + 1:end_idx + 8])

# Show the answer with a few tokens of surrounding context.
"..." + prefix + " >>>" + answer + "<<< " + suffix + "..."

In [None]:
# Switch to the first row (position 0) as the next worked example and show it.
question, context = question_df.iloc[0][["question", "context"]]
question, context

In [None]:
# Example from https://huggingface.co/transformers/model_doc/bert.html#bertforquestionanswering
# The "[CLS] question [SEP] context [SEP]" layout is written out by hand, so the
# tokenizer is told not to add its own special tokens.
input_text = "[CLS] " + question + " [SEP] " + context + " [SEP]"
input_ids = tokenizer.encode(input_text, add_special_tokens=False)

# Segment ids: 0 up to and including the first [SEP] (the question), 1 afterwards
# (the context). The separator is found once rather than once per position, and its
# id is taken from the tokenizer instead of the literal 102.
sep_index = input_ids.index(tokenizer.sep_token_id)
token_type_ids = [0] * (sep_index + 1) + [1] * (len(input_ids) - sep_index - 1)

In [None]:
# Inference only, so gradient tracking is switched off for the forward pass.
with torch.no_grad():
    start_scores, end_scores = model(
        torch.tensor([input_ids], device=device),
        token_type_ids=torch.tensor([token_type_ids], device=device),
    )
all_tokens = tokenizer.convert_ids_to_tokens(input_ids)

# Print the tokens between the best start and best end position (inclusive),
# followed by the best start score.
print(' '.join(all_tokens[start_scores.argmax() : end_scores.argmax() + 1]))
print(f'score: {start_scores.max()}')

# Distribution of the start scores over all token positions.
sns.distplot(start_scores.cpu(), kde=False, rug=True)

## Prepare batch for inference

In [None]:
# Encode every "[CLS] question [SEP] context [SEP]" pair. The special tokens are
# written by hand, so the tokenizer must not add them a second time.
question_df["encoded"] = question_df.apply(
    lambda row: tokenizer.encode(
        "[CLS] " + row["question"] + " [SEP] " + row["context"] + " [SEP]",
        add_special_tokens=False),
    axis=1)


def _segment_ids(encoded):
    """Return segment ids: 0 up to and including the first [SEP], 1 for the rest."""
    # Find the separator once per row (not once per token), using the tokenizer's
    # own id rather than a hard-coded 102.
    sep_index = encoded.index(tokenizer.sep_token_id)
    return [0] * (sep_index + 1) + [1] * (len(encoded) - sep_index - 1)


question_df["tok_type"] = question_df["encoded"].apply(_segment_ids)
question_df.iloc[:3]

In [None]:
%%time
with torch.no_grad():
    X = torch.nn.utils.rnn.pad_sequence([torch.tensor(row) for row in question_df["encoded"]], batch_first=True).to(device)
    T = torch.nn.utils.rnn.pad_sequence([torch.tensor(row) for row in question_df["tok_type"]], batch_first=True).to(device)
    start_scores, end_scores = model(X, token_type_ids=T)
    max_score, max_start = torch.max(start_scores, axis=1)
    soft_max = F.softmax(max_score, dim=0)

In [None]:
# Start from the context and token ids, then attach the per-row results of the
# batch inference (moved to host memory as NumPy arrays).
answer_df = question_df[["context", "encoded"]].assign(
    answer_score=max_score.cpu().numpy(),
    answer_start=max_start.cpu().numpy(),
    answer_softmax=soft_max.cpu().numpy(),
)
answer_df

In [None]:
# For each row, the answer length is the offset of the best end score at or after
# the chosen start position, plus one so that the end token itself is included.
max_len = torch.zeros_like(max_start)
for i, (row_end_scores, start) in enumerate(zip(end_scores, max_start)):
    max_len[i] = row_end_scores[start:].argmax() + 1

answer_df["answer_length"] = max_len.cpu().numpy()

In [None]:
# Keep only rows whose best start score exceeds 1.0, highest score first.
confident = answer_df["answer_score"] > 1.0
answer_df = answer_df.loc[confident].sort_values("answer_score", ascending=False)
answer_df.head()

In [None]:
def decode_answer(row):
    """Decode the answer span of one ``answer_df`` row back into text.

    Reads ``row.encoded`` (the token ids), ``row.answer_start`` (index of the
    first answer token) and ``row.answer_length`` (span length in tokens). The
    length is limited to the range 0..20 before the selected ids are decoded
    with the notebook-level ``tokenizer``.
    """
    span_length = min(max(row.answer_length, 0), 20)
    answer_ids = row.encoded[row.answer_start:][:span_length]
    return tokenizer.decode(answer_ids)

In [None]:
# Decode the answer text for every remaining row, then preview the first five.
answer_df["answer"] = answer_df.apply(decode_answer, axis=1)
answer_df.loc[:, ["answer_softmax", "answer_score", "answer"]].iloc[:5]

In [None]:
# The three best answers as a list of plain dicts, one per row.
answer_df.loc[:, ["answer_softmax", "answer_score", "answer", "context"]].head(3).to_dict(orient="records")

## Bonus: Questionable Web Service

From the Jupyter file manager, open a new Terminal and run:

```
python questionable.py
```

In [None]:
import requests

# Ask the locally running service a question; the question text is sent as the
# "q" query parameter and the JSON reply is displayed.
resp = requests.get(
    "http://localhost:8765/answer",
    params={"q": "When did the last country to adopt the Gregorian calendar start using it?"},
)
resp.json()

In [None]:
# Same request from the shell: -G sends the url-encoded data as a GET query string,
# and the response is piped through json_pp for pretty-printing.
! curl -G http://localhost:8765/answer \
        --data-urlencode "q=When did the last country to adopt the Gregorian calendar start using it?" \
        | json_pp