## Question Answering Demo Application

In [1]:
import sys
import html
import pandas
import pickle
import json
import spacy
import warnings
from IPython.display import display,HTML
# Silence all warnings: some operations warn on every iteration of a loop and
# would flood the cell output. Note that 'ignore' hides every occurrence,
# including the first one.
warnings.filterwarnings('ignore')
# Make the directory two levels up importable so that `aips` can be resolved.
sys.path.append("../..")
from aips import *
from transformers import pipeline
# get_engine is not defined in this notebook; presumably it is provided by the
# wildcard import from aips above — TODO confirm.
engine = get_engine()
# Handle to the "outdoors" collection that the retriever searches.
outdoors_collection = engine.get_collection("outdoors")
from functools import reduce

In [2]:
# Load spaCy's small English pipeline; get_query_from_question uses it to
# tokenize questions and to read each token's stop-word/punctuation flags.
nlp = spacy.load("en_core_web_sm")
# Remove the named-entity recognizer; get_query_from_question does not read entities.
nlp.remove_pipe("ner")
def get_query_from_question(question):
    """Reduce a natural-language question to a keyword query string.

    The question is tokenized with the module-level spaCy pipeline ``nlp``.
    Tokens whose lexeme is flagged as a stop word or as punctuation are
    dropped; the text of the remaining tokens is joined with single spaces.
    """
    keywords = []
    for token in nlp(question):
        lexeme = token.lex
        if lexeme.is_stop or lexeme.is_punct:
            continue  # carries no search signal
        keywords.append(token.text)
    return " ".join(keywords)

## Listing 14.15

In [3]:
def retriever(question):
    """Fetch candidate answer passages for a question.

    The question is converted to a keyword query, which is searched against
    the "body" field of the outdoors collection, restricted to documents
    whose post_type is "answer" and capped at 5 results.

    Returns a pandas DataFrame with one row per returned document and the
    columns "id", "question", "context" (the document body) and "url"; the
    "question" column repeats the input question on every row.
    """
    response = outdoors_collection.search(
        query=get_query_from_question(question),
        query_fields=["body"],
        return_fields=["id", "url", "body"],
        filters=[("post_type", "answer")],
        limit=5,
    )
    # Materialize once so each column below can be built in its own pass.
    docs = list(response["docs"])
    return pandas.DataFrame({
        "id": [doc["id"] for doc in docs],
        "question": [question for _ in docs],
        "context": [doc["body"] for doc in docs],
        "url": [doc["url"] for doc in docs],
    })

In [4]:
# Retrieve candidate passages for a sample question and preview up to ten rows.
example_contexts = retriever("What are minimalist shoes?")
example_contexts.head(10)

Unnamed: 0,id,question,context,url
0,18376,What are minimalist shoes?,"Minimalist shoes or ""barefoot"" shoes are shoes...",https://outdoors.stackexchange.com/questions/1...
1,18370,What are minimalist shoes?,There was actually a project done on the defin...,https://outdoors.stackexchange.com/questions/1...
2,16427,What are minimalist shoes?,"One summer job, I needed shoes to walk on a ro...",https://outdoors.stackexchange.com/questions/1...
3,18375,What are minimalist shoes?,The answer to this question will vary on your ...,https://outdoors.stackexchange.com/questions/1...
4,13540,What are minimalist shoes?,"Barefoot Shoes Also known as minimalist shoes,...",https://outdoors.stackexchange.com/questions/1...


## Listing 14.16

In [5]:
import tqdm
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline
import torch

def get_processor_device():
    """Return the device index to hand to the transformers pipeline.

    Returns 0 when torch reports that CUDA is available, otherwise -1.
    """
    if torch.cuda.is_available():
        return 0
    return -1

# 0 when CUDA is available, otherwise -1 (see get_processor_device).
device = get_processor_device()

# Local directory holding the question-answering model and its tokenizer.
# It must already exist and contain a config.json: the saved output of this
# cell shows the OSError raised when the configuration cannot be loaded from
# this path.
model_name = '../../data/roberta-base-squad2-outdoors'
# The same path supplies both the model and the tokenizer.
qa_nlp = pipeline("question-answering", model=model_name, tokenizer=model_name, device=device)

def reader(contexts):
    """Run the question-answering pipeline over every retrieved context.

    ``contexts`` is a DataFrame with "question", "context", "id" and "url"
    columns. Each row is passed to the module-level ``qa_nlp`` pipeline; the
    resulting answer is tagged with the row's "id" and "url" so it can be
    traced back to its source document.

    Returns the list of tagged answers, in row order.
    """
    def read_row(row):
        prediction = qa_nlp({"question": row["question"],
                             "context": row["context"]})
        prediction.update(id=row["id"], url=row["url"])
        return prediction

    return [read_row(row) for _, row in contexts.iterrows()]

OSError: Can't load the configuration of '../../data/roberta-base-squad2-outdoors'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure '../../data/roberta-base-squad2-outdoors' is the correct path to a directory containing a config.json file

## Listing 14.17

In [None]:
def reranker(answers):
    """Return a new list of the answers ordered from highest to lowest "score".

    The input is not modified, and answers with equal scores keep their
    original relative order.
    """
    def score_of(answer):
        return answer["score"]

    ranked = list(answers)
    ranked.sort(key=score_of, reverse=True)
    return ranked

## Listing 14.18

In [None]:
import urllib.parse
def print_answer(question, reranked):
    """Render a question and its ranked answers as HTML in the notebook.

    Args:
        question: The question text. It is shown as a heading that links to
            an outdoors.stackexchange.com search for the question.
        reranked: Iterable of answer dicts with "url", "id", "answer" and
            "score" keys, already in the order they should be displayed.

    Every interpolated value is HTML-escaped first: the question is typed by
    the user and the answer text is extracted from retrieved documents, so
    either may contain characters such as ``<`` or ``&`` that would otherwise
    corrupt the markup or inject markup of their own.
    """
    search_url = "https://outdoors.stackexchange.com/search?q=" + urllib.parse.quote(question)
    # Collect fragments and join once. The accumulator is deliberately not
    # named `html`, which would shadow the html module used for escaping.
    fragments = [
        f'<h2><a href="{html.escape(search_url, quote=True)}" target=_blank>'
        f'{html.escape(question)}</a></h2>'
    ]
    for answer in reranked:
        answer_url = html.escape(str(answer["url"]), quote=True)
        answer_id = html.escape(str(answer["id"]))
        answer_text = html.escape(str(answer["answer"]))
        score = round(answer["score"], 3)
        fragments.append(f'<h3><a href="{answer_url}" target=_blank> {answer_id} </a>')
        fragments.append(f'<strong>{answer_text}</strong><em> ({score})</em></h3>')
    display(HTML("".join(fragments)))

def ask(question):
    """Answer a question end to end and display the result.

    Chains the four pipeline stages: retrieve candidate passages, extract an
    answer from each one, order the answers by score, and render them.
    """
    print_answer(question, reranker(reader(retriever(question))))

In [None]:
# Run the full pipeline (retrieve, read, rerank, display) on a sample question.
ask('What is the best mosquito repellent?')

In [None]:
# Sample question: retrieve, read, rerank and display answers about waterproof boots.
ask('What is the best waterproof boot?')

In [None]:
# Sample question with a numeric answer: retrieve, read, rerank and display.
ask('How many people fit inside a 2-person tent?')

In [None]:
# Sample question: retrieve, read, rerank and display answers about crampon-compatible boots.
ask('What hiking boots work with crampons?')

In [None]:
# Sample question with a numeric answer: daily hiking distance.
ask('How many miles can a person hike per day?')

In [None]:
# Sample question with a quantity answer: daily water needs.
ask('How much water does a person need per day?')

In [None]:
# Sample question: retrieve, read, rerank and display answers about walking-pole materials.
ask('What material are good walking poles made of?')

Up next: [Chapter 15 - Foundation Models and Emerging Search Paradigms](../ch15/1.llm-exploration.ipynb)