In [19]:
import os
from haystack.document_stores import InMemoryDocumentStore
from haystack.utils import fetch_archive_from_http
from haystack.pipelines.standard_pipelines import TextIndexingPipeline
from haystack.nodes import BM25Retriever
from haystack.nodes import FARMReader
from haystack.pipelines import ExtractiveQAPipeline
from pprint import pprint
from haystack.utils import print_answers

In [20]:
# In-memory document store with BM25 enabled so a BM25Retriever can query it.
document_store = InMemoryDocumentStore(use_bm25=True)
doc_dir = "source_documents"
# Index regular files only, in sorted order: os.listdir() also returns
# sub-directories (e.g. Jupyter's .ipynb_checkpoints) and gives no ordering
# guarantee, so filter and sort to keep the run reproducible and to avoid
# handing a directory path to the file converter.
files_to_index = sorted(
    path
    for path in (os.path.join(doc_dir, name) for name in os.listdir(doc_dir))
    if os.path.isfile(path)
)
# Convert, preprocess and write every listed file into the document store.
indexing_pipeline = TextIndexingPipeline(document_store)
indexing_pipeline.run_batch(file_paths=files_to_index)

Converting files: 100%|██████████| 1/1 [00:00<00:00, 506.07it/s]
Preprocessing: 100%|██████████| 1/1 [00:00<00:00, 350.05docs/s]
Updating BM25 representation...: 100%|██████████| 5/5 [00:00<00:00, 6563.86 docs/s]


{'documents': [<Document: {'content': "Pichai Sundararajan (born June 10, 1972[3][4][5]), better known as Sundar Pichai (/ˈsʊndɑːr pɪˈtʃaɪ/), is an Indian-American business executive. He is the chief executive officer (CEO) of Alphabet Inc. and its subsidiary Google.[6]\n\nPichai began his career as a materials engineer. Following a short stint at the management consulting firm McKinsey & Co., Pichai joined Google in 2004,[7] where he led the product management and innovation efforts for a suite of Google's client software products, including Google Chrome and ChromeOS, as well as being largely responsible for Google Drive. In addition, he went on to oversee the development of other applications such as Gmail and Google Maps. In 2010, Pichai also announced the open-sourcing of the new video codec VP8 by Google and introduced the new video format, WebM. The Chromebook was released in 2012. In 2013, Pichai added Android to the list of Google products that he oversaw.\n\nPichai was select

In [21]:
# BM25 retriever that searches the documents held in `document_store`.
retriever = BM25Retriever(
    document_store=document_store,
)

In [22]:
# Reader built from the "deepset/roberta-base-squad2" checkpoint, with GPU use requested.
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

In [23]:
# Wire the reader and the retriever together into one extractive QA pipeline.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [29]:
# Run one query through the pipeline; `params` sets a per-node top_k
# (2 for the "Retriever" node, 1 for the "Reader" node).
prediction = pipe.run(
    query="who is Pichai?",
    params={"Retriever": {"top_k": 2}, "Reader": {"top_k": 1}},
)

Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.01s/ Batches]


In [30]:
# Pretty-print the whole prediction dict, including its 'answers' and 'documents' entries.
pprint(prediction)

{'answers': [<Answer {'answer': 'Indian-American business executive', 'type': 'extractive', 'score': 0.4402552843093872, 'context': 'better known as Sundar Pichai (/ˈsʊndɑːr pɪˈtʃaɪ/), is an Indian-American business executive. He is the chief executive officer (CEO) of Alphabet Inc.', 'offsets_in_document': [{'start': 109, 'end': 143}], 'offsets_in_context': [{'start': 58, 'end': 92}], 'document_ids': ['d9a4cec3ca2506db886ef152a7503e6'], 'meta': {'_split_id': 0}}>],
 'documents': [<Document: {'content': 'On October 24, 2015, he stepped into the new position at the completion of the formation of Alphabet Inc., the new holding company for the Google company family.[34][32][35]\n\nPichai had been suggested as a contender for Microsoft\'s CEO in 2014, a position that was eventually given to Satya Nadella.[36][37] In August 2017, Pichai drew publicity for firing a Google employee who wrote a ten-page manifesto criticizing the company\'s diversity policies.[38][39][40][41][42]\n\nIn December

In [44]:
# Show the context string stored on the first answer in the prediction.
prediction['answers'][0].context

'better known as Sundar Pichai (/ˈsʊndɑːr pɪˈtʃaɪ/), is an Indian-American business executive. He is the chief executive officer (CEO) of Alphabet Inc.'