In [1]:
#from haystack.utils import launch_es
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import fetch_archive_from_http
from haystack import Pipeline
from haystack.nodes import TextConverter, PreProcessor
from haystack.nodes import BM25Retriever
from haystack.nodes import FARMReader
from haystack.utils import print_answers

import os
from pprint import pprint
import logging

# Root logging: WARNING and above, rendered as "LEVEL - logger name -  message".
LOG_FORMAT = "%(levelname)s - %(name)s -  %(message)s"
logging.basicConfig(level=logging.WARNING, format=LOG_FORMAT)

# The "haystack" logger is lowered to INFO so its progress messages stay visible.
haystack_logger = logging.getLogger("haystack")
haystack_logger.setLevel(logging.INFO)

In [2]:
# Intentionally disabled: the notebook connects to an Elasticsearch instance that is
# expected to be running already (the host is looked up in the following cell).
# To use this helper instead, uncomment it together with the matching
# `from haystack.utils import launch_es` line in the first cell.
# NOTE(review): launch_es presumably starts a local Elasticsearch instance; confirm against the Haystack docs.
#launch_es()

In [4]:
# Elasticsearch host: read from ELASTICSEARCH_HOST when set, otherwise "localhost".
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

# Connect to the "document" index on that host using empty credentials.
document_store = ElasticsearchDocumentStore(host=host, username="", password="", index="document")



In [5]:
# Number of documents currently in the store. Asking the store for the count avoids
# loading every document into memory just to measure the length of the list.
document_store.get_document_count()

2861

In [5]:
# Local folder that receives the unpacked text files, and the archive to fetch them from.
doc_dir = "data/build_a_scalable_question_answering_system"
archive_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt3.zip"

# Fetch the archive into doc_dir. The call stays the last expression so its return
# value is shown as the cell output (the recorded run shows False next to a
# "Found data stored in ..." message, i.e. nothing was fetched that time).
fetch_archive_from_http(url=archive_url, output_dir=doc_dir)

INFO:haystack.utils.import_utils:Found data stored in 'data/build_a_scalable_question_answering_system'. Delete this first if you really want to fetch new data.


False

In [6]:
# Cleaning and splitting options handed to the PreProcessor: all three cleaning
# switches are on, and splitting is configured by "word" with a length of 200,
# an overlap of 20, and sentence boundaries respected.
preprocessor_options = {
    "clean_whitespace": True,
    "clean_header_footer": True,
    "clean_empty_lines": True,
    "split_by": "word",
    "split_length": 200,
    "split_overlap": 20,
    "split_respect_sentence_boundary": True,
}

# Empty pipeline plus the two processing components; they are wired together in the next cell.
indexing_pipeline = Pipeline()
text_converter = TextConverter()
preprocessor = PreProcessor(**preprocessor_options)

In [7]:
# Chain the indexing nodes: File -> TextConverter -> PreProcessor -> DocumentStore.
# Each entry is (component, node name, names of the nodes feeding it).
indexing_nodes = [
    (text_converter, "TextConverter", ["File"]),
    (preprocessor, "PreProcessor", ["TextConverter"]),
    (document_store, "DocumentStore", ["PreProcessor"]),
]
for node, node_name, node_inputs in indexing_nodes:
    indexing_pipeline.add_node(component=node, name=node_name, inputs=node_inputs)

In [8]:
# Collect the files to index. os.path.join replaces manual "/" concatenation, the
# isfile check skips any sub-directories that os.listdir also returns, and sorting
# makes the indexing order reproducible (os.listdir order is arbitrary).
files_to_index = sorted(
    path
    for path in (os.path.join(doc_dir, entry) for entry in os.listdir(doc_dir))
    if os.path.isfile(path)
)

# Push every file through the indexing pipeline; the call's result is the cell output.
indexing_pipeline.run_batch(file_paths=files_to_index)

INFO:haystack.pipelines.base:It seems that an indexing Pipeline is run, so using the nodes' run method instead of run_batch.


Converting files:   0%|          | 0/183 [00:00<?, ?it/s]

Preprocessing:   0%|          | 0/183 [00:00<?, ?docs/s]



{'documents': [<Document: {'content': "\n\n'''''Game of Thrones: Seven Kingdoms''''' was a fantasy massively multiplayer online role-playing game (MMORPG) under development by Bigpoint and Artplant, in collaboration with HBO. The game was based on the HBO TV series ''Game of Thrones'', which is itself an adaptation of the ''A Song of Ice and Fire'' book series by George R. R. Martin.\n\nThe game was being built using the Unity platform, and will be playable in the browser using the Unity Web Player plugin.\n\n==Gameplay==\n''Game of Thrones: Seven Kingdoms'' was to be set within the fictional realm of Westeros, and will use a third-person viewpoint. Gameplay was planned to be mainly based around player vs player (PvP) combat, which would involve small group combat, one on one duels and siege battles, large scale battles in which players must capture keeps, forts and castles. Player vs Environment (PvE) combat was planned to be available at launch, although this was not a major priority

In [13]:
# BM25 retriever bound to the Elasticsearch-backed document store created earlier.
retriever = BM25Retriever(
    document_store=document_store,
)

In [14]:
# Reader built from the "deepset/roberta-base-squad2" model, with GPU use requested
# (the recorded run logs CUDA:0 as the device).
reader = FARMReader(
    model_name_or_path="deepset/roberta-base-squad2",
    use_gpu=True,
)

INFO - haystack.modeling.utils -  Using devices: CUDA:0 - Number of GPUs: 1
INFO - haystack.modeling.utils -  Using devices: CUDA:0 - Number of GPUs: 1
Downloading (…)lve/main/config.json: 100%|██████████| 571/571 [00:00<00:00, 115kB/s]
INFO - haystack.modeling.model.language_model -   * LOADING MODEL: 'deepset/roberta-base-squad2' (Roberta)
Downloading pytorch_model.bin: 100%|██████████| 496M/496M [00:02<00:00, 243MB/s] 
INFO - haystack.modeling.model.language_model -  Auto-detected model language: english
INFO - haystack.modeling.model.language_model -  Loaded 'deepset/roberta-base-squad2' (Roberta model) from model hub.
Downloading (…)okenizer_config.json: 100%|██████████| 79.0/79.0 [00:00<00:00, 30.4kB/s]
Downloading (…)olve/main/vocab.json: 100%|██████████| 899k/899k [00:00<00:00, 2.42MB/s]
Downloading (…)olve/main/merges.txt: 100%|██████████| 456k/456k [00:00<00:00, 1.32MB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 772/772 [00:00<00:00, 278kB/s]
INFO - haystack.mode

In [15]:
# Query pipeline: Query -> Retriever -> Reader.
# Each entry is (component, node name, names of the nodes feeding it).
querying_pipeline = Pipeline()
query_nodes = [
    (retriever, "Retriever", ["Query"]),
    (reader, "Reader", ["Retriever"]),
]
for node, node_name, node_inputs in query_nodes:
    querying_pipeline.add_node(component=node, name=node_name, inputs=node_inputs)

In [16]:
# Question to ask, plus the per-node top_k settings passed to the pipeline:
# 10 for the "Retriever" node and 5 for the "Reader" node.
question = "Who is the father of Arya Stark?"
node_params = {
    "Retriever": {"top_k": 10},
    "Reader": {"top_k": 5},
}

prediction = querying_pipeline.run(query=question, params=node_params)

Inferencing Samples: 100%|██████████| 1/1 [00:13<00:00, 13.75s/ Batches]


In [17]:
# Dump the raw pipeline result; a condensed view is produced by print_answers in the next cell.
pprint(prediction)

{'answers': [<Answer {'answer': 'Eddard', 'type': 'extractive', 'score': 0.993372917175293, 'context': "s Nymeria after a legendary warrior queen. She travels with her father, Eddard, to King's Landing when he is made Hand of the King. Before she leaves,", 'offsets_in_document': [{'start': 207, 'end': 213}], 'offsets_in_context': [{'start': 72, 'end': 78}], 'document_ids': ['9e3c863097d66aeed9992e0b6bf1f2f4'], 'meta': {'_split_id': 4}}>,
             <Answer {'answer': 'Ned', 'type': 'extractive', 'score': 0.9753614664077759, 'context': "k in the television series.\n\n====Season 1====\nArya accompanies her father Ned and her sister Sansa to King's Landing. Before their departure, Arya's h", 'offsets_in_document': [{'start': 630, 'end': 633}], 'offsets_in_context': [{'start': 74, 'end': 77}], 'document_ids': ['7d3360fa29130e69ea6b2ba5c5a8f9c8'], 'meta': {'_split_id': 13}}>,
             <Answer {'answer': 'Lord Eddard Stark', 'type': 'extractive', 'score': 0.9566048383712769, 'context':

In [20]:
# Condensed view of the answers. `details` sets how much is printed per answer:
# choose from `minimum`, `medium` and `all`.
print_answers(prediction, details="medium")


Query: Who is the father of Arya Stark?
Answers:
[   {   'answer': 'Eddard',
        'context': 's Nymeria after a legendary warrior queen. She travels '
                   "with her father, Eddard, to King's Landing when he is made "
                   'Hand of the King. Before she leaves,',
        'score': 0.993372917175293},
    {   'answer': 'Ned',
        'context': 'k in the television series.\n'
                   '\n'
                   '====Season 1====\n'
                   'Arya accompanies her father Ned and her sister Sansa to '
                   "King's Landing. Before their departure, Arya's h",
        'score': 0.9753614664077759},
    {   'answer': 'Lord Eddard Stark',
        'context': 'rk daughters.\n'
                   '\n'
                   'During the Tourney of the Hand to honour her father Lord '
                   'Eddard Stark, Sansa Stark is enchanted by the knights '
                   'performing in the event.',
        'score': 0.9566048383712769},
 