In [None]:
### An Elasticsearch instance MUST be running before you run this notebook.
# Start one with Docker: `docker run --name elastic -p 9200:9200 -e "discovery.type=single-node" -m 1G -itd docker.elastic.co/elasticsearch/elasticsearch:7.9.2`

In [2]:
import logging
import os
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.utils import fetch_archive_from_http
from haystack import Pipeline
from haystack.nodes import TextConverter, PreProcessor, BM25Retriever, FARMReader, PDFToTextConverter

from dotenv import load_dotenv


In [3]:
# Load settings from a local .env file (if any) so the os.environ lookups further down
# (ELASTICSEARCH_HOST, HUGGINGFACEHUB_API_TOKEN) can pick them up.
load_dotenv()

True

In [4]:
# The root logger stays at WARNING; only the "haystack" logger is lowered to INFO
# so that Haystack's progress messages show up in the cell outputs.
logging.basicConfig(
    level=logging.WARNING,
    format="%(levelname)s - %(name)s -  %(message)s",
)

log = logging.getLogger("haystack")
log.setLevel(logging.INFO)

In [None]:
# Elasticsearch host is taken from ELASTICSEARCH_HOST; without it we talk to localhost.
host = os.getenv("ELASTICSEARCH_HOST", "localhost")

# Empty credentials: the connection is made without a username/password.
document_store = ElasticsearchDocumentStore(
    host=host,
    username="",
    password="",
    index="document",
)

In [5]:
# Directory whose files are listed and fed to the indexing pipelines below
# (relative to the notebook's working directory).
doc_dir = "../data/pdfs/"

In [None]:

# Converter node: turns each input PDF into text.
pdf_converter = PDFToTextConverter()

# Preprocessing node: cleans the converted text and splits it by words into
# passages of 200 with an overlap of 20, respecting sentence boundaries.
preprocessor = PreProcessor(
    split_by="word",
    split_length=200,
    split_overlap=20,
    split_respect_sentence_boundary=True,
    clean_whitespace=True,
    clean_empty_lines=True,
    clean_header_footer=True,
)

# Empty pipeline; the nodes are attached to it in the next cell.
indexing_pipeline = Pipeline()

In [None]:
# Wire the indexing graph as a straight chain: File -> TextConverter -> PreProcessor -> DocumentStore.
# Each entry is (component, node name, names of the upstream nodes).
for node, node_name, node_inputs in (
    (pdf_converter, "TextConverter", ["File"]),
    (preprocessor, "PreProcessor", ["TextConverter"]),
    (document_store, "DocumentStore", ["PreProcessor"]),
):
    indexing_pipeline.add_node(component=node, name=node_name, inputs=node_inputs)


In [None]:
# Collect the PDFs to index.
# - os.path.join avoids the doubled separator that plain concatenation produces when
#   doc_dir already ends with "/".
# - Only *.pdf entries are kept so that hidden files, sub-directories or other stray
#   files in doc_dir are not handed to the PDF converter.
# - sorted() makes the indexing order reproducible (os.listdir order is arbitrary).
files_to_index = [
    os.path.join(doc_dir, file_name)
    for file_name in sorted(os.listdir(doc_dir))
    if file_name.lower().endswith(".pdf")
]
indexing_pipeline.run_batch(file_paths=files_to_index)
# As an alternative, you can cast your text data into Document objects and write them
# into the DocumentStore using DocumentStore.write_documents().

In [None]:
# BM25 retriever backed by the Elasticsearch document store created above.
retriever = BM25Retriever(document_store=document_store)

In [None]:
# Reader built on the deepset/roberta-base-squad2 model.
# NOTE(review): use_gpu=True only requests a GPU; the recorded output of the identical
# call later in this notebook reports "Using devices: CPU", so it apparently falls back
# to CPU when no GPU is available — confirm.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)


In [None]:
# Query graph: Query -> Retriever -> Reader.
querying_pipeline = Pipeline()
querying_pipeline.add_node(
    component=retriever,
    name="Retriever",
    inputs=["Query"],
)
querying_pipeline.add_node(
    component=reader,
    name="Reader",
    inputs=["Retriever"],
)

In [None]:
# Ask one question; the per-node top_k values are passed through `params`,
# keyed by the node names used when the pipeline was assembled.
prediction = querying_pipeline.run(
    query="Can I service my air-conditioning myself?",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)

In [None]:
from pprint import pprint
from haystack.utils import print_answers

# Alternative formatted view of the same result (disabled):
# print_answers(prediction, details="minimum")  ## Choose from `minimum`, `medium` and `all`

# Dump the complete prediction object returned by the querying pipeline.
pprint(prediction)

## Using Embeddings Instead of Text Document Search

In [6]:
from haystack.document_stores import FAISSDocumentStore
from haystack.utils import print_answers
from haystack.nodes import EmbeddingRetriever
from haystack.pipelines import ExtractiveQAPipeline

In [7]:
FAISS_INDEX_PATH = "./faiss_index_pdf.faiss"
# A saved index is only reused when its companion .json file is present as well
# (presumably the config written next to the index by FAISSDocumentStore.save() — confirm).
FAISS_CONFIG_PATH = os.path.splitext(FAISS_INDEX_PATH)[0] + ".json"
# Embedding model used for the documents and for the queries.
EMBEDDING_MODEL = "sentence-transformers/multi-qa-mpnet-base-dot-v1"

index_is_cached = os.path.exists(FAISS_INDEX_PATH) and os.path.exists(FAISS_CONFIG_PATH)

if index_is_cached:
    # Logger.warning is used because Logger.warn is a deprecated alias.
    log.warning(f"Reading FAISS Index from {FAISS_INDEX_PATH}")
    document_store = FAISSDocumentStore.load(FAISS_INDEX_PATH)
else:
    log.warning("Creating new FAISS Index")
    document_store = FAISSDocumentStore(sql_url="sqlite:///faiss_document_store_pdf.db", faiss_index_factory_str="Flat")

    # Same indexing chain as for Elasticsearch, but writing into the FAISS store:
    # File -> TextConverter -> PreProcessor -> DocumentStore.
    indexing_pipeline = Pipeline()
    pdf_converter = PDFToTextConverter()
    preprocessor = PreProcessor(
        clean_whitespace=True,
        clean_header_footer=True,
        clean_empty_lines=True,
        split_by="word",
        split_length=200,
        split_overlap=20,
        split_respect_sentence_boundary=True,
    )
    indexing_pipeline.add_node(component=pdf_converter, name="TextConverter", inputs=["File"])
    indexing_pipeline.add_node(component=preprocessor, name="PreProcessor", inputs=["TextConverter"])
    indexing_pipeline.add_node(component=document_store, name="DocumentStore", inputs=["PreProcessor"])

    # os.path.join avoids a doubled "/" (doc_dir already ends with one); the *.pdf filter
    # keeps hidden or unrelated files away from the PDF converter; sorted() gives a
    # reproducible indexing order.
    files_to_index = [
        os.path.join(doc_dir, file_name)
        for file_name in sorted(os.listdir(doc_dir))
        if file_name.lower().endswith(".pdf")
    ]
    indexing_pipeline.run_batch(file_paths=files_to_index)

# The retriever is configured identically whether the store was loaded or freshly built.
retriever = EmbeddingRetriever(document_store=document_store, embedding_model=EMBEDDING_MODEL)

if not index_is_cached:
    # Important:
    # Now that the Retriever exists, update_embeddings() must be called to iterate over all
    # previously indexed documents and compute their embedding representation.
    # This can be time consuming (depending on the corpus size), but it only needs to be done once.
    # At query time, only the query is embedded and compared to the stored document embeddings.
    document_store.update_embeddings(retriever)

    # Persist the index so the next run takes the fast "load" branch above.
    document_store.save(FAISS_INDEX_PATH)

  log.warn(f"Reading FAISS Index from {FAISS_INDEX_PATH}")
INFO - haystack.modeling.utils -  Using devices: CPU - Number of GPUs: 0
INFO - haystack.nodes.retriever.dense -  Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1
  return self.fget.__get__(instance, owner)()


In [8]:
# Same reader configuration as in the Elasticsearch section above.
# The log output recorded below reports "Using devices: CPU", i.e. no GPU was used
# on that run even though use_gpu=True was passed.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2", use_gpu=True)

INFO - haystack.modeling.utils -  Using devices: CPU - Number of GPUs: 0
INFO - haystack.modeling.utils -  Using devices: CPU - Number of GPUs: 0
INFO - haystack.modeling.model.language_model -   * LOADING MODEL: 'deepset/roberta-base-squad2' (Roberta)
INFO - haystack.modeling.model.language_model -  Auto-detected model language: english
INFO - haystack.modeling.model.language_model -  Loaded 'deepset/roberta-base-squad2' (Roberta model) from model hub.
INFO - haystack.modeling.utils -  Using devices: CPU - Number of GPUs: 0


In [9]:
# Ready-made retriever + reader pipeline, here using the embedding retriever over the FAISS store.
pipe = ExtractiveQAPipeline(reader=reader, retriever=retriever)

In [None]:
# Run one question through the FAISS-backed pipeline; top_k is set per node via `params`.
prediction = pipe.run(
    query="What brand of motor oil should I use?",
    params={
        "Retriever": {"top_k": 10},
        "Reader": {"top_k": 5},
    },
)

In [None]:
# Print the answers in the tersest format; `details` also accepts "medium" and "all".
print_answers(prediction, details="minimum")

## Create an Agent That Uses the QA Pipeline

In [10]:
from haystack.agents import Agent, Tool
from haystack.nodes import PromptNode
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from torch import float16 as torchfloat16

In [20]:
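# Optional alternative (disabled): load a model locally, optionally with 4-bit quantization,
# instead of calling a hosted model. Uncomment this cell to experiment with it.
# quantization_config = BitsAndBytesConfig(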
#     load_in_4bit=True,
#     bnb_4bit_compute_dtype=torchfloat16,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_use_double_quant=True,
# )

# MODEL_ID = "EleutherAI/pythia-1b"
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_ID,
#     trust_remote_code=True,
#     # quantization_config=quantization_config
# )

# tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# prompt_node = PromptNode(MODEL_ID, model_kwargs={"model":model, "tokenizer": tokenizer})

In [25]:
# API token for the hosted model, read from the environment (never hard-code it).
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not HUGGINGFACE_API_KEY:
    # Fail fast with an actionable message instead of handing api_key=None to PromptNode.
    # NOTE(review): without a key PromptNode presumably tries to load the model locally,
    # which is impractical for a model of this size — confirm against the Haystack docs.
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set. Export it or add it to your .env file "
        "before creating the PromptNode."
    )

MODEL_ID = "google/flan-t5-xxl"
# Other candidate models:
# MODEL_ID = "tiiuae/falcon-7b"
# MODEL_ID = "EleutherAI/pythia-1b"
# MODEL_ID = "PY007/TinyLlama-1.1B-step-50K-105b"

# Generation is cut off at "Observation:" (presumably so the model does not invent
# tool observations itself — confirm against the Agent prompt template).
prompt_node = PromptNode(model_name_or_path=MODEL_ID, stop_words=["Observation:"], api_key=HUGGINGFACE_API_KEY)

# Agent driven by the prompt node; tools are registered in the next cell.
agent = Agent(prompt_node=prompt_node)

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [26]:
# Expose the extractive QA pipeline to the agent as a tool. The description is the text
# the agent is given for this tool; "answers" names the pipeline output handed back to it.
search_tool = Tool(
    pipeline_or_node=pipe,
    output_variable="answers",
    name="F150_Car_Truck_QA",
    description="useful for when you need to answer questions related to vehicles, trucks, F150",
)

agent.add_tool(search_tool)

In [28]:
# Let the agent answer a question, using the registered tool where it sees fit.
result = agent.run(query="What PSI should I inflate my tires to?")

# Show only the part of the transcript that precedes the first "---" separator.
print(result["transcript"].partition("---")[0])


Agent zero-shot-react started with {'query': 'What PSI should I inflate my tires to?', 'params': None}


HuggingFaceInferenceError: HuggingFace Inference returned an error.
Status code: 503
Response body: {"error":"Model google/flan-t5-xxl is currently loading","estimated_time":1802.7086181640625}

In [None]:
# Show the complete result object returned by agent.run(), not just the transcript excerpt.
print(result)

In [None]:
## NEXT - https://haystack.deepset.ai/tutorials/02_finetune_a_model_on_your_data