In [8]:
# Packages
import os
import yfinance as yf
from haystack.nodes import LinkContentFetcher
from haystack.document_stores import InMemoryDocumentStore, FAISSDocumentStore
from haystack.nodes import  BM25Retriever, AnswerParser, DensePassageRetriever, EmbeddingRetriever
from haystack.nodes.prompt import PromptTemplate
from haystack.nodes import PromptNode
from haystack import Pipeline
!pip install "farm-haystack[faiss]"
from haystack.utils import print_answers



In [9]:
# Model configurations
hfAPIKey = os.getenv("HF_API_KEY")
hfModelName = os.getenv("HF_MODEL_NAME")

In [10]:
gold = yf.Ticker("GC=F")
news = gold.news

In [11]:
links = []

for new in news:
    links.append(new["link"])

In [12]:
lcf = LinkContentFetcher()
docs = []

for link in links:
    docs.append(lcf.fetch(link)[0])

In [13]:
documentStore = FAISSDocumentStore(faiss_index_factory_str="Flat", return_embedding=True)

documentStore.delete_documents()
documentStore.write_documents(docs)

# retriever = DensePassageRetriever(
#                 document_store=documentStore,
#                 query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
#                 passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
#                 use_gpu=True,
#                 embed_title=True,
#     )

retriever = EmbeddingRetriever(
    document_store=documentStore,
   embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
   model_format="sentence_transformers"
)

Writing Documents: 10000it [00:00, 206359.79it/s]       
  return self.fget.__get__(instance, owner)()


In [14]:
promptTemplate = PromptTemplate(
    prompt="""
    Answer the question truthfully based solely on the given documents. If the documents do not contain the answer to the question, say that answering is not possible given the available information. Your answer should be no longer than 1000 words.
    Documents:{join(documents)}
    Question:{query}
    Answer:
    """,
    output_parser=AnswerParser(),
)

# Prompt Node initialization
promptNode = PromptNode(hfModelName,api_key=hfAPIKey,default_prompt_template = promptTemplate)

In [15]:
# Pipeline
pipeline = Pipeline()
pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
pipeline.add_node(component=promptNode, name="promptNode", inputs=["retriever"])

In [20]:
output = pipeline.run(query="Summarize recent performance of gold",documents=documentStore.get_all_documents())
print_answers(output, details="minimum")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Expected prompt parameter 'documents' to be provided but it is missing. Continuing with an empty list of documents.


'Query: Summarize recent performance of gold'
'Answers:'
[   {   'answer': '1. Gold is a precious metal that has been used for '
                  'centuries as a store of value and a medium of exchange. It '
                  'is also used in jewelry and other decorative items.\n'
                  '2. Gold prices have been volatile in recent years, with '
                  'prices reaching record highs in 2011 and then falling '
                  'sharply in 2013.\n'
                  '3. The price of gold is determined by a number of factors, '
                  'including supply and demand, economic conditions, and '
                  'geopolitical events'}]
