In [3]:
import tqdm as notebook_tqdm
from haystack.utils import Secret

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
os.environ['PINECONE_API_KEY'] = PINECONE_API_KEY

In [5]:
PINECONE_API_KEY

'aa8a98f3-7968-49ee-ac1f-3d9a9b1e27aa'

In [24]:
from haystack import Document
from haystack_integrations.document_stores.pinecone import PineconeDocumentStore
document_store = PineconeDocumentStore(
	environment="gcp-starter",
	index="default",
	namespace="default",
	dimension=768
)


In [25]:
document_store

<haystack_integrations.document_stores.pinecone.document_store.PineconeDocumentStore at 0x21656d86a60>

In [26]:
from haystack.components.converters import PyPDFToDocument
from pathlib import Path # type: ignore
converter = PyPDFToDocument()
docs = converter.run(sources=[Path(r"C:\Users\sunny\haystak-end-to-end\data\Retrieval-Augmented-Generation-for-NLP.pdf")])

In [27]:
docs

{'documents': [Document(id=2f4a29ed8bf31ff58961f77b64072ca5d257cb9dbfeaacd872a01dfbb746070e, content: 'Retrieval-Augmented Generation for
  Knowledge-Intensive NLP Tasks
  Patrick Lewis‚Ä†‚Ä°, Ethan Perez‚ãÜ,
  Alek...', meta: {'file_path': 'C:\\Users\\sunny\\haystak-end-to-end\\data\\Retrieval-Augmented-Generation-for-NLP.pdf'})]}

In [28]:
print(docs['documents'][0].content)

Retrieval-Augmented Generation for
Knowledge-Intensive NLP Tasks
Patrick Lewis‚Ä†‚Ä°, Ethan Perez‚ãÜ,
Aleksandra Piktus‚Ä†, Fabio Petroni‚Ä†, Vladimir Karpukhin‚Ä†, Naman Goyal‚Ä†, Heinrich K√ºttler‚Ä†,
Mike Lewis‚Ä†, Wen-tau Yih‚Ä†, Tim Rockt√§schel‚Ä†‚Ä°, Sebastian Riedel‚Ä†‚Ä°, Douwe Kiela‚Ä†
‚Ä†Facebook AI Research;‚Ä°University College London;‚ãÜNew York University;
plewis@fb.com
Abstract
Large pre-trained language models have been shown to store factual knowledge
in their parameters, and achieve state-of-the-art results when Ô¨Åne-tuned on down-
stream NLP tasks. However, their ability to access and precisely manipulate knowl-
edge is still limited, and hence on knowledge-intensive tasks, their performance
lags behind task-speciÔ¨Åc architectures. Additionally, providing provenance for their
decisions and updating their world knowledge remain open research problems. Pre-
trained models with a differentiable access mechanism to explicit non-parametric
memory have so far been only 

In [29]:
print(docs['documents'][0].embedding)

None


In [None]:
'''document_store.write_documents([
    Document(content="This is first", embedding=[0.0]*5), 
    Document(content="This is second",embedding=[0.1, 0.2, 0.3, 0.4, 0.5])
    ])'''

In [30]:
from haystack import Pipeline
from haystack.components.writers import DocumentWriter
from haystack.components.preprocessors import DocumentSplitter
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

In [22]:
SentenceTransformersDocumentEmbedder()

<haystack.components.embedders.sentence_transformers_document_embedder.SentenceTransformersDocumentEmbedder object at 0x000002162933E580>
Inputs:
  - documents: List[Document]
Outputs:
  - documents: List[Document]

In [31]:
indexing = Pipeline()

In [32]:
indexing.add_component("converter", PyPDFToDocument())
indexing.add_component("splitter", DocumentSplitter(split_by="sentence", split_length=2))
indexing.add_component("embedder", SentenceTransformersDocumentEmbedder())
indexing.add_component("writer", DocumentWriter(document_store))
indexing.connect("converter", "splitter")
indexing.connect("splitter", "embedder")
indexing.connect("embedder", "writer")

<haystack.core.pipeline.pipeline.Pipeline object at 0x0000021656D89E50>
üöÖ Components
  - converter: PyPDFToDocument
  - splitter: DocumentSplitter
  - embedder: SentenceTransformersDocumentEmbedder
  - writer: DocumentWriter
üõ§Ô∏è Connections
  - converter.documents -> splitter.documents (List[Document])
  - splitter.documents -> embedder.documents (List[Document])
  - embedder.documents -> writer.documents (List[Document])

In [33]:
indexing.run({"converter": {"sources": ["C:\\Users\\sunny\\haystak-end-to-end\\data\\Retrieval-Augmented-Generation-for-NLP.pdf"]}})

Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 17/17 [00:23<00:00,  1.38s/it]
Upserted vectors: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 528/528 [00:12<00:00, 43.62it/s]


{'writer': {'documents_written': 528}}

In [73]:

from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.builders import PromptBuilder
from haystack_integrations.document_stores.pinecone import PineconeDocumentStore
from haystack_integrations.components.retrievers.pinecone import PineconeEmbeddingRetriever
from haystack.components.generators import HuggingFaceTGIGenerator

In [None]:
'''import os
from dotenv import load_dotenv
load_dotenv()
#from haystack import Document

# Make sure you have the PINECONE_API_KEY environment variable set
document_store = PineconeDocumentStore(metric="cosine", dimension=768, index="YOUR_INDEX_NAME", environment = "YOUR_ENVIRONMENT")
'''

In [74]:
prompt_template = """Answer the following query based on the provided context. If the context does
                     not include an answer, reply with 'I don't know'.\n
                     Query: {{query}}
                     Documents:
                     {% for doc in documents %}
                        {{ doc.content }}
                     {% endfor %}
                     Answer: 
                  """

In [75]:
query_pipeline = Pipeline()

In [77]:
query_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder())
query_pipeline.add_component("retriever", PineconeEmbeddingRetriever(document_store=document_store))
query_pipeline.add_component("prompt_builder", PromptBuilder(template=prompt_template))
query_pipeline.add_component("llm", HuggingFaceTGIGenerator(model="mistralai/Mistral-7B-v0.1", token=Secret.from_token("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")))

ValueError: A component named 'text_embedder' already exists in this pipeline: choose another name.

In [78]:
query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
query_pipeline.connect("retriever.documents", "prompt_builder.documents")
query_pipeline.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x0000021655E72250>
üöÖ Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: PineconeEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceTGIGenerator
üõ§Ô∏è Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [88]:
query = "What is RAG-Token?"

In [89]:
results = query_pipeline.run(
    {
        "text_embedder": {"text": query},
        "prompt_builder": {"query": query},
    }
)

Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 21.46it/s]


In [90]:
results


{'llm': {'replies': ['\n                     RAG-Token is a model that can predict each target token based on a different document.\n                     It can be seen as a standard, autoregressive seq2seq generator with transition probability:\n                     p‚Ä≤\nŒ∏(yi|x,y 1:i‚àí1) =‚àë\nz‚ààtop-k(p(¬∑|x))pŒ∑(zi|x)pŒ∏(yi|x,zi,y1:i‚àí1)To decode, we can plug p‚Ä≤\nŒ∏(yi|x,y 1:i‚àí1)into a standard beam decoder.\n                     For RAG-Sequence, the likelihood p(y|x)does not break into a conventional per-\ntoken likelihood, hence we cannot solve it with a single beam search.\n                     An interactive demo of RAG models can be found at https://huggingface.co/rag/\n2https://github.\n                     An interactive demo of a RAG model can be found athttps://huggingface.co/rag/\n2https://github.\n                     RAG-Tok. 17.\n                     RAG-Token 43.5 54.\n                     RAG-Token 44.\n                     RAG-Token-Frozen 37.8 50.\n       

In [91]:
print(results['llm']['replies'][0])



                     RAG-Token is a model that can predict each target token based on a different document.
                     It can be seen as a standard, autoregressive seq2seq generator with transition probability:
                     p‚Ä≤
Œ∏(yi|x,y 1:i‚àí1) =‚àë
z‚ààtop-k(p(¬∑|x))pŒ∑(zi|x)pŒ∏(yi|x,zi,y1:i‚àí1)To decode, we can plug p‚Ä≤
Œ∏(yi|x,y 1:i‚àí1)into a standard beam decoder.
                     For RAG-Sequence, the likelihood p(y|x)does not break into a conventional per-
token likelihood, hence we cannot solve it with a single beam search.
                     An interactive demo of RAG models can be found at https://huggingface.co/rag/
2https://github.
                     An interactive demo of a RAG model can be found athttps://huggingface.co/rag/
2https://github.
                     RAG-Tok. 17.
                     RAG-Token 43.5 54.
                     RAG-Token 44.
                     RAG-Token-Frozen 37.8 50.
                     RAG-Token 77.
           