# Haystack 27 - first rag pipeline

https://github.com/deepset-ai/haystack-tutorials/blob/main/tutorials/27_First_RAG_Pipeline.ipynb

**Note**: needed to `mamba install bitsandbytes accelerate`

In [1]:
from haystack.telemetry import tutorial_running

tutorial_running(27)

In [2]:
# from haystack_integrations.components.generators.ollama import OllamaGenerator
from haystack import Pipeline, PredefinedPipeline

In [3]:
from haystack.document_stores.in_memory import InMemoryDocumentStore

document_store = InMemoryDocumentStore()

In [4]:
from datasets import load_dataset
from haystack import Document

dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

In [5]:
from haystack.components.embedders import SentenceTransformersDocumentEmbedder

doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()

In [6]:
docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])

Batches:   0%|          | 0/5 [00:00<?, ?it/s]

151

In [7]:
from haystack.components.embedders import SentenceTransformersTextEmbedder

text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

In [8]:
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever

retriever = InMemoryEmbeddingRetriever(document_store)

In [9]:
from haystack.components.builders import PromptBuilder

template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""

prompt_builder = PromptBuilder(template=template)

In [10]:
import os
from getpass import getpass
import torch
# from haystack.components.generators import OpenAIGenerator
# from haystack.components.generators import 
from haystack.components.generators import HuggingFaceLocalGenerator

# os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key: ")
# generator = OpenAIGenerator(model="gpt-3.5-turbo")
# generator = HuggingFaceLocalGenerator("HuggingFaceH4/zephyr-7b-beta",
#                                  huggingface_pipeline_kwargs={
#                                      # "device_map":"auto",
#                                                "model_kwargs":{"load_in_4bit":True,
#                                                 "bnb_4bit_use_double_quant":True,
#                                                 "bnb_4bit_quant_type":"nf4",
#                                                 "bnb_4bit_compute_dtype":torch.bfloat16}},
#                                  generation_kwargs={"max_new_tokens": 350})

# https://docs.haystack.deepset.ai/docs/huggingfacelocalgenerator
generator = HuggingFaceLocalGenerator(model="google/flan-t5-large",
                                      task="text2text-generation",
                                      generation_kwargs={
                                        "max_new_tokens": 400,
                                        "temperature": 0.8,
                                        })

In [11]:
from haystack import Pipeline

basic_rag_pipeline = Pipeline()
# Add components to your pipeline
basic_rag_pipeline.add_component("text_embedder", text_embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", generator)

# Now, connect the components to each other
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
basic_rag_pipeline.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7fc9040af910>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceLocalGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (List[float])
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [12]:
question = "What does Rhodes Statue look like?"

response = basic_rag_pipeline.run({"text_embedder": {"text": question}, "prompt_builder": {"question": question}})

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (2707 > 512). Running this sequence through the model will result in indexing errors


In [13]:
print(response["llm"]["replies"][0])

a statue of Helios
