# 35 - Model_Based_Evaluation_of_RAG_Pipelines
Source tutorial: https://github.com/deepset-ai/haystack-tutorials/blob/main/tutorials/35_Model_Based_Evaluation_of_RAG_Pipelines.ipynb

Status: working — the cells below run end to end with a local `google/flan-t5-large` generator.


In [1]:
from datasets import load_dataset
from haystack import Document
from haystack.document_stores.in_memory import InMemoryDocumentStore

In [2]:
# In-memory store holding the corpus that the retriever searches over.
document_store = InMemoryDocumentStore()

# Load the "seven-wonders" training split and wrap every row in a Haystack
# Document, carrying over both the text and its metadata.
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [
    Document(content=row["content"], meta=row["meta"])
    for row in dataset
]

# Kept as the last expression so the notebook displays the value returned by
# write_documents (151 in the recorded run).
document_store.write_documents(docs)

151

In [3]:
import os
# from getpass import getpass
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.retrievers.in_memory import InMemoryBM25Retriever

# BM25 retriever bound to the document store populated above.
retriever = InMemoryBM25Retriever(document_store=document_store)

In [4]:
from haystack.components.generators import HuggingFaceLocalGenerator

# Local seq2seq model used as the pipeline's LLM.
#
# No `temperature` is passed: transformers only applies it when
# `do_sample=True` is also set, so supplying it alone has no effect on the
# output and just emits a warning. Decoding therefore stays at the default
# (greedy), which keeps replies reproducible across runs.
# To sample instead, add `"do_sample": True` together with a temperature
# (and seed the run so results stay comparable).
generator = HuggingFaceLocalGenerator(
    model="google/flan-t5-large",
    task="text2text-generation",
    generation_kwargs={"max_new_tokens": 100},  # cap on generated tokens
)

In [5]:
# Prepare the generator so it can be called directly, outside a pipeline.
generator.warm_up()

# Quick sanity check that the model answers a plain prompt.
# Recorded reply: {'replies': ['john wayne']}
sanity_reply = generator.run(prompt="Who is the best American actor?")
print(sanity_reply)

{'replies': ['john wayne']}




In [6]:
# Question passed to both the retriever and the prompt template when the
# pipeline is run.
# Alternative to try: "How many wonders are there?"
query = 'What statue is a wonder?'

In [7]:
# Jinja prompt: lists the content of every retrieved document as context,
# then states the question and leaves "Answer:" for the model to complete.
# Expects two variables at render time: `documents` and `query`.
template = """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{query}}
Answer:
"""

# Component that renders the template above into the final prompt string.
prompt_builder = PromptBuilder(template)

In [8]:
from haystack import Pipeline
# Assemble the RAG pipeline: retriever -> prompt_builder -> llm.
pipe = Pipeline()

# Register the components created in the earlier cells under the names that
# the run inputs and connections refer to.
pipe.add_component(name="retriever", instance=retriever)
pipe.add_component(name="prompt_builder", instance=prompt_builder)
pipe.add_component(name="llm", instance=generator)

# Retrieved documents fill the template's `documents` variable ...
pipe.connect("retriever.documents", "prompt_builder.documents")
# ... and the rendered prompt is handed to the generator. Kept as the last
# expression so the notebook displays the resulting pipeline summary.
pipe.connect("prompt_builder.prompt", "llm.prompt")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7f39d81cfd40>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceLocalGenerator
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [9]:

# Run the pipeline. The same question is supplied twice: once to the
# retriever (to find documents) and once to the prompt builder (to fill the
# template's `query` variable).
# NOTE(review): the recorded run logged a tokenizer warning for this prompt
# (3210 tokens vs. a 512-token maximum). Retrieving fewer documents would
# shorten the prompt — TODO confirm the effect on answer quality before
# changing it.
res = pipe.run(
    data={
        "retriever": {"query": query},
        "prompt_builder": {"query": query},
    }
)

print(res)

Ranking by BM25...:   0%|          | 0/151 [00:00<?, ? docs/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (3210 > 512). Running this sequence through the model will result in indexing errors


{'llm': {'replies': ['The Statue of Zeus at Olympia']}}
