# RAG

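Background video (Memgraph YouTube channel): [https://www.youtube.com/watch?v=okmk357t9W8&ab_channel=Memgraph](https://www.youtube.com/watch?v=okmk357t9W8&ab_channel=Memgraph)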

In [1]:
!uv pip install haystack-ai 
!uv pip install neo4j-haystack

[2mAudited [1m1 package[0m in 5ms[0m
[2K[2mResolved [1m38 packages[0m in 2.09s[0m                                                [0m
[2K[36m[1mBuilding[0m[39m neo4j[2m==5.20.0[0m...                                 [0m
[2K[1A[36m[1mBuilding[0m[39m neo4j[2m==5.20.0[0m                            [0m[1A
[2K[1A[36m[1mBuilding[0m[39m neo4j[2m==5.20.0[0m                            [0m[1A
[2K[1A   [32m[1mBuilt[0m[39m neo4j[2m==5.20.0[0m                            [0m[1A
[2K[1A   [32m[1mBuilt[0m[39m neo4j[2m==5.20.0[0m                            [0m[1A
[2K[1A   [32m[1mBuilt[0m[39m neo4j[2m==5.20.0[0m                            [0m[1A
[2K[1A   [32m[1mBuilt[0m[39m neo4j[2m==5.20.0[0m                                                          [2mDownloaded [1m2 packages[0m in 786ms[0m
[2K[2mInstalled [1m2 packages[0m in 1ms[0m0                                        [0m
 [32m+[39m [1mneo4j[0m[2m==5.20.0[0m
 

In [8]:
from haystack import GeneratedAnswer, Pipeline
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.utils import Secret

from neo4j_haystack import Neo4jClientConfig, Neo4jDynamicDocumentRetriever

from haystack import component
import numpy as np
import httpx

import os

# Neo4j connection settings. Each one can be overridden with an environment
# variable of the same name; the fallbacks point at a Neo4j instance on localhost.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:9893")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(review): the fallback password is only suitable for a throwaway local
# database; set NEO4J_PASSWORD in the environment for anything shared.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo4jneo4j")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")  # Can't create multiple dbs in community edition


# Create a custom component to use our transformer API
@component
class CustomEmbedder:
    """Haystack component that embeds a text through our custom embedding HTTP API."""

    # np.ndarray is the array *type*; np.array is only the factory function.
    @component.output_types(embedding=np.ndarray)
    def run(self, text: str):
        """POST `text` to the embedding service and return the resulting vector.

        Args:
            text: The text to embed.

        Returns:
            A dict with the single key "embedding", holding a NumPy array built
            from the "embedding" field of the service's JSON response.

        Raises:
            httpx.HTTPStatusError: If the service answers with a 4xx/5xx status.
            ValueError: If the JSON response has no "embedding" field.
        """
        # NOTE(review): `data=` sends a form-encoded body; switch to `json=` if the
        # service expects a JSON payload — confirm against the API.
        response = httpx.post("http://localhost:9891/embed", data={"text": text})
        # Fail loudly on an HTTP error instead of parsing an error body as an embedding.
        response.raise_for_status()

        embedding = response.json().get("embedding")
        if embedding is None:
            # Without this guard np.array(None) would be passed on as the "vector".
            raise ValueError("Embedding service response has no 'embedding' field")
        return {"embedding": np.array(embedding)}


@component
class CustomLLM:
    """Haystack component that forwards a query to our custom LLM HTTP API."""

    # httpx's default timeout is only a few seconds, which is tight for text
    # generation, so a more generous limit is used for this endpoint.
    REQUEST_TIMEOUT_SECONDS = 60.0

    @component.output_types(answer=str)
    def run(self, query: str):
        """POST `query` to the LLM service and return its JSON response.

        Args:
            query: The question or prompt to send.

        Returns:
            The parsed JSON body of the response, unchanged. The declared output
            is `answer`, so the body is presumably an object with an "answer"
            string — TODO confirm against the API.

        Raises:
            httpx.HTTPStatusError: If the service answers with a 4xx/5xx status.
        """
        res = httpx.post(
            "http://localhost:9894/ask",
            data={"query": query},
            timeout=self.REQUEST_TIMEOUT_SECONDS,
        )
        # Surface HTTP errors here rather than returning an error body as the answer.
        res.raise_for_status()
        return res.json()

In [2]:


# Connection settings handed to the Neo4j retriever when the pipeline is built.
client_config = Neo4jClientConfig(
    database=NEO4J_DATABASE,
    url=NEO4J_URI,
    password=NEO4J_PASSWORD,
    username=NEO4J_USERNAME,
)

# Cypher passed to the retriever as its "query" input. It performs a vector-index
# lookup (db.index.vector.queryNodes) and joins every hit to its actors and directors.
# - $index and $top_k are supplied through "parameters" when the pipeline is run;
#   $query_embedding is presumably filled from the retriever's runtime input — confirm.
# - The actor/director MATCH is not OPTIONAL, so a movie lacking either an actor or a
#   director produces no row.
# - Each row is a map projection of the movie's properties plus `content` (the
#   tagline), `score`, `actors` and `directors`.
rag_cypher_query = """
CALL db.index.vector.queryNodes($index, $top_k, $query_embedding)
YIELD node as movie, score
MATCH (movie)
WITH movie, score
MATCH (actor:Person)-[:ACTED_IN]->(movie), (director:Person)-[:DIRECTED]->(movie)
WITH movie, score, COLLECT(distinct actor.name) AS actors, COLLECT(distinct director.name) AS directors
RETURN movie{.*, content: movie.tagline, score, actors, directors}
ORDER BY score DESC LIMIT $top_k
"""

# Template given to PromptBuilder. Placeholders: `documents` (iterated; the "title",
# "tagline" and "directors" entries of each item's meta are printed), `tagline_guess`
# and `letter_from`.
# The string is not raw, so every "\n" below becomes a real newline in the prompt.
prompt_template = """
Given the list of Movies with it's title, tagline and movie directors pick up
one of the taglines which matches the given guess and write a short email letter to the movie
director of the matched tagline explaining the meaning of the tagline.
The letter should be concise and have no more than 3 sentences.
Sign the letter with the name: "{{letter_from}}".

\nMovies:
{% for doc in documents %}
  - Title: {{ doc.meta['title'] }}, Tagline: {{ doc.meta['tagline'] }}, Directors: {{ doc.meta['directors'] }}
{% endfor %}

\nTagline Guess: {{tagline_guess}}
\nLetter to the director:
"""

# OllamaGenerator is not part of haystack-ai; it ships in the separately installed
# `ollama-haystack` package. Without this import the cell fails with a NameError.
from haystack_integrations.components.generators.ollama import OllamaGenerator

# Assemble the RAG pipeline:
#   query_embedder -> retriever -> prompt_builder -> llm -> answer_builder
pipe = Pipeline()
pipe.add_component("query_embedder", CustomEmbedder())
pipe.add_component(
    "retriever",
    Neo4jDynamicDocumentRetriever(
        client_config=client_config,
        # Declares `query_embedding` as a retriever input so the embedder output
        # can be connected to it below.
        runtime_parameters=["query_embedding"],
        # Presumably the name of the returned column that holds each document
        # (the Cypher query returns `movie{...}`) — confirm.
        doc_node_name="movie",
        verify_connectivity=True,
    ),
)
pipe.add_component("prompt_builder", PromptBuilder(template=prompt_template))
pipe.add_component(
    "llm",
    OllamaGenerator(
        model="llama2",
        # NOTE(review): whether `url` must include the "/api/generate" path or be
        # just the server base URL may depend on the installed ollama-haystack
        # version — confirm.
        url="http://localhost:11434/api/generate"
    ),
)
pipe.add_component("answer_builder", AnswerBuilder())

# Retrieval path: embedding -> documents -> prompt -> generated replies.
pipe.connect("query_embedder", "retriever.query_embedding")
pipe.connect("retriever.documents", "prompt_builder.documents")
pipe.connect("prompt_builder.prompt", "llm.prompt")
pipe.connect("llm.replies", "answer_builder.replies")
pipe.connect("llm.meta", "answer_builder.meta")
# The retrieved documents are also attached to the answer so sources can be listed.
pipe.connect("retriever", "answer_builder.documents")

# Ask a question on the data you just added.
tagline_guess = "What are the odds of meeting each other?"

# Inputs for each component, keyed by the name it was registered under.
pipeline_inputs = {
    "query_embedder": {"text": tagline_guess},
    "retriever": {
        "query": rag_cypher_query,
        "parameters": {"index": "movie-embeddings", "top_k": 5},
    },
    "prompt_builder": {"tagline_guess": tagline_guess, "letter_from": "Haystack & Neo4j"},
    "answer_builder": {"query": tagline_guess},
}
result = pipe.run(pipeline_inputs)

# Only the first generated answer is inspected further down.
answer: GeneratedAnswer = result["answer_builder"]["answers"][0]

def movie_sources(documents=None):
    """Format retrieved movie documents as one human-readable line each.

    Args:
        documents: Documents to describe. Each needs a `score` attribute and a
            `meta` mapping with "title", "tagline" and "directors" entries.
            When omitted, the documents attached to the notebook-level `answer`
            are used, so existing no-argument calls keep working.

    Returns:
        A list of strings, one per document, in input order.
    """
    if documents is None:
        # Fall back to the global only when the caller passes nothing; an explicit
        # empty list must stay empty.
        documents = answer.documents
    return [
        (
            f"Score: {doc.score}, "
            f"Movie Title: {doc.meta['title']}, "
            f"Movie Tagline: {doc.meta['tagline']}, "
            f"Directors: {str(doc.meta['directors'])}"
        )
        for doc in documents
    ]

# Report the question, the generated letter and the movies it was based on.
# print() already inserts a space between arguments, so the label carries no
# trailing space, and the sources are printed separately to avoid a stray
# leading space on the first line.
print("Query:", answer.query)
print("============")
print(answer.data)
print("============")
print("Sources:")
print("\n".join(movie_sources()))

ModuleNotFoundError: No module named 'haystack_integrations.components.generators.ollama'