In [1]:
from dotenv import load_dotenv
load_dotenv()
import pprint

# 🧠 Retrieval Augmented Generation (RAG)

RAG = **retrieval + generation** 🎯  
It boosts LLMs by giving them access to **external knowledge** — solving the problem of **outdated** or **limited** internal memory.

---

## 🔑 How RAG Works

1. 🔍 **Retrieve**: Find relevant info using a retriever  
2. 🧾 **Inject**: Add that info into the model's prompt  
3. 🤖 **Generate**: Let the model answer with grounded context

![RAG](assets/rag_concepts-4499b260d1053838a3e361fb54f376ec.png "RAG")

---

## 🚀 Why RAG?

- 🆕 **Stay up-to-date** with current info  
- 📚 **Tap into domain-specific knowledge**  
- 🎯 **Reduce hallucinations** with grounded answers  
- 💸 **Avoid expensive fine-tuning**

🔗 [Retrieval Concepts](https://python.langchain.com/docs/concepts/retrieval/)

---

## 🧩 Typical RAG Flow

1. Accept a user query  
2. Retrieve relevant documents  
3. Format and inject them into the system prompt  
4. Generate a concise response using the LLM

### Setup vector store

In [2]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings

docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]
vectorstore = Chroma.from_documents(docs, OpenAIEmbeddings())

### Setup retriever

In [3]:
from langchain.chains.query_constructor.schema import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain_openai import ChatOpenAI

metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]
document_content_description = "Brief summary of a movie"
llm = ChatOpenAI(temperature=0)
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose= True
)

### Retrieve documents by question

In [4]:
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage

# Define a question
question = """What are Leonardio Di Caprio movies about?"""

# Retrieve relevant documents
docs = retriever.invoke(question)
pprint.pp(docs)

[Document(id='b7f44207-6a3e-41c2-8a59-9fb402f56c90', metadata={'director': 'Christopher Nolan', 'rating': 8.2, 'year': 2010}, page_content='Leo DiCaprio gets lost in a dream within a dream within a dream within a ...'),
 Document(id='e1faebee-8418-45ce-85c6-47beb27e0030', metadata={'director': 'Satoshi Kon', 'rating': 8.6, 'year': 2006}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),
 Document(id='936a65b5-0f5e-4ac3-be42-f059b4363006', metadata={'genre': 'science fiction', 'rating': 7.7, 'year': 1993}, page_content='A bunch of scientists bring back dinosaurs and mayhem breaks loose'),
 Document(id='f7c8247b-e519-4cff-b8be-3535be5262b5', metadata={'director': 'Andrei Tarkovsky', 'genre': 'thriller', 'rating': 9.9, 'year': 1979}, page_content='Three men walk into the Zone, three men walk out of the Zone')]


### Set up the system prompt, add the context

In [5]:
# Define a system prompt that tells the model how to use the retrieved context
system_prompt = """You are an assistant for question-answering tasks. 
Use the following pieces of retrieved context to answer the question. 
If you don't know the answer, just say that you don't know. 
Use three sentences maximum and keep the answer concise.
Context: {context}:"""

# Combine the documents into a single string
docs_text = "".join(d.page_content for d in docs)

# Populate the system prompt with the retrieved context
system_prompt_fmt = system_prompt.format(context=docs_text)
pprint.pp(system_prompt_fmt)


('You are an assistant for question-answering tasks. \n'
 'Use the following pieces of retrieved context to answer the question. \n'
 "If you don't know the answer, just say that you don't know. \n"
 'Use three sentences maximum and keep the answer concise.\n'
 'Context: Leo DiCaprio gets lost in a dream within a dream within a dream '
 'within a ...A psychologist / detective gets lost in a series of dreams '
 'within dreams within dreams and Inception reused the ideaA bunch of '
 'scientists bring back dinosaurs and mayhem breaks looseThree men walk into '
 'the Zone, three men walk out of the Zone:')


### Use the retrieved content to craft the answer

In [6]:
# Create a model
model = ChatOpenAI(model="gpt-4o-mini", temperature=0) 

# Generate a response
questions = model.invoke([SystemMessage(content=system_prompt_fmt),
                          HumanMessage(content=question)])

pprint.pp(questions)

AIMessage(content='Leonardo DiCaprio\'s movies often explore complex themes such as dreams, identity, and morality. For example, in "Inception," he navigates layers of dreams, while in "Jurassic Park," the focus is on the consequences of bringing dinosaurs back to life. His films frequently delve into human psychology and the chaos that ensues from scientific advancements or personal dilemmas.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 77, 'prompt_tokens': 138, 'total_tokens': 215, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_bbfba58e46', 'id': 'chatcmpl-BF8h6Cm6YCAge81749INsMMZHaVFw', 'finish_reason': 'stop', 'logprobs': None}, id='run-ed249652-d581-41d8-9007-bcb35d9c76d4-0', usage_metadata={'input_tokens': 138