# Implement a basic RAG using LangGraph

In [5]:
# Enable IPython's autoreload extension in mode 2 (reload imported modules
# before running each cell), which is convenient while editing the local `src` package.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import os
from pathlib import Path
from dotenv import load_dotenv

import uuid
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams



from src import utils, conf, sample_docs

ModuleNotFoundError: No module named 'langchain.schema'

# Params

In [None]:
# Load the model-related settings from settings.yaml through the project's conf helper.
conf_settings = conf.load(file="settings.yaml")

# Expose the settings as notebook-level constants.
LLM_WORKHORSE = conf_settings.llm_workhorse  # chat model name passed to ChatOpenAI
LLM_FLAGSHIP = conf_settings.llm_flagship  # not referenced in the visible cells
EMBEDDINGS = conf_settings.embeddings  # embedding model name passed to OpenAIEmbeddings
EMB_DIM = conf_settings.embeddings_dim  # presumably the embedding vector size; not referenced in the visible cells

In [None]:
# Load the infrastructure settings from infra.yaml.
conf_infra = conf.load(file="infra.yaml")

# Vector database endpoint; passed as `url` to the Qdrant client and vector store.
VDB_URL = conf_infra.vdb_url

# Environment Variables

In [None]:
# Load environment variables via python-dotenv before reading the secrets below.
load_dotenv()

# Indexing os.environ directly fails fast with a KeyError when a secret is missing.
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]

In [None]:
# Embedding client; the model name comes from settings.yaml.
embeddings = OpenAIEmbeddings(model=EMBEDDINGS, api_key=OPENAI_API_KEY)

# Smoke test: embed a throwaway string; a failure is reported, not raised.
try:
    embeddings.embed_query("abc")
except Exception as exc:
    print(exc)

# Chat model used by the chains and graph nodes below.
llm = ChatOpenAI(model=LLM_WORKHORSE, api_key=OPENAI_API_KEY)

# Smoke test: one round trip to the chat model; a failure is reported, not raised.
try:
    llm.invoke("tell me a joke about devops")
except Exception as exc:
    print(exc)

In [None]:
# Low-level Qdrant client, used for the collection checks below.
client_qdrant = QdrantClient(url=VDB_URL, api_key=QDRANT_API_KEY)

# Smoke test: list the collections; a failure is reported, not raised.
try:
    client_qdrant.get_collections()
except Exception as exc:
    print(exc)


## Ingestion Pipeline

In [None]:
# Look up the "tutorial" collection on the server; the result is shown as the cell output.
client_qdrant.get_collection("tutorial")

In [None]:
# Attach to the existing "tutorial" collection (presumably created by notebook
# 04-rag-langchain). Fail fast when it is missing: without the collection there is
# no `vector_store`, and every later cell would die with an unrelated NameError.
if not client_qdrant.collection_exists("tutorial"):
    raise RuntimeError('Collection "tutorial" not found: run 04-rag-langchain!')

vector_store = QdrantVectorStore.from_existing_collection(
    embedding=embeddings,
    collection_name="tutorial",
    url=VDB_URL,
    api_key=QDRANT_API_KEY,
)

In [None]:
# Sanity check: query the vector store directly before wiring it into a graph.
retrieved_docs = vector_store.similarity_search("who won the 2024 Nobel in Chemistry?")
retrieved_docs

# Query Pipeline

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# RAG prompt: instructs the model to answer from the supplied context only.
# `{context}` and `{question}` are template variables filled in when the prompt is invoked.
prompt_template = """Answer the question based only on the following context:
```
{context}
```

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(prompt_template)


In [None]:
from langchain_core.documents import Document
from typing_extensions import List, TypedDict


class State(TypedDict):
    """State shared by the nodes of the graphs built in this notebook."""

    # User question; supplied as the graph input.
    question: str
    # Documents written by `retrieve` and rewritten by the filter / critic nodes.
    context: List[Document]
    # Text of the LLM reply, written by `generate`.
    answer: str

def format_docs(docs):
    """Concatenate the text of every document, separated by a blank line.

    Args:
        docs: iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string; empty when ``docs`` is empty.
    """
    texts = (doc.page_content for doc in docs)
    return "\n\n".join(texts)


def retrieve(state: State):
    """Graph node: look up documents for the question in the vector store.

    Reads ``state["question"]`` and returns a partial state update whose
    ``context`` is the result of ``vector_store.similarity_search``.
    """
    question = state["question"]
    return {"context": vector_store.similarity_search(question)}


def generate(state: State):
    """Graph node: answer the question from the documents held in the state.

    Reads ``state["context"]`` and ``state["question"]``, renders the RAG
    prompt with the concatenated document texts and returns a partial state
    update whose ``answer`` is the content of the LLM reply.
    """
    # Read only the fields this node needs.
    docs = state["context"]
    question = state["question"]

    messages = prompt.invoke({"question": question, "context": format_docs(docs)})
    return {"answer": llm.invoke(messages).content}


from langgraph.graph import START, StateGraph

# Linear pipeline: START -> retrieve -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])  # nodes are addressed by their function names
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()
graph

In [None]:
# Run the basic pipeline end to end; only `question` has to be supplied as input.
graph.invoke({"question": "who won the 2024 Nobel in Chemistry?"})

# Can we do better?

## Dynamic Source

[Structured outputs](https://python.langchain.com/docs/concepts/structured_outputs/)

In [None]:
from pydantic import BaseModel, Field
from typing import Optional


class Filter(BaseModel):
    """Structured reply of the source-detection chain.

    The explanatory texts are passed as ``description=`` so that they end up
    in the JSON schema the output parser shows to the LLM. Given as the first
    positional argument of ``Field`` they would be the fields' *default
    values* instead, which made ``do_filter`` default to a truthy string.
    """

    # Falls back to "no filtering" when the model omits the flag.
    do_filter: bool = Field(
        default=False,
        description="Whether the user has specified any given source or not",
    )
    # None means that no source was requested.
    value: Optional[str] = Field(
        default=None,
        description="Infer from the query is the user intent is to get information only from a given source. Answer only with any of these values: `wikipedia`, `national_geographic`, `desperta_ferro`",
    )

from langchain_core.output_parsers import PydanticOutputParser
# Parser bound to the `Filter` model; it also supplies the format instructions injected into the prompt.
parser = PydanticOutputParser(pydantic_object=Filter)


from langchain_core.prompts import ChatPromptTemplate

# Prompt asking the LLM whether the question names an explicit source.
# Each instruction sits on its own line: a trailing backslash inside the literal
# would swallow the newline, fusing the sentences together and gluing the format
# instructions to "Question:".
prompt_template_filt = """Your task is to analyze a question and determine whether the user want to retrieve the answer from a given source.
Do NOT answer the question itself, only the source if it is stated.
You can provide an empty string if no source is requested by the user.
Wrap the output in the following json format:
{format_instructions}

Question: {question}
"""
prompt_filt = ChatPromptTemplate.from_template(prompt_template_filt)

# prompt (format instructions pre-filled) -> chat model -> parsed `Filter` object
chain_filt = (
    prompt_filt.partial(format_instructions=parser.get_format_instructions())
    | llm
    | parser
)

# Try the chain on a Spanish question that names a source ("Según Desperta Ferro, ..." = "According to Desperta Ferro, ...").
resp = chain_filt.invoke("Según Desperta Ferro, existe el término reconquista?")
resp

In [None]:
# Render the fully formatted prompt without calling the LLM, to inspect what the model receives.
prompt_filt.partial(format_instructions=parser.get_format_instructions()).invoke("Según Desperta Ferro, existe el término reconquista?")

In [None]:
# Display the object returned by `chain_filt` (the parser's output) again.
resp

In [None]:
def source_filter(state: State):
    """Graph node: keep only the documents from the source named in the question.

    The question is sent through ``chain_filt``. When the reply both sets
    ``do_filter`` and names a source, ``state["context"]`` is reduced to the
    documents whose ``metadata['source']`` equals that name; otherwise the
    context is passed on unchanged.

    Returns:
        A partial state update with the (possibly reduced) ``context`` list.
    """
    q = state['question']
    print(q)
    resp = chain_filt.invoke(q)
    docs = state['context']

    # The flag alone is not enough: with an empty or missing value no document
    # could match and the whole context would be thrown away.
    if not (resp.do_filter and resp.value):
        return {"context": docs}

    value = resp.value
    print(f"Filter: {value=}")
    print([doc.metadata for doc in docs])
    # .get(): a document without a 'source' entry is dropped instead of raising KeyError.
    docs_filt = [doc for doc in docs if doc.metadata.get('source') == value]
    print(f"Remaning docs: {len(docs_filt)=}")
    return {"context": docs_filt}


# Linear pipeline: START -> retrieve -> source_filter -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, source_filter, generate])
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()
graph

    

In [None]:
# Run the pipeline with the source filter on a question that names its source explicitly.
graph.invoke({"question": "According to wikipedia, who won the Chemistry Nobel Prize in 2024?"})

Ideas:
* Enhance retrieval by using the native client with custom options (e.g. a metadata filter)
* Add a post-processing step that links the answer to images

## Self-reflective RAG

We are going to implement a linear version of [Self-Reflective RAG with LangGraph](https://blog.langchain.com/agentic-rag-with-langgraph/)

In [None]:

def critic_context_relevance(state: State):
    """Graph node: tag every document in the context with a relevance score.

    Placeholder implementation: each document receives the constant score 3,
    written in place to ``metadata['relevance_score']``. The intended version
    asks a critic LLM to grade each document individually, prompting it to
    reply with an integer between 0 and 3.

    Returns:
        A partial state update carrying the same ``context`` list.
    """
    docs = state["context"]
    question = state["question"]  # not used yet; kept for the future critic prompt

    for doc in docs:
        doc.metadata.update(relevance_score=3)

    return {"context": docs}



# Linear pipeline: START -> retrieve -> critic_context_relevance -> generate.
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, critic_context_relevance, generate])
graph_builder.add_edge(START, "retrieve")

graph = graph_builder.compile()
graph.invoke({"question": "According to wikipedia, who won the Chemistry Nobel Prize in 2024?"})

## Other ideas:

* Two-stage retrieval (Documents + Chunks)
* Conversational Flow
* Tool calling (Agents!)
* Multi-retrievers
