# Implement a basic RAG by using LCEL

In [8]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
import os
from pathlib import Path
from dotenv import load_dotenv

import uuid
from langchain.schema import Document
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_core.runnables import RunnablePassthrough

from src import utils, conf

# Params

In [10]:
conf_settings = conf.load(file="settings.yaml")

LLM_WORKHORSE = conf_settings.llm_workhorse
LLM_FLAGSHIP = conf_settings.llm_flagship
EMBEDDINGS = conf_settings.embeddings
EMB_DIM = conf_settings.embeddings_dim

In [11]:
conf_infra = conf.load(file="infra.yaml")

VDB_URL = conf_infra.vdb_url

# Environment Variables

In [12]:
load_dotenv()

OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
QDRANT_API_KEY = os.environ["QDRANT_API_KEY"]

In [13]:
embeddings = OpenAIEmbeddings(
    api_key=OPENAI_API_KEY,
    model=EMBEDDINGS
    )

try:
    embeddings.embed_query("abc")
except Exception as err:
    print(err)

llm = ChatOpenAI(
    api_key=OPENAI_API_KEY,
    model=LLM_WORKHORSE,
    )

try:
    llm.invoke("tell me a joke about devops")
except Exception as err:
    print(err)


In [14]:
client_qdrant = QdrantClient(
    api_key=QDRANT_API_KEY,
    url=VDB_URL
    )

try:
    client_qdrant.get_collections()
except Exception as err:
    print(err)


## Ingestion Pipeline

In [15]:
docs = [
    Document(
        page_content="John J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today's machine learning technologies, enhancing applications from medical imaging to material science.",
        metadata={"source": "wikipedia", "topic": "Physics"}
    ),
    Document(
        page_content="In Chemistry, David Baker, Demis Hassabis, and John Jumper were honored win Nobel Prize in 2024 for their breakthroughs in protein structure prediction. Baker’s work in computational protein design enables the creation of novel proteins, while Hassabis and Jumper, known for their work with DeepMind's AlphaFold, developed an AI that accurately predicts protein structures—a long-standing challenge in biology. This advancement could lead to transformative applications in drug development and synthetic biology.",
        metadata={"source": "wikipedia", "topic": "Chemistry"}
    ),
]


# #############################################
if client_qdrant.collection_exists("tutorial"):
    client_qdrant.delete_collection("tutorial")
# #############################################

client_qdrant.create_collection(
    collection_name="tutorial",
    vectors_config=VectorParams(
        size=EMB_DIM,
        distance=Distance.COSINE),
)

# This example is wrong:
# # https://qdrant.tech/documentation/frameworks/langchain/#using-an-existing-collection
# embedding instead of embeddings
# https://python.langchain.com/api_reference/_modules/langchain_qdrant/qdrant.html#QdrantVectorStore.from_existing_collection

vector_store = QdrantVectorStore.from_existing_collection(
        embedding=embeddings,
        collection_name="tutorial",
        api_key=QDRANT_API_KEY,
        url=VDB_URL
    )
# uuids = [str(uuid.uuid4()) for _ in range(len(docs))]

uuids = [
    '2690cf82-ebfd-48bc-bd52-c61a595a212a',
    '0e8f454e-3ebf-434b-a7cf-26489695bcd0'
    ]


vector_store.add_documents(documents=docs, ids=uuids)  # Add only once!

['2690cf82-ebfd-48bc-bd52-c61a595a212a',
 '0e8f454e-3ebf-434b-a7cf-26489695bcd0']

In [25]:
vector_store.similarity_search(
    "Nobel Price Physics",
    k=1,
)


[Document(metadata={'source': 'wikipedia', 'topic': 'Physics', '_id': '2690cf82-ebfd-48bc-bd52-c61a595a212a', '_collection_name': 'tutorial'}, page_content="John J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today's machine learning technologies, enhancing applications from medical imaging to material science.")]

In [17]:
vector_store.similarity_search_with_score(
    "Nobel Price Physics",
    k=1,
)


[(Document(metadata={'source': 'wikipedia', 'topic': 'Physics', '_id': '2690cf82-ebfd-48bc-bd52-c61a595a212a', '_collection_name': 'tutorial'}, page_content="John J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today's machine learning technologies, enhancing applications from medical imaging to material science."),
  0.41678888)]

In [18]:
retriever = vector_store.as_retriever(k=1)

retriever.invoke("Nobel Price Physics")

[Document(metadata={'source': 'wikipedia', 'topic': 'Physics', '_id': '2690cf82-ebfd-48bc-bd52-c61a595a212a', '_collection_name': 'tutorial'}, page_content="John J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today's machine learning technologies, enhancing applications from medical imaging to material science."),
 Document(metadata={'source': 'wikipedia', 'topic': 'Chemistry', '_id': '0e8f454e-3ebf-434b-a7cf-26489695bcd0', '_collection_name': 'tutorial'}, page_content="In Chemistry, David Baker, Demis Hassabis, and John Jumper were honored win Nobel Prize in 2024 for their break

# Query Pipeline

In [19]:
from langchain_core.prompts import ChatPromptTemplate

prompt_template = """Answer the question based only on the following context:
```
{context}
```

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(prompt_template)

prompt.invoke(input=
    {
        "context": ["context1", "context2"],
        "question": "my_question"
    }
).to_messages()

[HumanMessage(content="Answer the question based only on the following context:\n```\n['context1', 'context2']\n```\n\nQuestion: my_question\n", additional_kwargs={}, response_metadata={})]

In [26]:
chain_rag = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
)

chain_rag.invoke("How won the Physics Nobel price in 2024?")

AIMessage(content='John J. Hopfield and Geoffrey Hinton won the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 332, 'total_tokens': 360, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4.1-mini-2025-04-14', 'system_fingerprint': 'fp_6d7dcc9a98', 'id': 'chatcmpl-CF1KHkglAmEUpcrfSsuEkI06iwcIw', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--72f065a3-a25b-49ca-a42a-1855c82ab946-0', usage_metadata={'input_tokens': 332, 'output_tokens': 28, 'total_tokens': 360, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 0}})

# Improve your RAG

## Parse output

In [21]:
from langchain_core.output_parsers import StrOutputParser

chain_rag = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

chain_rag.invoke("Who won the Physics Nobel price in 2024?")

'John J. Hopfield and Geoffrey Hinton won the Physics Nobel Prize in 2024.'

## Format context

In [22]:
from langchain_core.runnables import RunnableLambda


def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

print((retriever | RunnableLambda(format_docs)).invoke("Who won the Physics Nobel price in 2024?"))

In Chemistry, David Baker, Demis Hassabis, and John Jumper were honored win Nobel Prize in 2024 for their breakthroughs in protein structure prediction. Baker’s work in computational protein design enables the creation of novel proteins, while Hassabis and Jumper, known for their work with DeepMind's AlphaFold, developed an AI that accurately predicts protein structures—a long-standing challenge in biology. This advancement could lead to transformative applications in drug development and synthetic biology.

John J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today's machine lear

# Prepare your RAG for evaluation: Keep Context

In [23]:
chain_chat = ( prompt
    | llm
    | StrOutputParser()
)

chain_rag = (
    {"context": retriever | RunnableLambda(format_docs),
     "question": RunnablePassthrough()}
    | RunnablePassthrough.assign(  # maintains the keys context and question and adds a new one: answer
        answer=chain_chat  # passes context and question as input 
    )
)

chain_rag.invoke("Who won the Chemistry Nobel price in 2024?")

{'context': "In Chemistry, David Baker, Demis Hassabis, and John Jumper were honored win Nobel Prize in 2024 for their breakthroughs in protein structure prediction. Baker’s work in computational protein design enables the creation of novel proteins, while Hassabis and Jumper, known for their work with DeepMind's AlphaFold, developed an AI that accurately predicts protein structures—a long-standing challenge in biology. This advancement could lead to transformative applications in drug development and synthetic biology.\n\nJohn J. Hopfield and Geoffrey Hinton received the Nobel Prize in Physics in 2024 for their groundbreaking work on artificial neural networks, a foundation of modern AI. Hopfield developed an associative memory model in the 1980s that allows networks to store and reconstruct patterns. Building on this, Hinton developed the Boltzmann machine, which uses statistical physics principles to recognize and classify data. These pioneering contributions are essential for today

https://python.langchain.com/docs/integrations/vectorstores/qdrant/#hybrid-vector-search
https://python.langchain.com/api_reference/qdrant/qdrant/langchain_qdrant.qdrant.QdrantVectorStore.html