# simple RAG pipeline

In [None]:
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI
from typing import List
from operator import itemgetter
from langchain_community.embeddings import JinaEmbeddings
import os, getpass

In [None]:
def _set_env(var: str):
    """Make sure the environment variable named *var* has a value.

    A variable that is missing or empty is filled in by prompting with
    ``getpass`` (prompt text is ``"<var>: "``); a variable that already has
    a non-empty value is left untouched.
    """
    if os.environ.get(var):
        # Already configured: nothing to ask for.
        return
    os.environ[var] = getpass.getpass(f"{var}: ")

# Ensure both API keys are present in the environment before any client is
# built. Presumably GOOGLE_API_KEY is read by the Gemini chat model and
# JINA_API_KEY by the Jina embeddings client - confirm against the libraries.
_set_env("GOOGLE_API_KEY")
_set_env("JINA_API_KEY")

In [None]:
# Embedding model handed to the Chroma vector store as its embedding function.
text_embeddings = JinaEmbeddings(
    model_name="jina-embeddings-v3"
)
# Chat model used for every LLM call in this notebook (answering, document
# validation, routing). temperature=0 is presumably chosen for repeatable
# output; max_tokens=None / timeout=None presumably defer to library
# defaults - confirm in the ChatGoogleGenerativeAI documentation.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [None]:
# System prompt for answer generation. It restricts the model to the supplied
# documents and fixes the refusal sentence; the {context} placeholder is
# filled in when the chat prompt template is rendered.
RAG_SYSTEM_PROMPT = (
    "You are a helpful assistant. Given RFP document similar for user's query, your task is to answer the user's query "
    "based *only* on these documents.\n"
    "Do NOT make up any answers. If the answer is not found in it, respond with: "
    "'I cannot answer this based on the provided information.'\n\n"
    "RFP document: {context}"
)
# Directory passed to Chroma as persist_directory when the vector store is opened.
DB_PERSIST_DIRECTORY = "./chroma_db"

In [None]:
def format_retrieved_document(docs: List[Document]) -> dict:
    """Merge retrieved documents into one numbered context string.

    Args:
        docs: Documents produced by the retriever step.

    Returns:
        A dict with two keys:

        * ``"formatted_context"``: each document's ``page_content`` under a
          ``Document-<n>:`` heading (numbered from 1) and followed by a blank
          line. An empty ``docs`` gives an empty string.
        * ``"source_documents"``: the ``docs`` list itself, unchanged.
    """
    print(f"--- Inspecting Retrieved Documents ---: {docs}")
    formatted_context = "".join(
        f"Document-{index}:\n{doc.page_content}\n\n"
        for index, doc in enumerate(docs, start=1)
    )

    # Debug preview only (first 500 characters). Despite the wording of the
    # message, only page_content is included - metadata is not merged in.
    print(
        f"Updated Document after merging metadata: {formatted_context[:500]}..."
    )
    return {
        "formatted_context": formatted_context,
        "source_documents": docs,
    }

def _log_final_prompt(prompt: ChatPromptTemplate):
    """Print the rendered prompt and hand the same object on.

    Debugging aid placed between the prompt step and the LLM.

    NOTE(review): the body only needs an object exposing ``to_string()``;
    inside the chain this is presumably the formatted prompt value rather
    than the ``ChatPromptTemplate`` named in the annotation - confirm.
    """
    rendered = prompt.to_string()
    print("--- Final Prompt Sent to LLM ---", rendered)
    # Return the input untouched so this step is transparent to the pipeline.
    return prompt

## only answer

In [None]:
# Open the Chroma store persisted at DB_PERSIST_DIRECTORY with the Jina
# embedding function.
vector_store = Chroma(
    persist_directory=DB_PERSIST_DIRECTORY,
    embedding_function=text_embeddings,
)
# Number of chunks requested from the retriever per query (its "k").
DENSE_RETRIEVED_DOCUMENTS = 3
retriever = vector_store.as_retriever(search_kwargs={"k": DENSE_RETRIEVED_DOCUMENTS})

# {"input": ...} -> query string -> retrieved documents -> dict holding
# "formatted_context" (str) and "source_documents" (list of documents).
retrieval_and_formatting_chain = (
    itemgetter("input")
    | retriever
    | RunnableLambda(format_retrieved_document)
)
RAG_GENERATION_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", RAG_SYSTEM_PROMPT),
        ("human", "{input}"),
    ]
)

# Prompt -> debug print of the rendered prompt -> LLM -> plain string answer.
answer_generation_chain = (
    RAG_GENERATION_PROMPT
    | RunnableLambda(_log_final_prompt)
    | llm
    | StrOutputParser()
)

# The {context} slot of the prompt must receive the formatted text only.
# format_retrieved_document returns a dict, so without selecting
# "formatted_context" the whole dict (escaped text plus Document reprs) would
# be stringified into the prompt.
rag_chain = (
    RunnablePassthrough.assign(
        context=retrieval_and_formatting_chain | itemgetter("formatted_context")
    )
    | answer_generation_chain
)

print(
    "RAG chain with document formatting and prompt inspection created successfully."
)
query = "what will be the evaluation procedures?"
response = rag_chain.invoke({"input": query})


In [None]:
# Show the answer string returned by rag_chain for the current query.
print(response)

In [None]:
# Run another question through rag_chain; the chain expects a dict with an "input" key.
query = "what kind of inspection required for this project?"
response = rag_chain.invoke({"input": query})

In [None]:
# Show the answer produced for the most recent query.
print(response)

## get retrieved documents + answer

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough


# Step 1 - retrieval branch: open the persisted Chroma store and build a
# retriever that returns the top-k chunks for a query.
DENSE_RETRIEVED_DOCUMENTS = 3
vector_store = Chroma(
    embedding_function=text_embeddings,
    persist_directory=DB_PERSIST_DIRECTORY,
)
retriever = vector_store.as_retriever(
    search_kwargs={"k": DENSE_RETRIEVED_DOCUMENTS}
)

# {"input": ...} -> query string -> documents -> dict with the keys
# "formatted_context" and "source_documents".
retrieval_branch = (
    itemgetter("input") | retriever | RunnableLambda(format_retrieved_document)
)

# Step 2 - prompt: the system message carries {context}, the human turn {input}.
RAG_GENERATION_PROMPT = ChatPromptTemplate.from_messages(
    [("system", RAG_SYSTEM_PROMPT), ("human", "{input}")]
)


def _context_text(state):
    """Return only the formatted context string from the retrieval output."""
    return state["retrieved_data"]["formatted_context"]


# Step 3 - full chain.
# RunnableParallel produces the keys 'retrieved_data' and 'input'; .assign
# then adds 'answer' next to them, so the retrieved data is kept in the result.
# No output parser is applied, so 'answer' is the raw LLM message object.
answer_step = (
    {"context": _context_text, "input": itemgetter("input")}
    | RAG_GENERATION_PROMPT
    | llm
)
rag_chain = RunnableParallel(
    retrieved_data=retrieval_branch,
    input=itemgetter("input"),
).assign(answer=answer_step)

# Step 4 - run it.
query = "what kind of inspection required for this project?"
result = rag_chain.invoke({"input": query})



--- Inspecting Retrieved Documents ---: [Document(metadata={'file_name': 'rfp3', 'company': 'Edgemont Union Free School District', 'project': 'Controlled Testing & Inspections for District Wide Additions & Alterations'}, page_content='e.\u200b Where fabrication of a structural assembly is being performed on the premises of a\nfabricators shop, special inspections shall be required.\u200b\nf.\u200b Grade and mill test reports are required for main stress carrying steel elements\u200b\ng.\u200b Prior to the placement of fill, the special inspector shall determine that the site has\nbeen prepared in accordance with the contract documents.\u200b'), Document(metadata={'company': 'Edgemont Union Free School District', 'project': 'Controlled Testing & Inspections for District Wide Additions & Alterations', 'file_name': 'rfp3'}, page_content='b.\u200b Copies of all special inspection reports shall be provided to the Architect and\nOwner’s Representative/Construction Manager on a weekly basis. 

In [13]:
# Echo the full chain output: 'retrieved_data', 'input' and 'answer'.
result

{'retrieved_data': {'formatted_context': 'Document-0:\ne.\u200b Where fabrication of a structural assembly is being performed on the premises of a\nfabricators shop, special inspections shall be required.\u200b\nf.\u200b Grade and mill test reports are required for main stress carrying steel elements\u200b\ng.\u200b Prior to the placement of fill, the special inspector shall determine that the site has\nbeen prepared in accordance with the contract documents.\u200b\n\nDocument-1:\nb.\u200b Copies of all special inspection reports shall be provided to the Architect and\nOwner’s Representative/Construction Manager on a weekly basis. Any inspection/\nreport that indicates a failure or a deviation from the contract documents shall be\nprovided to the Architect and Owner’s representative immediately within (24\nhours of the inspection).\u200b\nc.\u200b The testing agency shall be required to maintain a special inspection book. The\nbook shall be a three-ring binder which contains copies of 

In [14]:
# 5. The result holds both the LLM answer and the context it was given.
# 'answer' is a message object, so its text is read from .content.
print(f"LLM Answer: {result['answer'].content}")
# The exact context string that was placed into the prompt.
print(f"Formatted Context Used: {result['retrieved_data']['formatted_context']}")

LLM Answer: Special inspections are required for this project.

Specifically:
*   Where fabrication of a structural assembly is being performed on the premises of a fabricator's shop, special inspections shall be required.
*   Prior to the placement of fill, the special inspector shall determine that the site has been prepared in accordance with the contract documents.
Formatted Context Used: Document-0:
e.​ Where fabrication of a structural assembly is being performed on the premises of a
fabricators shop, special inspections shall be required.​
f.​ Grade and mill test reports are required for main stress carrying steel elements​
g.​ Prior to the placement of fill, the special inspector shall determine that the site has
been prepared in accordance with the contract documents.​

Document-1:
b.​ Copies of all special inspection reports shall be provided to the Architect and
Owner’s Representative/Construction Manager on a weekly basis. Any inspection/
report that indicates a failure or 

In [32]:
# Print the raw answer object (content plus response/usage metadata), not just its text.
print(result['answer'])

content="Special inspections are required for this project.\n\nSpecifically:\n*   Where fabrication of a structural assembly is being performed on the premises of a fabricator's shop, special inspections shall be required.\n*   Prior to the placement of fill, the special inspector shall determine that the site has been prepared in accordance with the contract documents." additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []} id='lc_run--d14d044e-64ec-49ef-b4ad-9df4dc090e51-0' usage_metadata={'input_tokens': 446, 'output_tokens': 575, 'total_tokens': 1021, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 507}}


In [36]:
# Confirm that the chain's 'answer' value is an AIMessage rather than a plain string.
from langchain_core.messages import AIMessage
isinstance(result['answer'], AIMessage)

True

In [19]:
# Pull the original query and the formatted context out of the chain result
# for the validation step. Note: retrieved_documents is the single formatted
# context string, not the list of Document objects.
query = result['input']
retrieved_documents = result['retrieved_data']['formatted_context']

## Reasoning about and validating the retrieved documents with respect to the user query

In [23]:
# Prompt asking the LLM to judge, document by document, whether the retrieved
# context is relevant to the user's query. The trailing backslashes join the
# first sentences into a single line of the prompt.
validate_retrieved_doc_prompt = """You are an expert RAG retrieval evaluator. \
Given user query and retrieved documents, your task is to validate with \
valid reasoning whether each retrieved document are relevant with respect to user query. \
Your evaluating reasoning must be concise and factual.
<user_query>
{user_query}
</user_query>

<retrieved_documents>
{retrieved_documents}
</retrieved_documents>

Follow this output format to evaluate each document
Document-index
Relevance: Yes/No
Reasoning: Why Relevant or not relevant?
"""

from langchain_core.messages import HumanMessage

# Fill the template with the query and the formatted context string.
message = validate_retrieved_doc_prompt.format(
    user_query=query,
    retrieved_documents=retrieved_documents,
)

# Send the evaluation request as a single human message. Despite its name,
# rfp_metadata holds the LLM's relevance assessment.
rfp_metadata = llm.invoke([HumanMessage(content=message)])

In [24]:
# Show the text of the LLM's per-document relevance assessment.
print(rfp_metadata.content)

Document-0
Relevance: Yes
Reasoning: This document specifies several kinds of inspections required, such as special inspections for structural assembly fabrication, grade and mill test reports for steel elements, and site preparation inspections.

Document-1
Relevance: No
Reasoning: This document discusses the administrative requirements for inspection reports (e.g., copies, frequency, content, maintenance of books) rather than the specific kinds of inspections required for the project.

Document-2
Relevance: No
Reasoning: This document focuses on the documentation, reporting, and communication of inspection results and discrepancies, not on the specific types or kinds of inspections that are required.


# route user query

In [26]:
from langchain_core.messages import HumanMessage


# Prompt for a binary router: the model must answer "1" for RFP-related
# queries and "0" for anything else, with no extra text.
ROUTER_PROMPT = """You are a router. Your job is to decide whether the user's query is related to RFP (Request for Proposal) documents. 
Return:
- "1" if the query is related to RFPs (e.g., proposals, bidding, procurement, deadlines, scope of work, requirements, compliance, evaluation criteria, or anything that should be answered from RFP documents).
- "0" if the query is general and not related to RFPs.

Only return "1" or "0" with no additional text.
<user_query>
{user_query}
</user_query>
"""

# Fill the template with the current query.
message = ROUTER_PROMPT.format(
    user_query=query,
)

# Ask the LLM for the routing decision, sent as a single human message.
route_response = llm.invoke([HumanMessage(content=message)])

# Echo the raw response; the decision itself is in route_response.content.
route_response

AIMessage(content='1', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []}, id='lc_run--1e408b5f-d94e-4337-8149-cd1f6727fa20-0', usage_metadata={'input_tokens': 134, 'output_tokens': 98, 'total_tokens': 232, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 97}})

In [27]:
# Echo the query string that was just routed.
query

'what kind of inspection required for this project?'

In [31]:
# Build a router prompt for a query that is unrelated to RFPs.
message = ROUTER_PROMPT.format(user_query="I work on LangChain, it is very good.")

# Send the identical request ten times and print each raw response so the
# routing decisions can be compared across runs.
for i in range(10):
    route_response = llm.invoke([HumanMessage(content=message)])
    print(i, route_response)

0 content='0' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []} id='lc_run--47098c0c-a5ad-4c1d-8465-acb1a886e86c-0' usage_metadata={'input_tokens': 136, 'output_tokens': 119, 'total_tokens': 255, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 118}}
1 content='0' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-flash', 'safety_ratings': []} id='lc_run--cd76b397-25e7-4f5d-988b-6bdbbf89f811-0' usage_metadata={'input_tokens': 136, 'output_tokens': 119, 'total_tokens': 255, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 118}}
2 content='0' additional_kwargs={} response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.5-fla

In [None]:
# Re-send the current router message ten more times, printing each raw response.
for i in range(10):
    route_response = llm.invoke([HumanMessage(content=message)])
    print(i, route_response)