Retrieval-Augmented Agent Project
1. System Overview
This project implements an agentic Retrieval-Augmented Generation (RAG) system focused on the domain of industrial chemical safety and process safety management. The goal of the system is to provide accurate, context-aware responses to user queries by intelligently deciding when to rely on general language model knowledge and when to retrieve authoritative information from a specialized document corpus.

The system is built using LangGraph, with an agent node responsible for reasoning and decision-making, and a tool node connected to a document retrieval function backed by a Chroma vector store. Domain-specific documents, including safety guidelines, risk assessment procedures, and emergency response materials, are embedded and stored to enable semantic search.

A conditional control flow allows the agent to determine whether document retrieval is necessary based on the nature of the user query. General or conversational questions are answered directly, while technical or safety-critical questions trigger retrieval before response generation. Conversation memory is integrated to support multi-turn interactions within a session, enabling coherent and context-aware dialogue. The system supports both single-query execution and an interactive chat interface for user interaction.

Note : Remember to do edit this later before sumission

In [3]:
# Imports
from langgraph.graph import START, END, StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langgraph.prebuilt import ToolNode
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from IPython.display import Image, display, Markdown, HTML, Latex
from langchain_core.documents import Document
from langchain_core.tools import tool
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from dotenv import load_dotenv
from typing import Literal
import logging
import time
import os
import re
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load API key
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")

if not openai_api_key:
    raise ValueError("OPENAI_API_KEY not found! Please set it in your .env file.")

print("API key loaded")

API key loaded


In [5]:
# Initialize LLM
llm = ChatOpenAI(
    model="gpt-4o-mini",
    temperature=0,
    api_key=openai_api_key
)

print(f"LLM initialized: {llm.model_name}")

LLM initialized: gpt-4o-mini


In [6]:
# Prevent messy PDF logs
logging.getLogger("pypdf").setLevel(logging.ERROR)

# file path
file_path = "./documents"

async def load_all_pdfs(pdf_dir: str):
    pages = []

    for filename in os.listdir(pdf_dir):
        if filename.lower().endswith(".pdf"):
            file_path = os.path.join(pdf_dir, filename)
            loader = PyMuPDFLoader(file_path)

            async for page in loader.alazy_load():
                page.metadata["source"] = filename
                pages.append(page)

    return pages

documents = await load_all_pdfs(file_path)
print(f"Loaded {len(documents)} pages from PDFs")

FileNotFoundError: [WinError 3] The system cannot find the path specified: './documents'

In [None]:
def smart_chunk_by_sections(documents, chunk_size=1000, chunk_overlap=100):
    """
    Smart chunking for better retrieval:
    - Skips blank pages
    - Chunks each document independently
    - Prevents bias toward any single document
    """

    print(f"Original documents: {len(documents)}")

    # Intelligent splitter
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=[
            "\n\n",
            "\n",
            ". ",
            " ",
            ""
        ]
    )

    all_chunks = []

    for idx, doc in enumerate(documents):
        content = doc.page_content.strip()

        if not content:
            continue  # Skip blank pages

        # Preserve document identity without bias
        single_doc = Document(
            page_content=content,
            metadata={**doc.metadata, "doc_index": idx}
        )

        doc_splits = text_splitter.split_documents([single_doc])
        all_chunks.extend(doc_splits)

        # print(f"‚úì Document {idx + 1}: {len(doc_splits)} chunks")

    print(f"\nTotal chunks created: {len(all_chunks)}")
    return all_chunks


def view_chunks_with_context(chunks, num_chunks_to_show=10):
    """
    Display chunk previews (for sanity checking only)
    """
    print("\n" + "=" * 80)
    print("CHUNK PREVIEW")
    print("=" * 80)

    for i, chunk in enumerate(chunks[:num_chunks_to_show]):
        print(f"\n--- CHUNK {i + 1} ---")
        print(f"Length: {len(chunk.page_content)} characters")
        print("Preview (first 300 chars):")
        print(chunk.page_content[:300])
        print("...")
        if i < num_chunks_to_show - 1:
            print("-" * 40)


if __name__ == "__main__":
    # Chunk the documents (ALL documents treated equally)
    chunks_1000 = smart_chunk_by_sections(
        documents,
        chunk_size=1000,
        chunk_overlap=100
    )

    # # print(f"\n‚úì Total chunks: {len(chunks_1000)}")

    # Chunk statistics
    chunk_sizes = [len(c.page_content) for c in chunks_1000]
    print("\nChunk size statistics:")
    print(f"  Min: {min(chunk_sizes)} chars")
    print(f"  Max: {max(chunk_sizes)} chars")
    print(f"  Avg: {sum(chunk_sizes) / len(chunk_sizes):.0f} chars")

    # Optional preview (debug only)
    view_chunks_with_context(chunks_1000, num_chunks_to_show=5)

    print(f"\n{'=' * 80}")
    print("Chunks are ready for unbiased RAG retrieval across all documents.")
    print(f"{'=' * 80}")

In [None]:
# Initialize embeddings (using OpenAI)
embeddings = OpenAIEmbeddings(
    model="text-embedding-3-small",
    api_key=openai_api_key
)

print("Embeddings model initialized")

In [None]:
chroma_path = "./chroma_db"
collection_name = "becoming_a_quant"

# Check if Chroma DB already exists
db_exists = os.path.exists(chroma_path) and len(os.listdir(chroma_path)) > 0

if db_exists:
    print("Existing Chroma DB found. Loading...")
    
    vectorstore = Chroma(
        collection_name=collection_name,
        persist_directory=chroma_path,
        embedding_function=embeddings
    )

else:
    print("No existing Chroma DB found. Creating new one...")
    
    vectorstore = Chroma(
        collection_name=collection_name,
        persist_directory=chroma_path,
        embedding_function=embeddings
    ) 
    vectorstore.add_documents(documents=chunks_1000)


    print(f"Vector store created with {len(chunks_1000)} chunks")
    print(f"   Persisted to: {chroma_path}")

In [None]:
test_query = "What is Algorithmic Trading?"
test_results = vectorstore.similarity_search(test_query, k=2)

print(f"Query: {test_query}")
print("\nRetrieved Chunks:\n")

for i, doc in enumerate(test_results, 1):
    print(f"Result {i}")
    print(f"Source: {doc.metadata.get('source', 'Unknown')}")
    print(f"Content:\n{doc.page_content[:600]}...\n")

In [None]:
test_query = "Who is Peter Carr?"
test_results = vectorstore.similarity_search(test_query, k=2)

print(f"Query: {test_query}")
print("\nRetrieved Chunks:\n")

for i, doc in enumerate(test_results, 1):
    print(f"Result {i}")
    print(f"Source: {doc.metadata.get('source', 'Unknown')}")
    print(f"Content:\n{doc.page_content[:600]}...\n")

In [None]:
@tool
def retrieve_documents(query: str) -> str:
    """
    Search for relevant documents in the knowledge base.
    
    Use this tool when you need information from the document collection
    to answer the user's question. Do NOT use this for:
    - General knowledge questions
    - Greetings or small talk
    - Simple calculations
    
    Args:
        query: The search query describing what information is needed
        
    Returns:
        Relevant document excerpts that can help answer the question
    """
    # Use MMR (Maximum Marginal Relevance) for diverse results
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 5, "fetch_k": 10}
    )
    
    # Retrieve documents
    results = retriever.invoke(query)
    
    if not results:
        return "No relevant documents found."
    
    # Format results
    formatted = "\n\n---\n\n".join(
        f"Document {i+1}:\n"
        f"Source: {doc.metadata.get('source', 'Unknown')}\n"
        f"Page: {doc.metadata.get('page', 'N/A')}\n"
        f"Content:\n{doc.page_content}"
        for i, doc in enumerate(results)
    )
    
    return formatted

print("Retrieval tool created")

In [None]:
test_query = "What is Algorithmic Trading?"

# Test tool directly
test_result = retrieve_documents.invoke({"query": "Explain the Advantages and Disadvantages of Algorithmic Trading."})
print(f"Tool result (first 500 chars):\n{test_result[:500]}...")

In [None]:
system_prompt = SystemMessage(content="""
You are an assistant specialized in quantitative finance, derivatives pricing, and risk management.
Your knowledge source is a collection of quantitative finance documents and financial modeling resources.

DOMAIN RESTRICTION:
Only answer questions related to:
- Quantitative finance and derivatives pricing
- Risk management and portfolio theory
- Financial modeling and numerical methods (Monte Carlo, finite differences, binomial trees)
- Options pricing, Greeks, and hedging strategies
- Fixed income instruments and interest rate models
- Credit derivatives and structured products
- Volatility modeling and stochastic processes
- Mathematical finance fundamentals (It√¥'s lemma, Brownian motion, etc.)

If a question is outside this domain AND no relevant information is found in the retrieved documents,
politely respond that it is outside your scope.

If relevant information IS found in the retrieved documents,
answer the question strictly using the documents, even if the topic is outside quantitative finance.

RETRIEVAL DECISION RULES:

DO NOT retrieve for:
- Greetings or small talk ("Hello", "Thank you")
- Questions about your capabilities
- Very general conceptual questions that do NOT depend on the documents
(e.g., "What is finance?")

DO retrieve for:
- Questions requiring procedural steps, formulas, or detailed explanations
- Questions that depend on document-specific information (models, case studies, examples)
- Technical questions about pricing, hedging, or risk management
- Any request where citing documents improves accuracy
- When in doubt about whether a concept is document-backed, prefer retrieval.


When documents are retrieved:
- Use them as the primary source of truth
- Cite the document source, chapter, or page when possible
- Include relevant formulas, equations, or technical details from the documents
- If the documents do not contain the answer, say so explicitly

Never fabricate pricing models, formulas, or quantitative methodologies.
Always distinguish between classical theory and real-world practice.
                              """)

In [None]:
# Bind tool to LLM
tools = [retrieve_documents]
llm_with_tools = llm.bind_tools(tools)

def assistant(state: MessagesState) -> dict:
    """
    Assistant node - decides whether to retrieve or answer directly.
    """
    # messages = [system_prompt] + state["messages"]
    messages = state["messages"]

    # Inject system prompt only once
    if not messages or messages[0].type != "system":
        messages = [system_prompt] + messages
        
    response = llm_with_tools.invoke(messages)
    return {"messages": [response]}

def should_continue(state: MessagesState) -> Literal["tools", "__end__"]:
    """
    Decide whether to call tools or finish.
    """
    last_message = state["messages"][-1]

    if last_message.tool_calls:
        return "tools"
    return "__end__"

In [None]:
# Build graph
builder = StateGraph(MessagesState)

# Add nodes
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    should_continue,
    {"tools": "tools", "__end__": END}
)
builder.add_edge("tools", "assistant")

# Add memory
memory = MemorySaver()
agent = builder.compile(checkpointer=memory)

In [None]:
# Visualize the agentic RAG graph
try:
    display(Image(agent.get_graph().draw_mermaid_png()))
except Exception as e:
    print(f"Could not display graph: {e}")
    print("Graph: START ‚Üí assistant ‚Üí [if tool_call] ‚Üí tools ‚Üí assistant ‚Üí END")

In [None]:
def convert_latex_for_display(text):
    """
    Converts LaTeX in text to proper Markdown format for Jupyter rendering
    Handles both inline ($...$) and display ($$...$$) math
    """
    if not text:
        return text

    text = re.sub(r'\\\(', '$', text)
    text = re.sub(r'\\\)', '$', text)
    text = re.sub(r'\\\[', '$$\n', text)
    text = re.sub(r'\\\]', '\n$$', text)

    return text


def query_agent(user_input: str, thread_id: str = "default_session"):
    """
    Improved query function with clear, well-formatted output.
    """
    print(f"\n{'='*70}")
    print(f"üë§ User: {user_input}")
    print(f"{'='*70}\n")

    result = agent.invoke(
        {"messages": [HumanMessage(content=user_input)]},
        config={"configurable": {"thread_id": thread_id}}
    )

    used_retrieval = False
    final_answer = None

    for message in result["messages"]:
        if isinstance(message, AIMessage):
            if message.tool_calls:
                used_retrieval = True
                print("üîç Agent: [Calling retrieval tool...]")
            if message.content and not message.tool_calls:
                final_answer = message.content

    if final_answer:
        # Convert LaTeX
        final_answer = convert_latex_for_display(final_answer)

        # Render as Markdown (fixes formulas)
        display(Markdown(f"### ü§ñ Agent Response\n\n{final_answer}"))
    else:
        print("‚ö†Ô∏è No response generated after retrieval!")

    print(f"\nüìä Decision: {'USED RETRIEVAL' if used_retrieval else 'ANSWERED DIRECTLY'}")

In [None]:
query_agent("Explain the Discrete Time Models in Quantitative Finance.")

ü§ñ Agent Response
Discrete time models in quantitative finance are frameworks where asset prices and time progress in finite increments. Here are the key features and concepts associated with these models:

Definition: Discrete time models allow for changes in asset prices and time to occur only at specific intervals, such as daily, weekly, or monthly. This contrasts with continuous models, where changes can happen at any moment.

Mathematical Framework: In discrete models, the evolution of asset prices is typically described using difference equations. For example, if 
 represents the asset price at time 
, a simple difference equation might be: $$

S_{t+1} = S_t \cdot u \quad \text{(up move)}


S_{t+1} = S_t \cdot d \quad \text{(down move)}

$$ where 
 and 
 are the factors by which the price increases or decreases.

Binomial Model: A well-known example of a discrete time model is the binomial model for option pricing. In this model, the price of an asset can move to one of two possible values (up or down) at each time step, creating a tree structure that allows for the calculation of option prices based on the possible future paths of the underlying asset.

Applications: Discrete time models are widely used in various financial applications, including option pricing, risk management, and portfolio optimization. They are particularly useful for numerical methods such as Monte Carlo simulations and finite difference methods, which often require discretization of continuous processes.

Comparison with Continuous Models: While continuous models, such as those based on stochastic calculus (e.g., Black-Scholes), can be mathematically elegant, discrete models are often more practical for computational purposes. In practice, continuous models are typically approximated using discrete methods for numerical analysis.

In summary, discrete time models are essential tools in quantitative finance, providing a structured approach to modeling asset price movements and facilitating the pricing of derivatives and risk management strategies.

üìä Decision: USED RETRIEVAL

In [None]:
def interactive_chat_with_mathjax(thread_id: str):
    """
    Interactive chat with explicit MathJax rendering
    Better for complex mathematical expressions
    """
    print("ü§ñ Interactive Chat (Notebook Mode)")
    print("Type 'exit' to quit\n")
    
    while True:
        user_input = input("üë§ You: ").strip()
        
        if user_input.lower() in ["exit", "quit"]:
            display(Markdown("üëã **Goodbye!**"))
            break
        
        if not user_input:
            continue
        
        display(Markdown(f"**üë§ You:** {user_input}"))
        
        thinking_display = display(
            Markdown("ü§ñ *Agent is thinking‚Ä¶*"),
            display_id=True
        )
        
        result = agent.invoke(
            {"messages": [HumanMessage(content=user_input)]},
            config={"configurable": {"thread_id": thread_id}}
        )
        
        used_retrieval = False
        final_answer = None
        
        for msg in result["messages"]:
            if isinstance(msg, AIMessage):
                if msg.tool_calls:
                    used_retrieval = True
                if msg.content and not msg.tool_calls:
                    final_answer = msg.content
        
        decision = "USED RETRIEVAL" if used_retrieval else "ANSWERED DIRECTLY"
        
        response_html = f"""
        <div style="font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; line-height: 1.6;">
            <h3 style="color: #e0e0e0;">ü§ñ Agent Response</h3>
            <div style="
                background-color: #121212;
                color: #ffffff;
                padding: 15px;
                border-left: 4px solid #7c4dff;
                margin: 10px 0;
                border-radius: 6px;
            ">
                {final_answer}
            </div>
            <p style="margin-top: 20px; color: #b0b0b0; font-size: 14px;">
                <strong>üìä Decision:</strong>
                <span style="background-color: #1e1e1e; padding: 3px 8px; border-radius: 4px;">
                    {decision}
                </span>
            </p>
        </div>

        <script src="https://polyfill.io/v3/polyfill.min.js?features=es6"></script>
        <script id="MathJax-script" async
            src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"></script>
        <script>
            MathJax.typesetPromise().catch(err => console.log(err));
        </script>
        """
        
        thinking_display.update(HTML(response_html))

def format_points_for_html(text):
    """
    Ensures numbered points appear on separate lines inside HTML blocks
    """
    if not text:
        return text

    # Force line breaks before numbered points
    text = re.sub(r'\n(\d+\.)', r'<br><br>\1', text)

    # Convert bullet dashes into line breaks
    text = re.sub(r'\n-\s+', r'<br>‚Äì ', text)

    return text

In [None]:
interactive_chat_with_mathjax("session")

In [None]:
interactive_chat_with_mathjax("session-1")