In [1]:
!pip install openai faiss-cpu pandas numpy tiktoken PyMuPDF python-dotenv requests beautifulsoup4




[notice] A new release of pip is available: 25.3 -> 26.0.1
[notice] To update, run: C:\Users\TempAccess\AppData\Local\Programs\Python\Python312\python.exe -m pip install --upgrade pip


In [2]:
import os
import sys
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
os.chdir(r"C:\Users\TempAccess\Documents\Dhruv\RAG")
print(os.getcwd())

C:\Users\TempAccess\Documents\Dhruv\RAG


In [4]:
from helper_function_openai import (
    SimpleRAG,
    RAGRetriever,
    OpenAIEmbedder,
    OpenAIChat,
    FAISSVectorStore,
    chunk_text,
    chunk_documents,
    show_context,
    Document,
    RetrievalResult
)

In [5]:
import requests
from typing import List, Dict, Any, Optional, Tuple
import json

In [6]:
from bs4 import BeautifulSoup

In [7]:
def scrape_web_page(url: str) -> Document:
    """
    Fetch and extract text content from a web page.
    
    Replaces LangChain's WebBaseLoader with requests + BeautifulSoup.
    
    Args:
        url: Web page URL to scrape
    Returns:
        Document with page text and metadata
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (compatible; RAGBot/1.0)"
    }
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    
    soup = BeautifulSoup(response.text, "html.parser")
    
    # Extract title
    title = soup.title.string.strip() if soup.title else "Untitled"
    
    # Remove script/style elements
    for tag in soup(["script", "style", "nav", "footer", "header"]):
        tag.decompose()
    
    # Get clean text from the main content area
    # Try common article containers first
    content_area = (
        soup.find("article") or 
        soup.find("main") or 
        soup.find("div", class_="post-content") or
        soup.find("body")
    )
    
    text = content_area.get_text(separator="\n", strip=True) if content_area else ""
    
    # Clean up excessive whitespace
    lines = [line.strip() for line in text.splitlines() if line.strip()]
    clean_text = "\n".join(lines)
    
    return Document(
        content=clean_text,
        metadata={
            "source": url,
            "title": title
        }
    )


def load_web_documents(urls: List[str]) -> List[Document]:
    """
    Load and chunk multiple web pages into Documents.
    
    Args:
        urls: List of URLs to scrape
    Returns:
        List of chunked Documents with source metadata
    """
    all_docs = []
    
    for url in urls:
        try:
            print(f"Loading: {url[:80]}...")
            doc = scrape_web_page(url)
            
            # Chunk the page content
            chunks = chunk_text(doc.content, chunk_size=2000, chunk_overlap=200)
            
            for i, chunk in enumerate(chunks):
                all_docs.append(Document(
                    content=chunk,
                    metadata={
                        **doc.metadata,
                        "chunk_index": i
                    }
                ))
            
            print(f"  → {len(chunks)} chunks")
        except Exception as e:
            print(f"  ✗ Failed: {e}")
    
    return all_docs

In [8]:
urls = [
    "https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io",
    "https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io"
]


In [9]:
documents = load_web_documents(urls)
print(f"\nTotal documents: {len(documents)}")

Loading: https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?re...
  → 2 chunks
Loading: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-2-reflection/...
  → 2 chunks
Loading: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?r...
  → 3 chunks
Loading: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?r...
  → 3 chunks
Loading: https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent...
  → 3 chunks

Total documents: 13


In [10]:
embedder = OpenAIEmbedder(model="text-embedding-3-small")
documents = embedder.embed_documents(documents)

In [11]:
documents

[Document(content='Share\nDear friends,\nI think AI agent workflows will drive massive AI progress this year — perhaps even more than the next generation of foundation models. This is an important trend, and I urge everyone who works in AI to pay attention to it.\nToday, we mostly use LLMs in zero-shot mode, prompting a model to generate final output token by token without revising its work. This is akin to asking someone to compose an essay from start to finish, typing straight through with no backspacing allowed, and expecting a high-quality result. Despite the difficulty, LLMs do amazingly well at this task!\nWith an agent workflow, however, we can ask the LLM to iterate over a document many times. For example, it might take a sequence of steps such as:\nPlan an outline.\nDecide what, if any, web searches are needed to gather more information.\nWrite a first draft.\nRead over the first draft to spot unjustified arguments or extraneous information.\nRevise the draft taking into accou

In [12]:
vector_store = FAISSVectorStore(dimension=embedder.dimension)
vector_store.add_documents(documents)

print(f"Indexed {vector_store.index.ntotal} document chunks")

Indexed 13 document chunks


In [13]:
### lets retrive the documents

def retrieve_documents(query: str, k: int = 5) -> List[RetrievalResult]:
    """
    Retrieve top-k relevant documents for a question.
    """
    query_embed = embedder.embed_text(query)
    docs = vector_store.search(query_embed, k=k)
    return docs

In [14]:
query = "What are the different kinds of agentic design patterns???"
results = retrieve_documents(query=query, k=30)

print(f"Retrieved {len(results)} documents for query: {query}")

Retrieved 30 documents for query: What are the different kinds of agentic design patterns???


In [15]:
for re in results:
    print("\n")
    title = re.document.metadata.get("title", "N/A")
    print(f"Title: {title} | Score: {re.score:.4f} | Rank: {re.rank}")
    print(f"meta-data: {re.document.metadata}")
    print(f"Initial Text: \n{re.document.content}")



Title: Agentic Design Patterns Part 5, Multi-Agent Collaboration | Score: 0.6046 | Rank: 0
meta-data: {'source': 'https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-5-multi-agent-collaboration/?ref=dl-staging-website.ghost.io', 'title': 'Agentic Design Patterns Part 5, Multi-Agent Collaboration', 'chunk_index': 2}
Initial Text: 
ut their
GitHub repo
and perhaps clone the repo and run the system yourself. While it may not always produce what you want, you might be amazed at how well it does.
Like the design pattern of
Planning, I find the output quality of multi-agent collaboration hard to predict, especially when allowing agents to interact freely and providing them with multiple tools. The more mature patterns of
Reflection
and
Tool Use
are more reliable. I hope you enjoy playing with these agentic design patterns and that they produce amazing results for you!
If you're interested in learning more, I recommend:
“
Communicative Agents for Software Development
,” Qian 

# Grade Document relevancy

In [16]:
chat = OpenAIChat(
    model_name="gpt-4o-mini",
    temperature=0.0
)

def grade_document(question: str, document_content: str)-> str:
    """
    Grade whether a retrieved document is relevant to the question.
    
    Uses: OpenAI JSON mode for structured output
    
    Returns:
        'yes' or 'no'
    """

    messages = [
        {
            "role":"system",
            "content":(
                "You are a grader assessing relevance of a retrieved document to a user question.\n"
                "If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant.\n"
                "It does not need to be a stringent test. The goal is to filter out erroneous retrievals.\n"
                "Respond with JSON: {\"binary_score\": \"yes\"} or {\"binary_score\": \"no\"}"
            )
        },
        {
            "role":"user",
            "content":(
                "Question: {question}\n"
                "Document Content: {document_content}"
            )
        }
    ]

    result = chat.chat_json(messages=messages)
    return str(result.get("binary_score")).strip()

In [17]:
results

[RetrievalResult(document=Document(content='ut their\nGitHub repo\nand perhaps clone the repo and run the system yourself. While it may not always produce what you want, you might be amazed at how well it does.\nLike the design pattern of\nPlanning, I find the output quality of multi-agent collaboration hard to predict, especially when allowing agents to interact freely and providing them with multiple tools. The more mature patterns of\nReflection\nand\nTool Use\nare more reliable. I hope you enjoy playing with these agentic design patterns and that they produce amazing results for you!\nIf you\'re interested in learning more, I recommend:\n“\nCommunicative Agents for Software Development\n,” Qian et al. (2023) (the ChatDev paper)\n“\nAutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation\n,” Wu et al. (2023)\n“\nMetaGPT: Meta Programming for a Multi-Agent Collaborative Framework\n,” Hong et al. (2023)\nKeep learning!\nAndrew\nRead "Agentic Design Patterns Part 1: Fo

In [18]:
query

'What are the different kinds of agentic design patterns???'

In [19]:
docs_to_use= []

for re in results:
    content = re.document.content
    # print(content)
    score = grade_document(query, content)
    # print(score)

    if score == "yes":
        docs_to_use.append(re.document)


print(f"\n {len(docs_to_use)} / {len(results)} documents passed relevancy filter.")    


 0 / 30 documents passed relevancy filter.


In [20]:
docs_to_use

[]

## Generate the answer from the filtered documents

In [21]:
def format_docs_for_contexts(docs: List[Document]) -> str:
    """
    Format documents into structured context strings.
    Includes doc ID, title, and source for attribution.
    """

    formatted = []
    for i, doc in enumerate(docs):
        title = doc.metadata.get("title", "Untitled")
        source = doc.metadata.get("source", "Unkown")
        formatted.append(
            f"""<doc{i+1}>
            <title>{title}</title>
            <source>{source}</source>
            <content>{doc.content}</content>
            </doc{i+1}>
            """
        )
    
    return formatted

In [22]:
def generate_answer(query:str, docs:List[Document]) -> str:
    """
    Generate a RAG answer using filtered documents.
    
    Replaces: LangChain rag_chain = prompt | llm | StrOutputParser()
    """
    context = format_docs_for_contexts(docs)

    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Answer the question based upon the provided documents. "
        "Use three-to-five sentences maximum and keep the answer concise."
        )

    return chat.chat_with_context(query=query, context=context, system_prompt=system_prompt)

In [23]:
generation = generate_answer(query, documents)

print(f"Q: {query}\n")
print(f"A: {generation}")

Q: What are the different kinds of agentic design patterns???

A: The different kinds of agentic design patterns include Reflection, Tool Use, Planning, and Multi-agent Collaboration. Reflection involves the LLM examining its own output to identify areas for improvement. Tool Use allows the LLM to utilize external functions for tasks like web searches or code execution. Planning enables the LLM to autonomously determine the sequence of steps needed to accomplish a larger task. Multi-agent Collaboration involves multiple agents working together, each focusing on specific subtasks to enhance overall performance.


### Hallucination check

In [24]:
def check_hallucination(query: str, docs: List[Document], generation:str) -> Dict:
    """
    Check if the generated answer is grounded in the source documents.
    
    Replaces: ChatGroq + with_structured_output(GradeHallucinations)
    
    Returns:
        Dict with 'binary_score' ('yes'/'no') and optional 'explanation'
    """
    context = format_docs_for_contexts(docs)
    facts = "\n\n".join(context)
    
    messages = [
        {
            "role": "system",
            "content": (
                "You are a grader assessing whether an LLM generation is grounded in / supported by "
                "a set of retrieved facts.\n"
                "Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / "
                "supported by the set of facts.\n"
                "Respond with JSON: {\"binary_score\": \"yes\", \"explanation\": \"brief reason\"}"
            )
        },
        {
            "role": "user",
            "content": f"Set of facts:\n\n{facts}\n\nLLM generation:\n{generation}"
        }
    ]
    

    return chat.chat_json(messages)


In [25]:
hallucination_result = check_hallucination(generation, docs_to_use, generation)

grounded = hallucination_result.get("binary_score", "unknown")
explanation = hallucination_result.get("explanation", "")

print(f"Grounded in facts: {grounded}")
if explanation:
    print(f"Explanation: {explanation}")

Grounded in facts: yes
Explanation: The LLM generation accurately describes various agentic design patterns, which are supported by the provided set of facts.


# highlight Used Document Segments

In [26]:
def highlight_used_segments(
    question: str,
    generation: str,
    docs: List[Document]
) -> List[Dict]:
    """
    Identify exact text segments from source documents that were used
    to generate the answer.
    
    Replaces: LangChain PydanticOutputParser + ChatGroq(mixtral)
    Uses: OpenAI JSON mode with explicit schema instructions
    
    Returns:
        List of dicts with 'id', 'title', 'source', 'segment'
    """
    context = "\n\n".join(format_docs_for_contexts(docs))
    
    messages = [
        {
            "role": "system",
            "content": (
                "You are an advanced assistant for document search and retrieval. You are provided with:\n"
                "1. A question\n"
                "2. A generated answer based on the question\n"
                "3. A set of source documents\n\n"
                "Your task: identify and extract the EXACT inline segments from the provided documents "
                "that directly correspond to the content used to generate the answer.\n\n"
                "Rules:\n"
                "- Each segment must be an exact, verbatim snippet from the document text\n"
                "- Only include documents that were actually used\n"
                "- The relevance of each segment to the answer must be clear\n\n"
                "Respond with JSON in this exact format:\n"
                '{"highlights": [{"id": "doc1", "title": "...", "source": "...", "segment": "exact verbatim text..."}]}'
            )
        },
        {
            "role": "user",
            "content": (
                f"Documents:\n{context}\n\n"
                f"Question: {question}\n\n"
                f"Generated answer: {generation}"
            )
        }
    ]
    
    # Use a higher token limit for this task
    result = chat.chat_json(messages)
    return result.get("highlights", [])

In [27]:
documents

[Document(content='Share\nDear friends,\nI think AI agent workflows will drive massive AI progress this year — perhaps even more than the next generation of foundation models. This is an important trend, and I urge everyone who works in AI to pay attention to it.\nToday, we mostly use LLMs in zero-shot mode, prompting a model to generate final output token by token without revising its work. This is akin to asking someone to compose an essay from start to finish, typing straight through with no backspacing allowed, and expecting a high-quality result. Despite the difficulty, LLMs do amazingly well at this task!\nWith an agent workflow, however, we can ask the LLM to iterate over a document many times. For example, it might take a sequence of steps such as:\nPlan an outline.\nDecide what, if any, web searches are needed to gather more information.\nWrite a first draft.\nRead over the first draft to spot unjustified arguments or extraneous information.\nRevise the draft taking into accou

In [28]:
highlights = highlight_used_segments(query, generation, documents)
print(highlights)
print("=" * 60)
print("SOURCE HIGHLIGHTS")
print("=" * 60)

for h in highlights:
    print(f"\nID: {h.get('id', 'N/A')}")
    print(f"Title: {h.get('title', 'N/A')}")
    print(f"Source: {h.get('source', 'N/A')}")
    print(f"Segment: {h.get('segment', 'N/A')}")
    print("-" * 60)

[{'id': 'doc2', 'title': 'Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance', 'source': 'https://www.deeplearning.ai/the-batch/how-agents-can-improve-llm-performance/?ref=dl-staging-website.ghost.io', 'segment': 'Reflection: The LLM examines its own work to come up with ways to improve it.'}, {'id': 'doc5', 'title': 'Agentic Design Patterns Part 3: Tool Use', 'source': 'https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-3-tool-use/?ref=dl-staging-website.ghost.io', 'segment': 'Tool Use, in which an LLM is given functions it can request to call for gathering information, taking action, or manipulating data, is a key design pattern of AI agentic workflows.'}, {'id': 'doc8', 'title': 'Agentic Design Patterns Part 4: Planning', 'source': 'https://www.deeplearning.ai/the-batch/agentic-design-patterns-part-4-planning/?ref=dl-staging-website.ghost.io', 'segment': 'Planning is a key agentic AI design pattern in which we use a large language model (LLM) to auto

# Full RAG pipeline

In [35]:
def retrive_content_from_retrieve_documents(retrival_docs:List[RetrievalResult]) -> List[Document]:
    docs = []
    for doc in retrival_docs:
        
        docs.append(Document(doc.document.content, metadata=doc.document.metadata, embedding=doc.document.embedding))

    
    return docs

In [50]:
def rag_pipeline(question: str,
    k:int = 4,
    check_hallucination_var:bool =True,
    find_highlights_var:bool=True,
    verbose_var:bool=True
    )-> Dict:
    """
    Complete RAG pipeline with grading, hallucination check, and source highlighting.
    
    Args:
        question: User question
        k: Number of documents to retrieve
        check_hallucination_var: Whether to run hallucination check
        find_highlights_var: Whether to extract source segments
        verbose_var: Print intermediate results
    
    Returns:
        Dict with answer, grading results, hallucination check, and highlights
    """

    result = {"question": question}

    # retrieve
    retrieved = retrieve_documents(query=question, k=k)
    retrieved_docs = retrive_content_from_retrieve_documents(retrieved)

    if verbose_var:
        print(f"Retrieved {len(retrieved)} documents")
        for i, doc in enumerate(retrieved):
            print(f"Document {i+1}: {doc.document.metadata['title']}")
            print(f"Score: {doc.score}")
            print("-" * 30)

    graded_docs = []
    filtered_docs = []
    
    for r in retrieved:
        grade = grade_document(question, r.document.content)
        graded_docs.append({"score": r.score, "grade": grade})
        
        if grade == "yes":
            filtered_docs.append(r.document)
        
        if verbose_var:
            print(f"  Score: {r.score:.4f} | Grade: {grade} | {r.document.content[:80]}...")
    
    result["grading"] = graded_docs
    result["docs_used"] = len(filtered_docs)

    if verbose_var:
        print(f"\n✓ {len(filtered_docs)}/{len(retrieved)} docs passed filter")


    # generate answer
    answer = generate_answer(question, retrieved_docs)
    result["answer"] = answer

    if verbose_var:
        print("Answer:", answer)
        print("-" * 30)

    # check hallucination
    if check_hallucination_var:
        hallucination_check = check_hallucination(question, retrieved_docs, answer)
        result["grounded"] = hallucination_check.get("binary_score", "unknown")
        
        if verbose_var:
            print(f"\nGrounded: {result['grounded']}")
    

    # find highlights
    if find_highlights_var:
        highlights = highlight_used_segments(question, answer, retrieved_docs)
        result["highlights"] = highlights

        if verbose_var:
            print(f"\nSource segments found: {len(highlights)}")
            for h in highlights:
                print(f"  [{h.get('id')}] {h.get('segment', '')[:100]}...")

    return result

In [51]:
# Run the full pipeline
result = rag_pipeline("What are the different kinds of agentic design patterns?")

print("\n" + "=" * 60)
print("FINAL RESULT")
print("=" * 60)
print(f"Answer: {result['answer']}")
print(f"Docs used: {result['docs_used']}")
print(f"Grounded: {result.get('grounded', 'N/A')}")

Retrieved 4 documents
Document 1: Agentic Design Patterns Part 5, Multi-Agent Collaboration
Score: 0.6163897514343262
------------------------------
Document 2: Agentic Design Patterns Part 5, Multi-Agent Collaboration
Score: 0.5524249076843262
------------------------------
Document 3: Agentic Design Patterns Part 5, Multi-Agent Collaboration
Score: 0.5509985089302063
------------------------------
Document 4: Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance
Score: 0.5274108648300171
------------------------------
  Score: 0.6164 | Grade: no | ut their
GitHub repo
and perhaps clone the repo and run the system yourself. Whi...
  Score: 0.5524 | Grade: no | For example, the prompt above emphasized clear, efficient code as opposed to, sa...
  Score: 0.5510 | Grade: no | Share
Dear friends,
Multi-agent collaboration is the last of the four
key AI age...
  Score: 0.5274 | Grade: no | zero shot) does better at 67.0%. However, the improvement from GPT-3.5 to GPT-4 ...

✓ 0

In [52]:
result

{'question': 'What are the different kinds of agentic design patterns?',
 'grading': [{'score': 0.6163897514343262, 'grade': 'no'},
  {'score': 0.5524249076843262, 'grade': 'no'},
  {'score': 0.5509985089302063, 'grade': 'no'},
  {'score': 0.5274108648300171, 'grade': 'no'}],
 'docs_used': 0,
 'answer': 'The different kinds of agentic design patterns include Reflection, Tool Use, Planning, and Multi-Agent Collaboration. Reflection involves the LLM examining its own work to identify improvements. Tool Use provides the LLM with tools to gather information or take action. Planning allows the LLM to create and execute a multistep plan to achieve a goal. Multi-Agent Collaboration involves multiple AI agents working together, splitting tasks and discussing ideas to produce better solutions.',
 'grounded': 'yes',
 'highlights': [{'id': 'doc4',
   'title': 'Four AI Agent Strategies That Improve GPT-4 and GPT-3.5 Performance',
   'source': 'https://www.deeplearning.ai/the-batch/how-agents-can-i

In [53]:
result["answer"]

'The different kinds of agentic design patterns include Reflection, Tool Use, Planning, and Multi-Agent Collaboration. Reflection involves the LLM examining its own work to identify improvements. Tool Use provides the LLM with tools to gather information or take action. Planning allows the LLM to create and execute a multistep plan to achieve a goal. Multi-Agent Collaboration involves multiple AI agents working together, splitting tasks and discussing ideas to produce better solutions.'

In [54]:
# Test with more questions
test_questions = [
    "What is the reflection design pattern in AI agents?",
    "How does multi-agent collaboration work?",
    "What tools can AI agents use?",
    "How does planning work in agentic AI?"
]

for q in test_questions:
    print("\n" + "=" * 60)
    r = rag_pipeline(q, verbose_var=False)
    print(f"Q: {q}")
    print(f"A: {r['answer']}")
    print(f"Docs: {r['docs_used']} | Grounded: {r.get('grounded', 'N/A')}")


Q: What is the reflection design pattern in AI agents?
A: The reflection design pattern in AI agents involves the model examining its own outputs to identify areas for improvement. This process typically includes generating an initial response, then prompting the model to critique its own work and suggest enhancements. By automating self-criticism, the model can iteratively refine its outputs, leading to higher quality results in tasks such as coding, writing, and answering questions. This approach can significantly enhance performance with relatively simple implementation.
Docs: 0 | Grounded: yes

Q: How does multi-agent collaboration work?
A: Multi-agent collaboration involves breaking down complex tasks into subtasks that different AI agents can execute, similar to how human teams operate. Each agent is assigned a specific role, such as a software engineer or designer, and is prompted to focus on particular aspects of the task, which enhances efficiency and performance. This approa