In [1]:
%%capture --no-stderr
# Install the packages this notebook imports into the running kernel's environment.
# `%%capture --no-stderr` hides pip's stdout but still shows anything written to stderr.
# NOTE(review): versions are unpinned, so a later re-run may pull incompatible releases —
# pin them (pkg==x.y.z) once a working set is known.
%pip install langchain_community langchainhub chromadb langchain langgraph tavily-python langchain-text-splitters langchain_openai

In [2]:
import os
from getpass import getpass

from tavily import TavilyClient

# Never hard-code the key in the notebook: read it from the environment and only
# prompt (without echoing) when it is missing. An empty literal cannot authenticate
# and a real one would leak as soon as the notebook is shared.
if not os.environ.get("TAVILY_API_KEY"):
    os.environ["TAVILY_API_KEY"] = getpass("Tavily API key: ")
tavily = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

# Plain search: keep only the URL and the content snippet of each hit.
response = tavily.search(query="Where does Messi play right now?", max_results=3)
context = [{"url": obj["url"], "content": obj["content"]} for obj in response['results']]

# Search context for RAG: a single string of context bounded by `max_tokens`,
# ready to be placed straight into a prompt.
response_context = tavily.get_search_context(query="Where does Messi play right now?", search_depth="advanced", max_tokens=500)

# Direct question answering with relevant sources in one call; usable as a baseline
# to compare the RAG pipeline against.
response_qna = tavily.qna_search(query="Where does Messi play right now?")


In [3]:
import os
from getpass import getpass

from langchain_openai import ChatOpenAI

# Keep a key that is already configured; assigning '' unconditionally would wipe it.
# Only prompt (without echoing) when no key is present.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# NOTE: the setup cell further down rebinds `llm` to a gpt-4o model.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

In [4]:
import os

os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"

# Tracing is only useful with a real LangSmith key. Do not write a placeholder into
# the environment (it would replace a genuine key and cannot authenticate); instead
# enable tracing only when a key is already configured.
_langsmith_key = os.environ.get("LANGCHAIN_API_KEY", "")
if _langsmith_key in ("", "<your-api-key>"):
    # Also clears a placeholder left behind by an earlier run of this cell.
    os.environ.pop("LANGCHAIN_API_KEY", None)
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
else:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [21]:
# Required package imports
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser, StrOutputParser
from langchain_core.documents import Document
from langgraph.graph import StateGraph, END
from typing_extensions import TypedDict
from typing import List
from pprint import pprint

# LLM configuration
# Rebinds `llm`, replacing the gpt-4o-mini instance created in an earlier cell.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Document loading and vector store construction

urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

# One loader call per URL; `docs` keeps one entry per page.
docs = []
for page_url in urls:
    docs.append(WebBaseLoader(page_url).load())

# Flatten the per-URL results into a single list of documents.
docs_list = []
for loaded_pages in docs:
    docs_list.extend(loaded_pages)

# Split into chunks of 250 with no overlap (sizes are measured by the tiktoken encoder).
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_overlap=0,
    chunk_size=250,
)
doc_splits = text_splitter.split_documents(docs_list)

# Embed the chunks and index them in the "rag-chroma" collection.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-small")
vectorstore = Chroma.from_documents(
    documents=doc_splits,
    collection_name="rag-chroma",
    embedding=embedding_model,
)
retriever = vectorstore.as_retriever()

# Chain construction
def _chat_prompt(system_text, human_text):
    """Build a chat prompt made of one system message and one human message."""
    return ChatPromptTemplate.from_messages([
        ("system", system_text),
        ("human", human_text),
    ])


# Shared output rule for JSON graders. The doubled braces are kept literally here so
# that the prompt template treats them as escaped braces rather than input variables.
json_rule = (
    "You must respond with a valid JSON object only. "
    "Use double quotes for keys and values. "
    "Respond with one of: {{\"score\": \"yes\"}} or {{\"score\": \"no\"}}. "
    "No explanation. No markdown. No extra text."
)

# Relevance grader: does a retrieved document help answer the question?
_relevance_system = (
    "You are a grader. Given a question and a document, decide if the document helps answer the question. "
    "Respond ONLY with a JSON like {{\"score\": \"yes\"}} or {{\"score\": \"no\"}}. "
    "Be generous. If the document mentions key ideas or terms, it's relevant."
)
relevance_prompt = _chat_prompt(
    _relevance_system,
    "question: {question}\n\ndocument: {document}",
)
relevance_grader = relevance_prompt | llm | JsonOutputParser()

# Hallucination grader: "yes" means the answer is supported by the documents.
_hallucination_system = (
    "You are a hallucination detector. Determine whether the answer is meaningfully supported by the documents. "
    "You can consider paraphrased or reworded sentences to be valid. "
    "As long as the core definition, fact, or concept appears in the documents, consider it supported.\n\n"
    "Output only one of: {{\"score\": \"yes\"}} or {{\"score\": \"no\"}}, formatted as JSON."
)
hallucination_prompt = _chat_prompt(
    _hallucination_system,
    "documents: {documents}\n\nanswer: {generation}",
)
hallucination_grader = hallucination_prompt | llm | JsonOutputParser()

# Usefulness grader: the only prompt that reuses the shared `json_rule`.
answer_prompt = _chat_prompt(
    "You are a usefulness evaluator. " + json_rule,
    "question: {question}\n\nanswer: {generation}",
)
answer_grader = answer_prompt | llm | JsonOutputParser()

# Answer generator: plain-text output, hence the string parser.
generate_prompt = _chat_prompt(
    "Answer the question using the provided context. Use max 3 sentences. Say 'I don't know' if unsure.",
    "question: {question}\n\ncontext: {context}",
)
generate_chain = generate_prompt | llm | StrOutputParser()

# State type definition
class RAGState(TypedDict):
    """State schema passed to StateGraph; the node functions read and update these keys.

    Note: relevance_checker also returns a "__next__" key, which is not declared here.
    """
    # The user's question; set by run_rag and echoed back by the nodes.
    question: str
    # The generated answer; "" in run_rag's initial state, filled by the generation nodes.
    generation: str
    # Documents currently used as context (from the retriever or from the Tavily fallback).
    documents: List[Document]
    # 0 initially; regenerate_answer sets it to 1 and hallucination_checker reads it.
    retry_count: int

# Node definitions
def docs_retrieval(state):
    """Look up documents for the question with the module-level `retriever`.

    Returns a state update holding the retrieved documents and the unchanged question.
    """
    question = state["question"]
    retrieved = retriever.invoke(question)
    return {"documents": retrieved, "question": question}

def relevance_checker(state):
    """Keep only the documents that the relevance grader scores as "yes".

    Args:
        state: mapping with "question" and "documents" (objects exposing `page_content`).

    Returns:
        A state update. With at least one relevant document: the filtered documents
        and the question. Otherwise: an empty document list, the question,
        `retry_count` reset to 0 and the "__next__" hint set to "search_tavily".
    """
    question = state["question"]
    relevant_docs = []
    for doc in state["documents"]:
        result = relevance_grader.invoke({"question": question, "document": doc.page_content})
        # The grader is an LLM: a missing "score" key or a variant such as "Yes" must
        # not crash the graph or silently drop a relevant document. Anything that is
        # not a clear "yes" counts as not relevant.
        raw_score = result.get("score") if isinstance(result, dict) else None
        print(f"Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: {raw_score}")
        if str(raw_score).strip().lower() == "yes":
            relevant_docs.append(doc)

    if relevant_docs:
        return {"documents": relevant_docs, "question": question}

    return {
        "documents": [],
        "question": question,
        "retry_count": 0,
        # Routing hint read by the conditional edge after this node. It is not a
        # declared RAGState key, so it is kept only for that router's benefit.
        "__next__": "search_tavily",
    }

def search_tavily(state):
    """Web-search fallback: query Tavily with the question and wrap the hits as Documents.

    Returns a state update with the question and up to three Documents whose
    `page_content` is the hit's content and whose metadata carries the hit's URL
    under "source" (both default to "" when the hit lacks the field).
    """
    print("---SEARCH TAVILY---")
    query = state["question"]
    hits = tavily.search(query=query, max_results=3)["results"]

    # Build the documents with the URL included, so the source can be cited later.
    web_docs = [
        Document(
            page_content=hit.get("content", ""),
            metadata={"source": hit.get("url", "")},
        )
        for hit in hits
    ]

    return {"question": query, "documents": web_docs}

def _generate_from_state(state):
    """Run `generate_chain` on the question with the documents' text joined as context."""
    context = "\n\n".join(doc.page_content for doc in state["documents"])
    return generate_chain.invoke({"context": context, "question": state["question"]})


def generate_answer(state):
    """Generate an answer from the current documents.

    Returns a copy of the state with "generation" replaced by the new answer.
    """
    return {**state, "generation": _generate_from_state(state)}


def regenerate_answer(state):
    """Generate the answer again after a failed grading pass.

    Returns a copy of the state with the new "generation" and with "retry_count"
    incremented (0 or missing becomes 1), so the count stays correct even if the
    graph is ever allowed more than one retry.
    """
    return {
        **state,
        "generation": _generate_from_state(state),
        "retry_count": (state.get("retry_count") or 0) + 1,
    }

def _grader_score(result):
    """Return (raw, normalized) "score" from a grader result; tolerates malformed output."""
    raw = result.get("score") if isinstance(result, dict) else None
    return raw, str(raw).strip().lower()


def hallucination_checker(state):
    """Grade the generated answer and return the routing verdict.

    The hallucination grader answers "yes" when the answer IS supported by the
    documents. Only an explicit "yes" passes: a "no", a case variant such as "No",
    or a malformed result is treated as unsupported, so the guard fails closed.

    Returns:
        "retry"   - answer unsupported and `retry_count` is still below 1;
        "fail"    - answer unsupported after a retry, or judged not useful;
        "success" - answer supported and judged useful.
    """
    print("---HALLUCINATION CHECKER Ïã§ÌñâÎê®---")
    context = "\n\n".join(doc.page_content for doc in state["documents"])
    grounded_raw, grounded = _grader_score(
        hallucination_grader.invoke({"documents": context, "generation": state["generation"]})
    )
    print(f"üß™ hallucination ÌåêÎã® Í≤∞Í≥º: {grounded_raw}")

    if grounded != "yes":
        if state.get("retry_count", 0) < 1:
            print("üîÅ hallucination Ïã§Ìå® ‚Üí Ïû¨ÏÉùÏÑ± ÏãúÎèÑ")
            return "retry"
        print("‚ùå hallucination 2Ìöå Ïã§Ìå® ‚Üí Ï¢ÖÎ£å")
        return "fail"

    # The usefulness grader is only consulted for answers that passed the grounding check.
    useful_raw, useful = _grader_score(
        answer_grader.invoke({"question": state["question"], "generation": state["generation"]})
    )
    print(f"üéØ Ïú†Ïö©ÏÑ± ÌåêÎã® Í≤∞Í≥º: {useful_raw}")
    return "success" if useful == "yes" else "fail"

# LangGraph assembly
class GradedRAGState(RAGState):
    """RAGState plus the verdict produced by grading the latest generation."""
    # "success", "fail" or "retry", exactly as returned by hallucination_checker.
    verdict: str


def _route_after_relevance(state):
    """Generate when relevance grading kept any document, otherwise search the web.

    Routing on the declared "documents" key avoids depending on the undeclared
    "__next__" hint that relevance_checker adds to its update.
    """
    return "generate_answer" if state.get("documents") else "search_tavily"


def _with_verdict(node):
    """Wrap a generation node so that its grading verdict is stored in the state.

    The verdict used for routing is thereby also visible to whoever streams the
    graph, instead of being known only to the conditional edge.
    """
    def graded_node(state):
        update = node(state)
        # Grade the state as it will look once this node's update has been applied.
        verdict = hallucination_checker({**state, **update})
        return {**update, "verdict": verdict}
    return graded_node


def _route_on_verdict(state):
    """Route on the verdict recorded by the generation node that just ran."""
    return state["verdict"]


graph = StateGraph(GradedRAGState)
graph.set_entry_point("docs_retrieval")
graph.add_node("docs_retrieval", docs_retrieval)
graph.add_node("relevance_checker", relevance_checker)
graph.add_node("search_tavily", search_tavily)
graph.add_node("generate_answer", _with_verdict(generate_answer))
graph.add_node("regenerate_answer", _with_verdict(regenerate_answer))

graph.add_edge("docs_retrieval", "relevance_checker")
graph.add_conditional_edges("relevance_checker", _route_after_relevance, {
    "generate_answer": "generate_answer",
    "search_tavily": "search_tavily"
})
graph.add_edge("search_tavily", "generate_answer")

# First attempt: an unsupported answer gets exactly one regeneration.
graph.add_conditional_edges("generate_answer", _route_on_verdict, {
    "success": END,
    "fail": END,
    "retry": "regenerate_answer"
})
# Second attempt: every verdict ends the run.
graph.add_conditional_edges("regenerate_answer", _route_on_verdict, {
    "success": END,
    "fail": END,
    "retry": END
})

rag_app = graph.compile()

# Run function
def run_rag(question: str):
    """Run the RAG graph for `question` and print the path, the answer and its sources.

    Streams `rag_app` step by step, remembering the latest generation, documents and
    grading verdict. A hallucination failure is reported only when the regenerated
    answer also ended without a "success" verdict; a retry that passes grading is
    printed as a normal answer.

    NOTE(review): a first-pass answer that is grounded but judged not useful
    (verdict "fail" without a retry) is still printed, as before.

    Args:
        question: the user question to answer.

    Returns:
        None. All results are printed.
    """
    state = {
        "question": question,
        "generation": "",
        "documents": [],
        "retry_count": 0
    }

    final_generation = None
    final_documents = []
    final_verdict = None
    steps_taken = []

    print(f"\nüìå ÏßàÎ¨∏: {question}")
    print("=" * 50)

    for step in rag_app.stream(state):
        for node_name, result in step.items():
            steps_taken.append(node_name)
            print(f"üîÑ Step Ïã§ÌñâÎê®: {node_name}")

            if "generation" in result:
                final_generation = result["generation"]

            if "documents" in result:
                final_documents = result["documents"]

            if "verdict" in result:
                final_verdict = result["verdict"]

    # Having retried is not a failure in itself; the last verdict decides.
    hallucination_failed = (
        "regenerate_answer" in steps_taken and final_verdict != "success"
    )

    print("=" * 50)
    print(f"\nüìç Ï†ÑÏ≤¥ Ïã§Ìñâ Í≤ΩÎ°ú: {' ‚Üí '.join(steps_taken)}")
    print(f"\nüìÑ Í¥ÄÎ†® Î¨∏ÏÑú Í∞úÏàò: {len(final_documents)}")

    if hallucination_failed:
        print("\n‚ùå ÏµúÏ¢Ö ÌåêÎã®: hallucinationÏúºÎ°ú Ïù∏Ìï¥ ÎãµÎ≥Ä ÏÉùÏÑ± Ïã§Ìå®")
    else:
        print("\nüß† ÏÉùÏÑ±Îêú ÎãµÎ≥Ä:")
        print(final_generation)

        # Print each distinct (title, source) pair once.
        print("\nüìé Ï∂úÏ≤ò:")
        seen = set()
        for doc in final_documents:
            title = doc.metadata.get("title", "Untitled")
            source = doc.metadata.get("source")
            key = (title, source)
            if key in seen:
                continue
            seen.add(key)
            if source:
                print(f"- [{title}]({source})")
            else:
                print(f"- {title}")

    print("=" * 50)


In [24]:
# In-corpus question: in the recorded run every retrieved chunk was graded relevant,
# so the answer was generated from the indexed blog posts without a web search.
run_rag("what is prompt?")


üìå ÏßàÎ¨∏: what is prompt?
üîÑ Step Ïã§ÌñâÎê®: docs_retrieval




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: yes




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: yes
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: yes




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: yes
üîÑ Step Ïã§ÌñâÎê®: relevance_checker




---HALLUCINATION CHECKER Ïã§ÌñâÎê®---




üß™ hallucination ÌåêÎã® Í≤∞Í≥º: yes




üéØ Ïú†Ïö©ÏÑ± ÌåêÎã® Í≤∞Í≥º: yes
üîÑ Step Ïã§ÌñâÎê®: generate_answer

üìç Ï†ÑÏ≤¥ Ïã§Ìñâ Í≤ΩÎ°ú: docs_retrieval ‚Üí relevance_checker ‚Üí generate_answer

üìÑ Í¥ÄÎ†® Î¨∏ÏÑú Í∞úÏàò: 4

üß† ÏÉùÏÑ±Îêú ÎãµÎ≥Ä:
A prompt in the context of language models refers to the input or instructions given to the model to guide its behavior and generate desired outcomes. Prompt Engineering involves crafting these prompts to effectively communicate with the model without altering its internal parameters. It is an empirical process that requires experimentation and heuristics to achieve alignment and steerability of the model's responses.

üìé Ï∂úÏ≤ò:
- [Prompt Engineering | Lil'Log](https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/)


In [22]:
# Out-of-corpus question: in the recorded run no retrieved chunk was graded relevant,
# so the graph fell back to the Tavily web search before generating.
run_rag("Where does Messi play right now?")


üìå ÏßàÎ¨∏: Where does Messi play right now?
üîÑ Step Ïã§ÌñâÎê®: docs_retrieval
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
üîÑ Step Ïã§ÌñâÎê®: relevance_checker
---SEARCH TAVILY---




üîÑ Step Ïã§ÌñâÎê®: search_tavily




---HALLUCINATION CHECKER Ïã§ÌñâÎê®---
üß™ hallucination ÌåêÎã® Í≤∞Í≥º: yes
üéØ Ïú†Ïö©ÏÑ± ÌåêÎã® Í≤∞Í≥º: yes
üîÑ Step Ïã§ÌñâÎê®: generate_answer

üìç Ï†ÑÏ≤¥ Ïã§Ìñâ Í≤ΩÎ°ú: docs_retrieval ‚Üí relevance_checker ‚Üí search_tavily ‚Üí generate_answer

üìÑ Í¥ÄÎ†® Î¨∏ÏÑú Í∞úÏàò: 3

üß† ÏÉùÏÑ±Îêú ÎãµÎ≥Ä:
Lionel Messi currently plays for Inter Miami in Major League Soccer (MLS).

üìé Ï∂úÏ≤ò:
- [Untitled](https://www.usatoday.com/story/sports/soccer/2024/12/19/lionel-messi-2025-schedule-inter-miami-argentina-mls/77089729007/)
- [Untitled](https://www.sportingnews.com/us/soccer/news/lionel-messi-playing-today-status-lineup-inter-miami-2025/b87bb697bffbfbd6b7de8a7a)
- [Untitled](https://www.usatoday.com/story/sports/soccer/2025/03/28/inter-miami-vs-philadelphia-union-time-tv-will-messi-play/82704252007/)


In [25]:
# False-premise question: in the recorded run both the first answer and the regenerated
# one failed the hallucination check, so the run ended without showing an answer.
run_rag("When did Prompt Engineering become an Olympic sport?")


üìå ÏßàÎ¨∏: When did Prompt Engineering become an Olympic sport?
üîÑ Step Ïã§ÌñâÎê®: docs_retrieval




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no




Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
Î¨∏ÏÑú relevance ÌåêÎã® Í≤∞Í≥º: no
üîÑ Step Ïã§ÌñâÎê®: relevance_checker
---SEARCH TAVILY---




üîÑ Step Ïã§ÌñâÎê®: search_tavily
---HALLUCINATION CHECKER Ïã§ÌñâÎê®---




üß™ hallucination ÌåêÎã® Í≤∞Í≥º: no
üîÅ hallucination Ïã§Ìå® ‚Üí Ïû¨ÏÉùÏÑ± ÏãúÎèÑ
üîÑ Step Ïã§ÌñâÎê®: generate_answer
---HALLUCINATION CHECKER Ïã§ÌñâÎê®---
üß™ hallucination ÌåêÎã® Í≤∞Í≥º: no
‚ùå hallucination 2Ìöå Ïã§Ìå® ‚Üí Ï¢ÖÎ£å
üîÑ Step Ïã§ÌñâÎê®: regenerate_answer

üìç Ï†ÑÏ≤¥ Ïã§Ìñâ Í≤ΩÎ°ú: docs_retrieval ‚Üí relevance_checker ‚Üí search_tavily ‚Üí generate_answer ‚Üí regenerate_answer

üìÑ Í¥ÄÎ†® Î¨∏ÏÑú Í∞úÏàò: 3

‚ùå ÏµúÏ¢Ö ÌåêÎã®: hallucinationÏúºÎ°ú Ïù∏Ìï¥ ÎãµÎ≥Ä ÏÉùÏÑ± Ïã§Ìå®
