## RAG using LangGraph

In [2]:
! pip install beautifulsoup4

Collecting beautifulsoup4
  Downloading beautifulsoup4-4.13.5-py3-none-any.whl.metadata (3.8 kB)
Collecting soupsieve>1.2 (from beautifulsoup4)
  Downloading soupsieve-2.8-py3-none-any.whl.metadata (4.6 kB)
Downloading beautifulsoup4-4.13.5-py3-none-any.whl (105 kB)
Downloading soupsieve-2.8-py3-none-any.whl (36 kB)
Installing collected packages: soupsieve, beautifulsoup4

   ---------------------------------------- 0/2 [soupsieve]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   -------------------- ------------------- 1/2 [beautifulsoup4]
   ---------------------------------------- 2/2 [beautifulsoup4]

Successfully installed beautifulsoup4-4.13.5 soupsieve-2.8


In [3]:
from langchain_community.document_loaders import WebBaseLoader

## Knowledge base: blog posts by Lilian Weng (OpenAI / Thinking Machines Lab)
urls = [
    "https://lilianweng.github.io/posts/2025-05-01-thinking/",
    "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
    "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
    "https://lilianweng.github.io/posts/2024-04-12-diffusion-video/",
]

## Fetch each page with its own loader; docs holds one load() result per URL,
## in the same order as urls
docs = [WebBaseLoader(page_url).load() for page_url in urls]

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

## Flatten the per-URL results into a single flat list of documents
docs_list = [doc for loaded_docs in docs for doc in loaded_docs]

## Configure the text splitter via chunk_size and chunk_overlap
## (built with from_tiktoken_encoder, so lengths are counted by the tiktoken encoder).
## For RAG the documents are cut into blocks (chunks) so that individual blocks
## can later be retrieved as context based on semantic similarity
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=2000, chunk_overlap=50
)

## Cut the flattened documents into chunks
doc_splits = text_splitter.split_documents(docs_list)

### Semantic Similarity
- Semantic similarity measures how closely two pieces of text or concepts are related in meaning, focusing on the underlying ideas rather than just word-for-word matches.

In [None]:
from langchain.embeddings import init_embeddings
from langchain_core.vectorstores import InMemoryVectorStore

## Embeddings model, selected with a "provider:model" string
embeddings = init_embeddings("ollama:mistral:7b")

## Embed every chunk and keep the vectors in an in-memory vector store
## (nothing is persisted; the store lives only as long as the kernel)
vectorstore = InMemoryVectorStore.from_documents(
    embedding=embeddings,
    documents=doc_splits,
)

## Retriever over the vector store: looks up stored chunks for a query.
## Chunks from all four blog posts share this single store
retriever = vectorstore.as_retriever()

In [None]:
## Smoke test: run one sample query through the retriever.
## The result is expected to be a list of Document objects; print its type to verify
r = retriever.invoke("types of reward hacking")
print(type(r))

In [None]:
from langchain.tools.retriever import create_retriever_tool
from utils import format_retriever_results

## Wrap the retriever as a tool the LLM can call
retriever_tool = create_retriever_tool(
    retriever,
    name="retrieve_blog_posts",
    description="Search and return information about Lilian Weng blog posts.",
)

## Try the tool directly with a sample query.
## Only the slice [10:1000] of the output is passed on, to keep the preview short
result = retriever_tool.invoke({"query": "types of reward hacking"})
format_retriever_results(result[10:1000])

In [None]:
from langchain.chat_models import init_chat_model

## Chat model, selected with a "provider:model" string; temperature is set to 0
llm = init_chat_model("ollama:mistral:7b", temperature=0)

## Tools available to the agent, plus a name -> tool lookup used when executing tool calls
tools = [retriever_tool]
tools_by_name = {t.name: t for t in tools}

## Attach the tools to the LLM so it can emit tool calls
llm_with_tools = llm.bind_tools(tools)

In [None]:
from typing_extensions import Literal
from IPython.display import Image, display
from langchain_core.messages import SystemMessage, ToolMessage
from langgraph.graph import END, START, StateGraph, MessagesState

## Define the RAG agent system prompt (prepended to the conversation on every LLM call)
## NOTE(review): the prompt asks the model to clarify scope with the user before retrieving,
## so a single agent.invoke() may end with a clarifying question instead of a tool call.
## Confirm this is the intended behaviour
rag_prompt = """You are a helpful assistant tasked with retrieving information from a series of technical blog posts by Lilian Weng. 
Clarify the scope of research with the user before using your retrieval tool to gather context. Reflect on any context you fetch, and
proceed until you have sufficient context to answer the user's research request."""


## Define the Nodes
def llm_call(state: MessagesState) -> dict:
    """Ask the tool-enabled LLM for the next message.

    The system prompt is placed in front of the conversation so far, and the
    model either answers directly or requests a tool call.

    Args:
        state: Current conversation state

    Returns:
        Dictionary whose "messages" entry holds the single new LLM message
    """
    conversation = [SystemMessage(content=rag_prompt)] + state["messages"]
    response = llm_with_tools.invoke(conversation)
    return {"messages": [response]}
    
def tool_node(state: MessagesState) -> dict:
    """Execute every tool call requested by the most recent message.

    Args:
        state: Current conversation state; the last message carries the tool calls

    Returns:
        Dictionary whose "messages" entry holds one ToolMessage per tool call,
        each tagged with the id of the call it answers
    """
    result = []
    for tool_call in state["messages"][-1].tool_calls:
        tool = tools_by_name.get(tool_call["name"])
        if tool is None:
            # The model asked for a tool that is not registered. Report that back
            # as the tool result instead of raising KeyError, so the run continues
            # and the model can retry with a valid tool name.
            observation = (
                f"Error: {tool_call['name']} is not a valid tool, "
                f"try one of [{', '.join(tools_by_name)}]."
            )
        else:
            observation = tool.invoke(tool_call["args"])
        result.append(ToolMessage(content=observation, tool_call_id=tool_call["id"]))
    return {"messages": result}

def should_continue(state: MessagesState) -> Literal["tool_node", "__end__"]:
    """Route the graph after an LLM turn.

    Args:
        state: Current conversation state

    Returns:
        "tool_node" when the latest message contains tool calls, otherwise END
    """
    latest = state["messages"][-1]
    # Tool calls present -> go run them; none -> stop and reply to the user
    return "tool_node" if latest.tool_calls else END

In [None]:
## Build the workflow on top of MessagesState
agent_builder = StateGraph(MessagesState)

## Register the two nodes
agent_builder.add_node("llm_call", llm_call)
agent_builder.add_node("tool_node", tool_node)

## Wire the loop: START -> llm_call, then should_continue picks tool_node or END,
## and tool_node always hands control back to llm_call
agent_builder.add_edge(START, "llm_call")
agent_builder.add_conditional_edges(
    "llm_call",
    should_continue,
    {
        "tool_node": "tool_node",
        END: END,
    },
)
agent_builder.add_edge("tool_node", "llm_call")

## Compile the agent
agent = agent_builder.compile()

## Display the agent.
## draw_mermaid_png() renders through a remote service by default, so it can fail
## when that service is unreachable; fall back to printing the Mermaid source
## instead of aborting the notebook run
agent_graph = agent.get_graph(xray=True)
try:
    display(Image(agent_graph.draw_mermaid_png()))
except Exception as err:
    print(f"Could not render the graph as PNG ({err}); Mermaid source follows:")
    print(agent_graph.draw_mermaid())

In [None]:
## Invoke the Agent
from utils import format_messages

## Send one user question through the RAG agent
query = "What are the types of reward hacking discussed in the blogs?"
result = agent.invoke(
    {"messages": [{"role": "user", "content": query}]}
)

## Pretty-print the messages returned by the agent run
format_messages(result["messages"])