In [None]:
# Notebook housekeeping: autosave interval, automatic reloading of edited
# modules, and completer configuration.
%autosave 300
%load_ext autoreload
%autoreload 2
# NOTE(review): reloading the extension right after loading it looks redundant — confirm it is needed.
%reload_ext autoreload
%config Completer.use_jedi = False

In [None]:
import os

# Move the working directory two levels above the kernel's starting directory.
# The starting directory is remembered on the first run so that re-running this
# cell lands in the same place instead of climbing a further two levels each time.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()

os.chdir(os.path.abspath(os.path.join(_NOTEBOOK_START_DIR, "..", "..")))
print(os.getcwd())

Goal: build a retrieval application that accommodates conversation-style interactions and multi-step retrieval processes.

In [None]:
from dotenv import load_dotenv

# Load environment variables from a .env file.
# NOTE(review): presumably this supplies the OpenAI credentials used by the cells below — confirm.
load_dotenv()

In [None]:
from langchain.chat_models import init_chat_model

# Chat model shared by the graph nodes defined later in this notebook.
llm = init_chat_model("gpt-4o-mini", model_provider="openai")

In [None]:
from langchain_openai import OpenAIEmbeddings

# Embedding model; used below to size the FAISS index and as the vector store's embedding function.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [None]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

# Embed a throwaway string once to discover the embedding width, then size the index to match.
embedding_dim = len(embeddings.embed_query("hello world"))
print(embedding_dim)
index = faiss.IndexFlatL2(embedding_dim)

# Start with an empty store; the chunks are added in a later cell via add_documents.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [None]:
from langgraph.graph import START, StateGraph, END, MessagesState
from typing_extensions import List, TypedDict, Annotated
from langchain_core.tools import tool
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import ToolNode, tools_condition

In [None]:
# Load the contents of the blog (chunking happens in the next cell).
# Parsing is restricted to elements with the post-content, post-title and post-header classes.
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()

In [None]:
# Split the loaded documents into overlapping chunks and report how many were produced.
# NOTE(review): add_start_index presumably records each chunk's offset in its metadata — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200, add_start_index=True
)
all_splits = text_splitter.split_documents(docs)
print(f"Split into {len(all_splits)} chunks")

In [None]:
# Index chunks: add every split to the vector store.
# The return value is bound to `_` so it is not echoed as the cell's output.
_ = vector_store.add_documents(documents=all_splits)

A conversation turn that uses retrieval is stored as the following sequence of messages:\
User input as a HumanMessage\
Vector store query as an AIMessage with tool calls\
Retrieved documents as a ToolMessage\
Final response as an AIMessage.

In this notebook we will define the retriever as a tool, so the model knows when to call it and when to stop.

In [None]:
@tool(response_format="content_and_artifact")
def retrieve(query: str):
    """Retrieve information related to a query only when the query deals with AI agents and tools."""
    # The docstring above is left verbatim: the @tool decorator uses it as the
    # description of the tool, so it is effectively runtime text.
    matches = vector_store.similarity_search(query, k=2)

    # One "Source/Content" section per matching document.
    sections = []
    for match in matches:
        sections.append(f"Source: {match.metadata}\nContent: {match.page_content}")

    # Content first, artifact second, as response_format="content_and_artifact" expects.
    return "\n\n".join(sections), matches

Our graph will consist of three nodes:

A node that fields the user input, either generating a query for the retriever or responding directly\
A node for the retriever tool that executes the retrieval step\
A node that generates the final response using the retrieved context.

In [None]:
# Step 1: decide between calling the retriever and answering directly.
def query_or_respond(state: MessagesState):
    """Return the model's next message, which may carry a tool call for ``retrieve``."""
    model = llm.bind_tools([retrieve])
    # Only the new message is returned, wrapped in a list under "messages".
    return {"messages": [model.invoke(state["messages"])]}

In [None]:
# Step 2: execute the retrieval if a tool is called - a node that runs the tools called in the last AIMessage.
tools = ToolNode([retrieve])

In [None]:
# Step 3: Generate a response using the retrieved content.
def generate(state: MessagesState):
    """Answer the user's question from the most recently retrieved context.

    Collects the unbroken run of tool messages at the end of the history, puts
    their content into a system prompt, and invokes the model on that prompt
    followed by the human, system and tool-call-free AI messages.

    Returns:
        A state update of the form ``{"messages": [response]}``.
    """
    history = state["messages"]

    # Count how many messages at the tail of the history are tool messages.
    trailing = 0
    while trailing < len(history) and history[-1 - trailing].type == "tool":
        trailing += 1
    tool_messages = history[len(history) - trailing:]

    # Build the system prompt: fixed instructions followed by the retrieved text.
    docs_content = "\n\n".join(msg.content for msg in tool_messages)
    instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
    )
    system_message_content = f"{instructions}\n\n{docs_content}"

    def belongs_in_prompt(msg):
        """Keep human/system messages and AI messages that made no tool call."""
        if msg.type in ("human", "system"):
            return True
        return msg.type == "ai" and not msg.tool_calls

    conversation_messages = list(filter(belongs_in_prompt, history))
    prompt = [SystemMessage(system_message_content), *conversation_messages]

    print(f"Prompt: {prompt}")

    # Ask the model and hand back only the new message.
    return {"messages": [llm.invoke(prompt)]}

In [None]:
# Assemble the graph (it is compiled in the next cell)
graph_builder = StateGraph(MessagesState)

# Nodes: model turn, retrieval tool, answer generation
graph_builder.add_node("query_or_respond", query_or_respond)
graph_builder.add_node("tools", tools)
graph_builder.add_node("generate", generate)

# Edges: start at the model turn, then either run the tool or finish
graph_builder.add_edge(START, "query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {"tools": "tools", END: END},
)
# After retrieval, generate the answer and stop
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

In [None]:
# Compile the graph. No checkpointer is passed here; one is added in a later cell.
graph = graph_builder.compile()

In [None]:
from IPython.display import Image, display

# Draw the compiled graph as a PNG and show it inline.
# NOTE(review): draw_png() presumably needs Graphviz/pygraphviz installed — confirm.
display(Image(graph.get_graph().draw_png()))

In [None]:
# NOTE(review): presumably chosen as a question outside the indexed topic, to exercise the direct-answer path — confirm.
input_message = "What is the capital of France?"

In [None]:
# Run the graph through its async API and keep the returned state.
result = await graph.ainvoke({"messages": [{"role": "user", "content": input_message}]})

In [None]:
# Show the content of the last message in the returned state.
display(result["messages"][-1].content)

In [None]:
# Same question through the synchronous streaming API with stream_mode="values";
# the newest message of each emitted step is pretty-printed.
for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()

In [None]:
# Async streaming with stream_mode="updates"; each emitted step is printed as-is,
# followed by a separator line.
async for step in graph.astream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="updates",
):
    print(f"{step}\n\n----------------\n")

In [None]:
# NOTE(review): presumably a question on the indexed topic, expected to trigger the retrieve tool — confirm.
input_message = "What is Task Decomposition as a concept in AI agents?"
result = await graph.ainvoke({"messages": [{"role": "user", "content": input_message}]})

In [None]:
# Pretty-print every message in the returned state, in order.
for message in result["messages"]:
    message.pretty_print()

#### Next we add an in-memory checkpointer so the graph remembers earlier turns of a conversation thread

In [None]:
from langgraph.checkpoint.memory import MemorySaver
import uuid

In [None]:
# Recompile the same graph_builder, this time with a MemorySaver checkpointer attached.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)

In [None]:
# Specify an ID for the thread: a random UUID. The calls in the following cells
# pass this same config, so they all run on one thread.
config = {"configurable": {"thread_id": str(uuid.uuid4())}}

In [None]:
# First turn on the thread identified by `config`.
input_message = "What is Task Decomposition?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()

In [None]:
# Follow-up turn on the same thread; "it" has no referent within this message alone.
input_message = "Can you look up some common ways of doing it?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()

#### How to get your RAG application to return sources

In [None]:
class State(MessagesState):
    """Graph state: the message history plus the retrieved documents for the latest answer."""

    # Written by the generate node from the artifacts of the trailing tool messages.
    context: List[Document]

In [None]:
# Step 1: let the model either request retrieval or answer outright.
def query_or_respond(state: State):
    """Produce the next AI message: a ``retrieve`` tool call or a direct reply."""
    history = state["messages"]
    reply = llm.bind_tools([retrieve]).invoke(history)
    # Only the new message is returned; MessagesState appends it instead of overwriting.
    return {"messages": [reply]}


# Step 2: Execute the retrieval - a prebuilt node wrapping the retrieve tool.
tools = ToolNode([retrieve])

In [None]:
# Updating step3: Generate a response using the retrieved content.
def generate(state: State):
    """Generate an answer from the retrieved content and record its sources.

    Args:
        state: Graph state; only ``state["messages"]`` is read.

    Returns:
        A state update with the model's response under ``"messages"`` and the
        artifacts of the trailing tool messages, flattened into one list,
        under ``"context"``.
    """
    # Get generated ToolMessages: the unbroken run at the end of the history.
    recent_tool_messages = []
    for message in reversed(state["messages"]):
        if message.type != "tool":
            break
        recent_tool_messages.append(message)
    tool_messages = recent_tool_messages[::-1]

    # Format into prompt
    docs_content = "\n\n".join(
        tool_message.content for tool_message in tool_messages
    )
    system_message_content = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        f"{docs_content}"
    )
    # Keep human/system messages and AI messages that made no tool call;
    # tool-call requests and tool results are left out of the conversation part.
    conversation_messages = [
        message
        for message in state["messages"]
        if message.type in ("human", "system")
        or (message.type == "ai" and not message.tool_calls)
    ]
    prompt = [SystemMessage(system_message_content)] + conversation_messages

    print(f"Prompt: {prompt}")

    # Run
    response = llm.invoke(prompt)

    # Collect the retrieved documents. A tool message whose artifact is None
    # contributes nothing instead of raising TypeError inside extend().
    context = []
    for tool_message in tool_messages:
        context.extend(tool_message.artifact or [])
    return {"messages": [response], "context": context}

In [None]:
# Assemble the graph over the extended State (it is compiled in the next cell)
graph_builder = StateGraph(State)

# Nodes: model turn, retrieval tool, answer generation
graph_builder.add_node("query_or_respond", query_or_respond)
graph_builder.add_node("tools", tools)
graph_builder.add_node("generate", generate)

# Edges: start at the model turn, then either run the tool or finish
graph_builder.add_edge(START, "query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {"tools": "tools", END: END},
)
# After retrieval, generate the answer and stop
graph_builder.add_edge("tools", "generate")
graph_builder.add_edge("generate", END)

In [None]:
# Compile with a new MemorySaver checkpointer and start a new thread for the calls below.
memory = MemorySaver()
graph = graph_builder.compile(checkpointer=memory)
# Specify an ID for the thread
config = {"configurable": {"thread_id": str(uuid.uuid4())}}

In [None]:
# First turn on the new thread. The loop variable `step` is read again in the next cell.
input_message = "What is Task Decomposition?"

for step in graph.stream(
    {"messages": [{"role": "user", "content": input_message}]},
    stream_mode="values",
    config=config,
):
    step["messages"][-1].pretty_print()

In [None]:
# `step` is the last value emitted by the streaming loop in the previous cell.
# "context" is only written by the generate node, so fall back to an empty list
# when the model answered without calling the retriever.
step.get("context", [])

In [None]:
# Send input_message again on the same thread, this time through the async API.
op = await graph.ainvoke(
    {"messages": [{"role": "user", "content": input_message}]},
    config=config,
)

In [None]:
# Last message of the returned state.
op["messages"][-1]

In [None]:
# Follow-up question on the same thread; "the same" has no referent within this message alone.
op = await graph.ainvoke(
    {"messages": [{"role": "user", "content": "Who is the author for the same?"}]},
    config=config,
)

In [None]:
# Pretty-print every message in the returned state, in order.
for message in op["messages"]:
    message.pretty_print()

######################################################## END ###########################################################