In [1]:
from dotenv import load_dotenv
import os

# Populate the process environment from a local .env file (python-dotenv).
load_dotenv()

# API credentials used by later cells; each is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")

# LangGraph Persistence
Use a checkpointer as memory to save (checkpoint) the conversation state at each node interaction, so a thread can be continued later.

In [2]:
from langchain_openai import ChatOpenAI
from langchain_tavily import TavilySearch
from langgraph.graph import StateGraph, END
from langchain_core.tools import BaseTool
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage
from langchain_core.messages import ToolMessage

from langgraph.checkpoint.sqlite import SqliteSaver


from typing_extensions import TypedDict
from typing import Annotated
import operator


class AgentState(TypedDict):
    """State passed between graph nodes: the list of chat messages so far."""
    # `operator.add` is the reducer attached to this key: the list returned by
    # a node is concatenated onto the existing list instead of replacing it.
    messages: Annotated[list[BaseMessage], operator.add]

class LangGraphAgent:
    """Tool-calling agent implemented as a two-node LangGraph loop.

    The graph alternates between an ``"llm"`` node (asks the model what to do)
    and an ``"action"`` node (runs the tools the model requested) until the
    model answers without requesting any tool.
    """

    def __init__(self, model: ChatOpenAI, tools: list[BaseTool], check_pointer, system_message: str):
        """Bind the tools to the model and compile the graph.

        Args:
            model: Chat model; the tools are bound to it via ``bind_tools``.
            tools: Tools the model may call, looked up later by ``tool.name``.
            check_pointer: Checkpointer passed to ``graph.compile``.
            system_message: Text prepended as a system message on every model call.
        """
        self.model = model.bind_tools(tools)
        self.tools = {t.name: t for t in tools}
        self.system_message = system_message

        graph = StateGraph(AgentState)

        graph.add_node("llm", self._call_llm)
        graph.add_node("action", self._call_action)

        # After the model speaks: run tools if it asked for any, otherwise stop.
        graph.add_conditional_edges(
            "llm", self._exists_action,
            {True: "action", False: END}
        )
        # Tool results always go back to the model.
        graph.add_edge("action", "llm")

        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer=check_pointer)

    def _exists_action(self, state: AgentState) -> bool:
        """Return True when the latest message requests at least one tool call.

        Always returns a real ``bool`` so the result matches the ``True`` /
        ``False`` keys of the conditional-edge mapping (an ``and`` chain would
        leak ``[]`` or ``None`` instead of ``False``).
        """
        last_message = state["messages"][-1]
        return bool(getattr(last_message, "tool_calls", None))

    def _call_llm(self, state: AgentState) -> dict[str, list[BaseMessage]]:
        """Invoke the model on the system prompt plus the stored messages.

        Returns:
            A state update containing the single message produced by the model.
        """
        messages = [SystemMessage(content=self.system_message)] + state["messages"]
        message = self.model.invoke(messages)
        return {"messages": [message]}

    def _call_action(self, state: AgentState) -> dict[str, list[BaseMessage]]:
        """Run every tool call requested by the latest message.

        Each tool call gets exactly one ``ToolMessage`` in reply. A call that
        names a tool this agent does not have is answered with an error message
        instead of being dropped, so the model can see the mistake and no tool
        call is left without a response.

        Returns:
            A state update containing one ``ToolMessage`` per requested call.
        """
        results = []
        tool_calls = state["messages"][-1].tool_calls
        for tool_call in tool_calls:
            print(f"Calling Tool: {tool_call}")
            tool = self.tools.get(tool_call["name"])
            if tool is None:
                result = (
                    f"Error: unknown tool '{tool_call['name']}'. "
                    f"Available tools: {sorted(self.tools)}"
                )
            else:
                result = tool.invoke(tool_call["args"])
            print(f"Tool Result: {result}")
            results.append(
                ToolMessage(content=str(result), tool_call_id=tool_call["id"], name=tool_call["name"])
            )
        print("Back to model!")
        return {"messages": results}

# ChatOpenAI client pointed at a server on localhost:1234 rather than the
# default OpenAI endpoint; temperature=0.0 asks for the least random sampling.
model = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    model="qwen/qwen3-4b-2507",
    api_key=OPENAI_API_KEY,
    temperature=0.0,
)

# The only tool offered to the agent: Tavily search configured with max_results=2.
tools = [
    TavilySearch(
        max_results=2, 
        tavily_api_key=TAVILY_API_KEY
    )
]

# System prompt prepended to every model call; .strip() drops the newlines
# that the triple-quoted literal adds at both ends.
sys_prompt = """
You are a smart research assistant. Use the search engine to find the answer to the user's question.
You are allowed to make multiple calls (either together or in sequence).
Only look for information when you are sure of what you want.
""".strip()

In [3]:
import sqlite3
from langgraph.checkpoint.sqlite import SqliteSaver

def stream_agent_sysout(agent, thread_id, user_input):
    """Send one user turn to the agent and print what each graph step produced.

    Every streamed update is printed as it arrives; afterwards the content of
    the most recent message seen is printed between two separator lines.

    Args:
        agent: Object exposing a compiled graph as ``agent.graph``.
        thread_id: Conversation id used by the checkpointer to find prior state.
        user_input: Text of the user's message for this turn.
    """
    last_message = None
    message = HumanMessage(content=user_input)
    thread = {"configurable": {"thread_id": thread_id}}
    for event in agent.graph.stream({"messages": [message]}, thread):
        for update in event.values():
            step_messages = update['messages']
            print(step_messages)
            # A step may yield no messages; keep the previous value in that
            # case instead of failing on an empty list.
            if step_messages:
                last_message = step_messages[-1].content

    print("="*50)
    print(last_message)
    print("="*50)


# ":memory:" keeps the checkpoint database in RAM only, so nothing is persisted
# once the connection goes away. check_same_thread=False lets the sqlite3
# connection be used from threads other than the one that created it.
shared_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))
# One agent instance; conversations are kept apart by the thread_id passed on each call.
agent = LangGraphAgent(model, tools=tools, check_pointer=shared_memory, system_message=sys_prompt)

In [4]:
# NOTE(review): `message` and `thread` are not used by the call below;
# stream_agent_sysout builds its own HumanMessage and thread config from the
# arguments it receives.
message = HumanMessage(content="What is the weather in SF?")
thread = { "configurable": { "thread_id": "1" } }
# First turn on thread "1".
stream_agent_sysout(
    agent, 
    thread_id = "1", 
    user_input="What is the weather in SF?"
)

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '206668732', 'function': {'arguments': '{"query":"weather in San Francisco","include_domains":["weather.com","accuweather.com"],"search_depth":"basic","include_images":false}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 1869, 'total_tokens': 1921, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-wmg3rieytvfd13coknxcxw', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--95f6db57-19da-4c1d-a990-6a7726df8cf6-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'weather in San Francisco', 'include_domains': ['weather.com', 'accuweather.com'], 'search_depth': 'basic', 'include_images': False}, 'id': '206668732', 'type': 'tool_call'}], usage_metadata={'input_tokens': 1869, 'output_

In [5]:
# Follow-up on the same thread ("1"): the question names only the city and
# relies on the checkpointed history to supply the topic (weather).
stream_agent_sysout(
    agent, 
    thread_id = "1", 
    user_input="How about LA?"
)

[AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '230187744', 'function': {'arguments': '{"query":"weather in Los Angeles","include_domains":["weather.com","accuweather.com"],"search_depth":"basic","include_images":false}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 2776, 'total_tokens': 2828, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-408qndg1slyiav792gz4ha', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--cbd8e7fc-f642-436b-af94-8abbc13f34ef-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'weather in Los Angeles', 'include_domains': ['weather.com', 'accuweather.com'], 'search_depth': 'basic', 'include_images': False}, 'id': '230187744', 'type': 'tool_call'}], usage_metadata={'input_tokens': 2776, 'output_toke

In [6]:
# Same question style, but on a different thread ("2") that has no earlier
# turns, so the model has nothing to compare (see its clarifying reply below).
stream_agent_sysout(
    agent, 
    thread_id = "2", 
    user_input="Which one is hotter?"
)

[AIMessage(content="I need more information to answer your question. Could you please specify what you're comparing? For example, are you asking which is hotter between two places, two objects, or two types of temperatures (like the sun vs. a campfire)? Let me know so I can provide a more accurate answer.", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 62, 'prompt_tokens': 1867, 'total_tokens': 1929, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-ppxi2a04npjrhpjpeczzid', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--97399de8-29c9-479e-a0be-09b5f8a63f5c-0', usage_metadata={'input_tokens': 1867, 'output_tokens': 62, 'total_tokens': 1929, 'input_token_details': {}, 'output_token_details': {}})]
I need more information to answer your question. Could you please specify what you're comparing? For exampl

In [7]:
# No tool calls appear here: the model already has both temperatures in its
# context from the earlier turns on thread "1".
stream_agent_sysout(
    agent, 
    thread_id = "1", 
    user_input="Which one is hotter?"
)

[AIMessage(content='San Francisco is cooler, with a temperature of 18.3°C (64.9°F), while Los Angeles is hotter at 29.1°C (84.4°F). Therefore, Los Angeles is hotter than San Francisco.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 3666, 'total_tokens': 3718, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-6am5x4tagn2fnjzgr01q0e', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--0c8d35ae-13a7-419f-a78d-377031e54f88-0', usage_metadata={'input_tokens': 3666, 'output_tokens': 52, 'total_tokens': 3718, 'input_token_details': {}, 'output_token_details': {}})]
San Francisco is cooler, with a temperature of 18.3°C (64.9°F), while Los Angeles is hotter at 29.1°C (84.4°F). Therefore, Los Angeles is hotter than San Francisco.


In [8]:
def inspect_memory(checkpointer, thread):
    """Print the checkpointed state stored for one conversation thread.

    Args:
        checkpointer: Object exposing ``get(config)``; a falsy result is
            reported as "no checkpoint".
        thread: Run config of the form ``{"configurable": {"thread_id": ...}}``.
    """
    # Take the id from the argument, not from a notebook-level global, so the
    # function works no matter which cells have been run before it.
    thread_id = thread.get("configurable", {}).get("thread_id")

    print(f"\n=== Inspecting thread {thread} ===")
    checkpoint = checkpointer.get(thread)
    if checkpoint:
        print(f"Thread ID: {thread_id}")
        print(f"state: {checkpoint['channel_values']}")
    else:
        print(f"No checkpoint found for thread {thread_id}")

# Only the two thread ids used in the cells above are checked here; this does
# not enumerate whatever else the checkpointer may hold.
print("\n=== All threads in memory ===")
for thread_id in ["1", "2"]:
    thread = { "configurable": { "thread_id": thread_id } }
    inspect_memory(shared_memory, thread)


=== All threads in memory ===

=== Inspecting thread {'configurable': {'thread_id': '1'}} ===
Thread ID: {'configurable': {'thread_id': '1'}}
state: {'messages': [HumanMessage(content='What is the weather in SF?', additional_kwargs={}, response_metadata={}), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '206668732', 'function': {'arguments': '{"query":"weather in San Francisco","include_domains":["weather.com","accuweather.com"],"search_depth":"basic","include_images":false}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 1869, 'total_tokens': 1921, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-wmg3rieytvfd13coknxcxw', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--95f6db57-19da-4c1d-a990-6a7726df8cf6-0', tool_calls=[{'name'

In [9]:
# Printed summary contrasting the per-run graph state with checkpointed memory.
print("AgentState: Accumulates messages during one graph execution")
print("Memory: Preserves messages across multiple graph executions")

print("""
# Without checkpointer
Turn 1: AgentState = [Msg1, Response1] → ❌ Lost after execution
Turn 2: AgentState = [Msg2, Response2] → ❌ Lost after execution

# With checkpointer (per each thread)  
Turn 1: AgentState = [Msg1, Response1] → ✅ Saved to memory
Turn 2: AgentState = [Msg1, Response1, Msg2, Response2] → ✅ Saved to memory
Turn 3: AgentState = [Msg1, Response1, Msg2, Response2, Msg3, Response3] → ✅ Saved to memory
""")

AgentState: Accumulates messages during one graph execution
Memory: Preserves messages across multiple graph executions

# Without checkpointer
Turn 1: AgentState = [Msg1, Response1] → ❌ Lost after execution
Turn 2: AgentState = [Msg2, Response2] → ❌ Lost after execution

# With checkpointer (per each thread)  
Turn 1: AgentState = [Msg1, Response1] → ✅ Saved to memory
Turn 2: AgentState = [Msg1, Response1, Msg2, Response2] → ✅ Saved to memory
Turn 3: AgentState = [Msg1, Response1, Msg2, Response2, Msg3, Response3] → ✅ Saved to memory



# Streaming Tokens
Streaming individual tokens uses the async event API, so it needs an async checkpointer.

In [10]:
from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
import aiosqlite

# Async counterpart of the SQLite checkpointer; the aiosqlite connection object
# is handed over as created (it is not awaited in this cell).
ashared_memory = AsyncSqliteSaver(aiosqlite.connect(":memory:", check_same_thread=False))
async_agent = LangGraphAgent(model, tools=tools, check_pointer=ashared_memory, system_message=sys_prompt)

user_msg = HumanMessage(content="What is the weather in SF?")
# A fresh thread id, so this run does not continue an earlier conversation.
thread = { "configurable": { "thread_id": "4" } }
# Top-level `async for` relies on the notebook's support for top-level await.
async for event in async_agent.graph.astream_events({"messages": [user_msg]}, thread):
    # Only chat-model token chunks are of interest; other event types are ignored.
    if event["event"] == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        # Empty content means the chunk carries no text to show, e.g. while
        # the model is requesting a tool call, so it is skipped.
        if content:
            print(content, end="|")

Calling Tool: {'name': 'tavily_search', 'args': {'query': 'weather in San Francisco', 'include_domains': ['weather.com', 'accuweather.com'], 'search_depth': 'basic', 'include_images': False}, 'id': '380469545', 'type': 'tool_call'}
Tool Result: {'query': 'weather in San Francisco', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'Weather in San Francisco', 'url': 'https://www.weatherapi.com/', 'content': "{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.775, 'lon': -122.4183, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1756177123, 'localtime': '2025-08-25 19:58'}, 'current': {'last_updated_epoch': 1756176300, 'last_updated': '2025-08-25 19:45', 'temp_c': 18.3, 'temp_f': 64.9, 'is_day': 0, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/night/116.png', 'code': 1003}, 'wind_mph': 9.8, 'wind_kph': 15.8, 'wind_degree': 256, 'wind_dir': 'WSW', 'pressure_m

# Human in the Loop
Pause before the tool node and ask the user whether the agent may run the tool.

In [11]:
import uuid
from langchain_core.messages import AnyMessage


# Reducer for the message list: same id replaces in place, new id appends.
def reduce_messages(left: list[AnyMessage], right: list[AnyMessage]) -> list[AnyMessage]:
    """Merge ``right`` into a copy of ``left``, keyed by message id.

    Incoming messages lacking an id are given a fresh UUID first (this mutates
    the objects in ``right``). An incoming message whose id matches one that is
    already present takes over that position; all others are appended in order.
    ``left`` itself is not modified.
    """
    for incoming in right:
        if not getattr(incoming, "id", None):
            incoming.id = str(uuid.uuid4())

    combined = left.copy()
    for incoming in right:
        # Position of the first stored message sharing this id, if any.
        slot = next(
            (pos for pos, current in enumerate(combined) if current.id == incoming.id),
            None,
        )
        if slot is None:
            combined.append(incoming)
        else:
            combined[slot] = incoming
    return combined

In [32]:
from langchain_openai import ChatOpenAI
from langchain_tavily import TavilySearch
from langchain_core.messages import BaseMessage, AnyMessage
from langgraph.graph import StateGraph

from typing import Annotated, TypedDict

import sys

# NOTE(review): `model` and `tools` are rebound here with the same settings
# used earlier in the notebook; from this cell on the names refer to these
# new objects.
model = ChatOpenAI(
    base_url="http://localhost:1234/v1",
    model="qwen/qwen3-4b-2507",
    api_key=OPENAI_API_KEY,
    temperature=0.0,
)

tools = [
    TavilySearch(
        max_results=2, 
        tavily_api_key=TAVILY_API_KEY
    )
]

class AgentState(TypedDict):
    """State passed between graph nodes: the list of chat messages so far."""
    # NOTE(review): this redefines the earlier AgentState; the reducer is now
    # reduce_messages (replace by id, else append) instead of operator.add.
    messages: Annotated[list[AnyMessage], reduce_messages]

class InteractiveGraphAgent:
    """Tool-calling agent that pauses for user approval before running tools.

    The graph has the same ``"llm"`` / ``"action"`` loop as a plain tool agent,
    but is compiled with ``interrupt_before=["action"]`` so execution stops
    each time the model requests a tool; ``run`` then asks the user whether to
    continue.
    """

    def __init__(self, model, tools, checkpointer, system_message):
        """Bind the tools to the model and compile the interruptible graph.

        Args:
            model: Chat model; the tools are bound to it via ``bind_tools``.
            tools: Tools the model may call, looked up later by ``tool.name``.
            checkpointer: Checkpointer passed to ``graph.compile``.
            system_message: Text prepended as a system message on every model call.
        """
        self.model = model.bind_tools(tools)
        self.tools = {tool.name: tool for tool in tools}
        self.system_message = system_message

        graph = StateGraph(AgentState)
        graph.add_node("llm", self._call_llm)
        graph.add_node("action", self._call_action)

        # After the model speaks: run tools if it asked for any, otherwise stop.
        graph.add_conditional_edges(
            "llm",
            self._exists_action,
            {
                True: "action",
                False: END
            }
        )
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")

        self.graph = graph.compile(
            checkpointer=checkpointer,
            # Stop before every tool execution so the caller can approve it.
            interrupt_before=["action"]
        )

    def _exists_action(self, state: AgentState) -> bool:
        """Return True when the latest message requests at least one tool call.

        Always returns a real ``bool`` so the result matches the ``True`` /
        ``False`` keys of the conditional-edge mapping (an ``and`` chain would
        leak ``[]`` or ``None`` instead of ``False``).
        """
        last_message = state["messages"][-1]
        return bool(getattr(last_message, "tool_calls", None))

    def _call_llm(self, state: AgentState) -> dict[str, list[BaseMessage]]:
        """Invoke the model on the system prompt plus the stored messages.

        Returns:
            A state update containing the single message produced by the model.
        """
        messages = [SystemMessage(content=self.system_message)] + state["messages"]
        message = self.model.invoke(messages)
        return {"messages": [message]}

    def _call_action(self, state: AgentState) -> dict[str, list[BaseMessage]]:
        """Run every tool call requested by the latest message.

        Each tool call gets exactly one ``ToolMessage`` in reply. A call that
        names a tool this agent does not have is answered with an error message
        instead of being dropped, so the model can see the mistake and no tool
        call is left without a response.

        Returns:
            A state update containing one ``ToolMessage`` per requested call.
        """
        results = []
        tool_calls = state["messages"][-1].tool_calls
        for tool_call in tool_calls:
            print(f"Calling Tool: {tool_call}")
            tool = self.tools.get(tool_call["name"])
            if tool is None:
                result = (
                    f"Error: unknown tool '{tool_call['name']}'. "
                    f"Available tools: {sorted(self.tools)}"
                )
            else:
                result = tool.invoke(tool_call["args"])
            print(f"Tool Result: {result}")
            results.append(
                ToolMessage(content=str(result), tool_call_id=tool_call["id"], name=tool_call["name"])
            )
        print("Back to model!")
        return {"messages": results}

    def run(self, user_input: str, thread_id: str):
        """Run one user turn, asking for approval at every tool interrupt.

        The graph is streamed until it either finishes or reports an
        interrupt. On an interrupt the user is prompted; answering ``y``
        resumes the same thread by streaming again with ``None`` as input,
        anything else stops the turn. The content of the last message seen is
        printed at the end (``None`` if no message was produced).

        Args:
            user_input: Text of the user's message for this turn.
            thread_id: Conversation id used by the checkpointer.
        """
        graph_input = {"messages": [HumanMessage(content=user_input)]}
        thread = {"configurable": {"thread_id": thread_id}}
        # Defined up front so the final print works even when no step yields a message.
        last_message = None

        while True:
            approved = False
            for event in self.graph.stream(graph_input, thread):
                if '__interrupt__' in event:
                    print("Interrupted for tool use!")
                    print("next node is", self.graph.get_state(thread).next)
                    # Flush before prompting so the lines above are visible
                    # when the input box appears.
                    sys.stdout.flush()
                    user_response = input("Continue? (y/n): ")
                    approved = user_response.strip().lower() == 'y'
                    break
                for v in event.values():
                    print(v)
                    if v['messages']:
                        last_message = v['messages'][-1].content

            # Either the graph ran to completion or the user declined.
            if not approved:
                break

            print("Resuming...")
            # None as input continues the interrupted thread from its checkpoint.
            graph_input = None

        print("="*50)
        print(last_message)
        print("="*50)


from langgraph.checkpoint.sqlite import SqliteSaver
import sqlite3

# NOTE(review): `shared_memory`, `sys_prompt` and `agent` are rebound here, so
# cells run after this one no longer see the objects created earlier in the
# notebook under the same names.
# A new in-memory SQLite checkpointer: it starts without any stored threads.
shared_memory = SqliteSaver(sqlite3.connect(":memory:", check_same_thread=False))

# This prompt is not stripped, so it keeps the newlines at both ends of the literal.
sys_prompt = """
You are a smart research assistant. Use the search engine to find the answer to the user's question.
You are allowed to make multiple calls (either together or in sequence).
"""

agent = InteractiveGraphAgent(model, tools=tools, checkpointer=shared_memory, system_message=sys_prompt)


In [42]:
# Interactive run: prompts "Continue? (y/n)" each time the graph is
# interrupted before the tool node.
thread_id = "5"
agent.run(user_input="What is the weather in SF?", thread_id=thread_id)


{'messages': [AIMessage(content='The current weather in San Francisco is partly cloudy with a temperature of 18.3°C (64.9°F). The wind is from the west-southwest at 9.8 mph (15.8 kph), with a wind chill of 14.6°C (58.4°F). The humidity is 78%, and the cloud cover is at 50%. There is no precipitation, and the dew point is 13.9°C (56.9°F). \n\nThe weather is expected to be partly cloudy with a high of 69°F (20.5°C) and a low of around 65°F (18.3°C) tomorrow, with a UV index of 7 (High). \n\nFor more details, you can refer to the official weather sources.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 174, 'prompt_tokens': 3008, 'total_tokens': 3182, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-gf752rlz6qiiund214ygh', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--3d4b9138-fef9-4083-a614-543248bb55

In [43]:
# Print every state snapshot the checkpointer recorded for the thread used above.
thread = { "configurable": { "thread_id": thread_id } }
for state_snapshot in agent.graph.get_state_history(thread):
    print(state_snapshot)

StateSnapshot(values={'messages': [HumanMessage(content='What is the weather in SF?', additional_kwargs={}, response_metadata={}, id='5bc42af0-a7f8-42dd-990e-dcfb1e3a4457'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '317224118', 'function': {'arguments': '{"query":"weather in San Francisco","include_domains":["weather.com","accuweather.com"],"search_depth":"basic","include_images":false}', 'name': 'tavily_search'}, 'type': 'function'}], 'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 52, 'prompt_tokens': 1856, 'total_tokens': 1908, 'completion_tokens_details': None, 'prompt_tokens_details': None}, 'model_name': 'qwen/qwen3-4b-2507', 'system_fingerprint': 'qwen/qwen3-4b-2507', 'id': 'chatcmpl-6y1omza6typit370roice', 'service_tier': None, 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--21fd9ce0-b9fb-44ff-8c02-28d4f9bf380e-0', tool_calls=[{'name': 'tavily_search', 'args': {'query': 'weather in San Francisco', 'include_domains': ['w