# Lesson 4: Persistence and Streaming

In [1]:
from langchain_groq import ChatGroq
import os
from dotenv import load_dotenv
from tavily import TavilyClient


# Load variables from a local .env file into the process environment so that
# no credentials are hardcoded in the notebook.
load_dotenv()

# `groq_api_key` is kept as a module-level name but is not passed to ChatGroq below.
# NOTE(review): ChatGroq presumably reads GROQ_API_KEY from the environment itself — confirm.
groq_api_key = os.getenv("GROQ_API_KEY")
TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

# Model id used for every LLM call in this notebook.
# Other model ids noted by the author: "qwen/qwen3-32b", "llama-3.1-8b-instant".
model_id = "openai/gpt-oss-120b"

llm = ChatGroq(
    model=model_id,
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    verbose=True,  # boolean flag; previously passed as the int 1
)

# Reuse the key that was already read above instead of a second environment lookup.
client = TavilyClient(api_key=TAVILY_API_KEY)

In [2]:
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage
from langchain_community.tools.tavily_search import TavilySearchResults

In [3]:
# Search tool handed to the agent further down; constructed with max_results=2.
tool = TavilySearchResults(max_results=2)

  tool = TavilySearchResults(max_results=2)


In [4]:
class AgentState(TypedDict):
    """State schema passed to ``StateGraph``: a single ``messages`` list."""
    # The node methods in this notebook return only their *new* messages under
    # the 'messages' key; operator.add is attached as annotation metadata.
    # NOTE(review): presumably LangGraph uses operator.add as the reducer that
    # appends node output to the existing list — confirm against LangGraph docs.
    messages: Annotated[list[AnyMessage], operator.add]

In [8]:
# Checkpointer instance that is later passed to Agent(...) and on to graph.compile().
# An in-memory saver is used here. The SQLite-backed variant the author left
# disabled would be:
#   from langgraph.checkpoint.sqlite import SqliteSaver
#   memory = SqliteSaver.from_conn_string(":memory:")
from langgraph.checkpoint.memory import InMemorySaver

memory = InMemorySaver()

In [9]:
class Agent:
    """Tool-calling agent built as a two-node LangGraph loop.

    The graph starts at the ``llm`` node (model call). If the model's reply
    contains tool calls, control moves to the ``action`` node (tool execution)
    and then back to ``llm``; otherwise the run ends.

    Args:
        model: Chat model exposing ``bind_tools``; the bound model is what gets invoked.
        tools: Tool objects, each with a ``name`` attribute and an ``invoke`` method.
        checkpointer: Forwarded to ``StateGraph.compile(checkpointer=...)``.
        system: Optional system prompt prepended to the messages on every model call.
    """

    def __init__(self, model, tools, checkpointer, system=""):
        self.system = system
        graph = StateGraph(AgentState)
        graph.add_node("llm", self.call_openai)
        graph.add_node("action", self.take_action)
        # exists_action returns a bool, which is mapped to the next node (or END).
        graph.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        graph.add_edge("action", "llm")
        graph.set_entry_point("llm")
        self.graph = graph.compile(checkpointer=checkpointer)
        # Name -> tool lookup used by take_action to dispatch the model's tool calls.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

    def call_openai(self, state: AgentState):
        """Invoke the tool-bound model on the conversation and return its reply.

        The method name is kept for compatibility with the graph wiring; it
        calls whatever model was passed to the constructor.

        Returns:
            ``{'messages': [reply]}`` containing only the new message.
        """
        messages = state['messages']
        if self.system:
            # The system prompt is added per call and is not part of the returned state update.
            messages = [SystemMessage(content=self.system)] + messages
        message = self.model.invoke(messages)
        return {'messages': [message]}

    def exists_action(self, state: AgentState):
        """Return True when the most recent message requests at least one tool call."""
        last_message = state['messages'][-1]
        # getattr guards against a last message type that carries no `tool_calls` attribute.
        return bool(getattr(last_message, "tool_calls", None))

    def take_action(self, state: AgentState):
        """Execute every tool call in the last message and return the results.

        A call naming a tool that is not registered no longer raises ``KeyError``;
        it is answered with a ToolMessage asking the model to retry, so the graph
        run continues and every ``tool_call_id`` still gets a reply.

        Returns:
            ``{'messages': [ToolMessage, ...]}`` with one entry per tool call.
        """
        tool_calls = state['messages'][-1].tool_calls
        results = []
        for t in tool_calls:
            print(f"Calling: {t}")
            selected = self.tools.get(t['name'])
            if selected is None:
                # The model asked for a tool that was never bound.
                print(f"Unknown tool requested: {t['name']!r}")
                result = "bad tool name, retry"
            else:
                result = selected.invoke(t['args'])
            results.append(ToolMessage(tool_call_id=t['id'], name=t['name'], content=str(result)))
        print("Back to the model!")
        return {'messages': results}

In [10]:
# System prompt for the agent; Agent.call_openai prepends it to the conversation
# as a SystemMessage on every model call.
prompt = """You are a smart research assistant. Use the search engine to look up information. \
You are allowed to make multiple calls (either together or in sequence). \
Only look up information when you are sure of what you want. \
If you need to look up some information before asking a follow up question, you are allowed to do that!
"""
# Build the agent from the Groq chat model, the single search tool and the checkpointer.
abot = Agent(llm, [tool], system=prompt, checkpointer=memory)

In [11]:
# User question that is fed to the agent graph in the cells below.
messages = [HumanMessage(content="What is the weather in sf?")]

In [12]:
# Run config passed as the second argument to graph.stream(); selects thread_id "1".
# NOTE(review): presumably the checkpointer saves and restores state per thread_id — confirm.
thread = {"configurable": {"thread_id": "1"}}

In [13]:
# Stream the run and print the messages carried by each update as it arrives.
for update in abot.graph.stream({"messages": messages}, thread):
    for node_output in update.values():
        print(node_output['messages'])

[AIMessage(content='', additional_kwargs={'reasoning_content': 'The user asks: "What is the weather in sf?" Likely they mean San Francisco. Need current weather. Use search.', 'tool_calls': [{'id': 'fc_2bd7e694-4cf9-4b8e-83ff-c204a6d53daa', 'function': {'arguments': '{"query":"current weather San Francisco"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 61, 'prompt_tokens': 229, 'total_tokens': 290, 'completion_time': 0.128010931, 'prompt_time': 0.009720302, 'queue_time': 0.16633694, 'total_time': 0.137731233}, 'model_name': 'openai/gpt-oss-120b', 'system_fingerprint': 'fp_100cdb69bc', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--d6ada0d0-f301-41c7-95c0-24b87b4e4cb8-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'current weather San Francisco'}, 'id': 'fc_2bd7e694-4cf9-4b8e-83ff-c204a6d53daa', 'type': 'tool_call'}], usage_metadata={'input_tokens

In [14]:
# Follow-up question sent with thread_id "1", the same id used for the first question.
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="What about in la?")]
for update in abot.graph.stream({"messages": messages}, thread):
    for node_output in update.values():
        print(node_output)

{'messages': [AIMessage(content='', additional_kwargs={'reasoning_content': 'We need current weather in Los Angeles (LA) same date (Nov 1 2025). Use search.', 'tool_calls': [{'id': 'fc_7d145c67-e63f-45dd-9939-fdc5b6425c80', 'function': {'arguments': '{"query":"Los Angeles weather November 1 2025"}', 'name': 'tavily_search_results_json'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 60, 'prompt_tokens': 3776, 'total_tokens': 3836, 'completion_time': 0.132159025, 'prompt_time': 0.185377691, 'queue_time': 0.167575465, 'total_time': 0.317536716}, 'model_name': 'openai/gpt-oss-120b', 'system_fingerprint': 'fp_490688312f', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None}, id='run--c44171e2-1044-4fad-9b4b-e5b65cac3926-0', tool_calls=[{'name': 'tavily_search_results_json', 'args': {'query': 'Los Angeles weather November 1 2025'}, 'id': 'fc_7d145c67-e63f-45dd-9939-fdc5b6425c80', 'type': 'tool_call'}], usage_metadata={'input_tokens':

In [15]:
# Second follow-up, again sent with thread_id "1".
thread = {"configurable": {"thread_id": "1"}}
messages = [HumanMessage(content="Which one is warmer?")]
for update in abot.graph.stream({"messages": messages}, thread):
    for node_output in update.values():
        print(node_output)

{'messages': [AIMessage(content='Los\u202fAngeles is noticeably warmer.\n\n| City | Typical high today | Typical low today |\n|------|-------------------|-------------------|\n| **San\u202fFrancisco** | ~19\u202f°C (66\u202f°F) | ~15\u202f°C (59\u202f°F) |\n| **Los\u202fAngeles**   | ~27\u202f°C (81\u202f°F) | ~19\u202f°C (66\u202f°F) |\n\nLos\u202fAngeles’ daytime temperature is about **8\u202f°C (≈\u202f14\u202f°F) higher** than San\u202fFrancisco’s, and its night‑time temperature is also a few degrees warmer. So, for today’s weather, **Los\u202fAngeles is the warmer of the two cities**.', additional_kwargs={'reasoning_content': 'We need to compare temperatures: San Francisco high ~19°C, low ~15°C. Los Angeles high ~27°C, low ~19°C. So Los Angeles is warmer. Provide answer.'}, response_metadata={'token_usage': {'completion_tokens': 197, 'prompt_tokens': 5142, 'total_tokens': 5339, 'completion_time': 0.412987203, 'prompt_time': 0.231347193, 'queue_time': 0.167471798, 'total_time': 0.6

In [16]:
# Same question as the previous cell, but sent with a different thread_id ("2").
thread = {"configurable": {"thread_id": "2"}}
messages = [HumanMessage(content="Which one is warmer?")]
for update in abot.graph.stream({"messages": messages}, thread):
    for node_output in update.values():
        print(node_output)

{'messages': [AIMessage(content='I’m not sure which two things you’d like to compare. Could you let me know the specific items, locations, or objects you’re wondering about, so I can tell you which one is warmer?', additional_kwargs={'reasoning_content': 'The user asks: "Which one is warmer?" No context. Likely they refer to something previously mentioned? There\'s no prior conversation. Could be ambiguous. Maybe they want to compare two items? Need clarification. Should ask follow-up.'}, response_metadata={'token_usage': {'completion_tokens': 98, 'prompt_tokens': 227, 'total_tokens': 325, 'completion_time': 0.20520614, 'prompt_time': 0.00887187, 'queue_time': 0.177288091, 'total_time': 0.21407801}, 'model_name': 'openai/gpt-oss-120b', 'system_fingerprint': 'fp_b0f0a60553', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None}, id='run--27e12ce7-6ab3-4ba5-85f0-faa4734d5bd1-0', usage_metadata={'input_tokens': 227, 'output_tokens': 98, 'total_tokens': 325})]}


## Streaming tokens

In [None]:
# Checkpointer and agent for the token-streaming demo.
#
# The agent is built from `llm`, the chat model defined at the top of the
# notebook; the previous `model` name is not defined anywhere in this notebook.
#
# InMemorySaver is used here as in the persistence section above.
# NOTE(review): it is expected to support the async graph API as well, and
# AsyncSqliteSaver appears to have moved to `langgraph.checkpoint.sqlite.aio`
# (separate langgraph-checkpoint-sqlite package), where `from_conn_string` is
# an async context manager — confirm before switching back to SQLite.
from langgraph.checkpoint.memory import InMemorySaver

memory = InMemorySaver()
abot = Agent(llm, [tool], system=prompt, checkpointer=memory)

In [None]:
messages = [HumanMessage(content="What is the weather in SF?")]
thread = {"configurable": {"thread_id": "4"}}
async for event in abot.graph.astream_events({"messages": messages}, thread, version="v1"):
    kind = event["event"]
    if kind == "on_chat_model_stream":
        content = event["data"]["chunk"].content
        if content:
            # Empty content in the context of OpenAI means
            # that the model is asking for a tool to be invoked.
            # So we only print non-empty content
            print(content, end="|")