In [17]:
from deepagents import create_deep_agent
from langchain_ollama import ChatOllama
from langgraph.checkpoint.memory import MemorySaver

In [18]:
from typing import Literal
from tavily import TavilyClient
from langchain_core.tools import tool
from dotenv import load_dotenv
load_dotenv()

tavily_client = TavilyClient()
@tool
def internet_search(
    query: str,
    max_results: int = 5,
    topic: Literal["general", "news", "finance"] = "general",
    include_raw_content: bool = False,
):
    """
    Search the internet for information.
    
    Args:
        query: The search query string
        max_results: The maximum number of search results to return

        
    Returns:
        Search results as a dict
    """
    return tavily_client.search(
        query,
        max_results=max_results,
        include_raw_content=include_raw_content,
        topic=topic,
    )

In [19]:
llm = ChatOllama(
    model="gpt-oss:120b-cloud", # Use the model you pulled
    base_url="http://localhost:11434", # Point to the local Ollama server
    temperature=0
)

In [20]:

agent_graph = create_deep_agent(
    model=llm,
    tools=[internet_search],
    checkpointer=MemorySaver() # This enables Short-Term Chat Memory
)

In [21]:
config = {"configurable": {"thread_id": "session_1"}}

In [36]:
events = agent_graph.stream(
    {"messages": [("user", "who is pm of india?")]},
    config,
    stream_mode="values"
)

In [30]:
final_answer = ""
for event in events:
    if "messages" in event:
        last_msg = event["messages"][-1]
        # Filter out intermediate tool calls, show only AI response
        if last_msg.type == "ai" and not last_msg.tool_calls:
            final_answer = last_msg.content

In [None]:
print(final_answer)


The Prime Minister of India is **Narendra Modi**. He has served in that role since May 2014.


In [45]:
agent_graph.checkpointer.alist(config)

<async_generator object InMemorySaver.alist at 0x00000170404ED840>