In [1]:
from langchain_google_vertexai import ChatVertexAI
# Gemini chat model on Vertex AI; passed to the tool loader and the agent below.
llm = ChatVertexAI(
    model="gemini-2.0-flash-001",
    temperature=1.0,
)

# Streaming

First, let's download a public MMLU [dataset](https://huggingface.co/datasets/cais/mmlu) from HuggingFace:

In [3]:
from datasets import load_dataset
# Load the "high_school_geography" configuration of the cais/mmlu dataset.
ds = load_dataset(path="cais/mmlu", name="high_school_geography")

README.md:   0%|          | 0.00/53.2k [00:00<?, ?B/s]

dataset_infos.json:   0%|          | 0.00/138k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/28.2k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/6.16k [00:00<?, ?B/s]

dev-00000-of-00001.parquet:   0%|          | 0.00/3.93k [00:00<?, ?B/s]

Generating test split:   0%|          | 0/198 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/22 [00:00<?, ? examples/s]

Generating dev split:   0%|          | 0/5 [00:00<?, ? examples/s]

Now, let's create a simple research agent:

In [12]:
from langchain.agents import load_tools
from langgraph.prebuilt import create_react_agent


# Tools the agent can call while researching an answer.
research_tools = load_tools(tool_names=["ddg-search", "arxiv", "wikipedia"], llm=llm)

# System message for the research agent. The fragments are joined by implicit
# string concatenation, so each one must end with a space to keep sentences apart.
system_prompt = (
    "You're a hard-working, curious and creative student. "
    "You're working on exam question. Think step by step. "
    "Always provide an argumentation for your answer. "
    "Do not assume anything, use available tools to search "
    "for evidence and supporting statements."
)


In [13]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langgraph.graph import MessagesState
from langgraph.prebuilt.chat_agent_executor import AgentState

# User-turn template; {question} and {options} are filled in from the agent state.
raw_prompt_template = (
    "Answer the following multiple-choice question. \n"
    "QUESTION:\n{question}\n\n"
    "ANSWER OPTIONS:\n{options}\n"
)
# Chat prompt: system instructions, the question itself, then the running
# message history injected through the "messages" placeholder.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("user", raw_prompt_template),
    ("placeholder", "{messages}"),
])

class ResearchState(AgentState):
    """Agent state extended with the two fields the prompt template fills in."""

    # Text of the multiple-choice question.
    question: str
    # Answer options, already formatted as a single string.
    options: str

# ReAct-style agent wired to the research tools, the custom state and the prompt.
research_agent = create_react_agent(
    model=llm,
    tools=research_tools,
    state_schema=ResearchState,
    prompt=prompt,
)

Now we need to prepare a _question_ and the answer _options_:

In [19]:
i = 6  # position of the question to use within the sampled test rows
ds_dict = ds["test"].take(100).to_dict()
question = ds_dict["question"][i]
# Number each answer choice and put one per line.
options = "\n".join(
    f"{idx}. {choice}" for idx, choice in enumerate(ds_dict["choices"][i])
)

Compare the outputs (and the number of messages available in each `event`) when using `stream_mode="values"` vs `stream_mode="updates"`:

In [20]:
async for _, event in research_agent.astream({"question": question, "options": options}, stream_mode=["values"]):
  print(len(event["messages"]))

0




1
5
6


In [21]:
async for _, event in research_agent.astream({"question": question, "options": options}, stream_mode=["updates"]):
  node = list(event.keys())[0]
  print(node, len(event[node].get("messages", [])))

agent 1
tools 1
agent 1


In [22]:
async for _, event in research_agent.astream({"question": question, "options": options}, stream_mode=["updates"]):
  print(event)



{'agent': {'messages': [AIMessage(content='A relict boundary is a former boundary line that no longer functions as an international border but is still visible in the cultural landscape. To determine which country does NOT have a well-known example of a relict boundary, I will investigate each option.\n\n*   **Vietnam:** I\'ll search for "relict boundary Vietnam" to see if there are any well-known examples.\n*   **United Kingdom:** I\'ll search for "relict boundary United Kingdom".\n*   **Germany:** I\'ll search for "relict boundary Germany".\n*   **Bolivia:** I\'ll search for "relict boundary Bolivia".\n\n', additional_kwargs={'function_call': {'name': 'duckduckgo_search', 'arguments': '{"query": "relict boundary Bolivia"}'}}, response_metadata={'is_blocked': False, 'safety_ratings': [], 'usage_metadata': {'prompt_token_count': 249, 'candidates_token_count': 173, 'total_token_count': 422, 'prompt_tokens_details': [{'modality': 1, 'token_count': 249}], 'candidates_tokens_details': [{'m

We can also explore what types of events we have seen:

In [24]:
seen_events = set([])
async for event in research_agent.astream_events({"question": question, "options": options}, version="v1"):
  if event["event"] not in seen_events:
    seen_events.add(event["event"])

print(seen_events)



{'on_chat_model_stream', 'on_chat_model_end', 'on_tool_start', 'on_chain_end', 'on_chain_stream', 'on_tool_end', 'on_chain_start', 'on_chat_model_start', 'on_prompt_start', 'on_prompt_end'}
