## Test a ReAct agent with Pytest and LangSmith

We will create a ReAct agent that answers questions about publicly traded stocks and write a comprehensive test suite for it.

In [2]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [5]:
# Initialize the LLM options
from langchain.chat_models import init_chat_model
# Three interchangeable chat models, all created with the same timeout and
# temperature so they can be swapped into the agent without other changes.
model_gemini_flash = init_chat_model(
    "gemini-2.5-flash",
    model_provider="google_genai",
    timeout=30,
    temperature=0,
)
model_llama_groq = init_chat_model(
    "llama-3.1-8b-instant",
    model_provider="groq",
    timeout=30,
    temperature=0,
)
model_gpt_4o_mini = init_chat_model(
    "gpt-4o-mini",
    model_provider="openai",
    timeout=30,
    temperature=0,
)


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
from typing_extensions import Annotated, Literal, TypedDict
from langchain_community.tools import TavilySearchResults
from e2b_code_interpreter import Sandbox
from langchain_community.utilities.polygon import PolygonAPIWrapper
from langchain_community.tools.polygon.aggregates import PolygonAggregates
# Web search tool handed to the agent: at most five hits per query, with the
# raw page content included in each result.
search_tool = TavilySearchResults(include_raw_content=True, max_results=5)

# Define code tool
def code_tool(code: str) -> str:
  """Execute python code and return the result."""
  # NOTE(review): the docstring above is kept verbatim because LangChain
  # presumably uses it as the tool description shown to the model - confirm
  # before rewording it.
  #
  # A fresh sandbox is created for every call, so no state is shared between
  # invocations.
  sbx = Sandbox()
  try:
    execution = sbx.run_code(code)
  finally:
    # Always shut the sandbox down, even when run_code raises; otherwise
    # every tool call would leave a sandbox running until it times out.
    sbx.kill()

  # The execution object is inspected after the sandbox is gone; it only
  # carries the already-collected error, results and logs.
  if execution.error:
    return f"Error: {execution.error}"
  return f"Results: {execution.results}, Logs: {execution.logs}"


# Input schema for the stock ticker tool (see ticker_tool below).
# Every field is declared as Annotated[<type>, ..., <description>].
# NOTE(review): the class docstring and the per-field description strings are
# presumably surfaced to the LLM as the tool's argument schema - treat them as
# prompt text and confirm before rewording.
class TickerToolInput(TypedDict):
  """Input format for the ticker tool.
    The tool will pull data in aggregate blocks (timespan_multiplier * timespan) from the from_date to the to_date
  """
  # Stock symbol to query.
  ticker: Annotated[str, ..., "The ticker symbol of the stock"]
  # Unit of one aggregation window; restricted to the listed literals.
  timespan: Annotated[Literal["minute", "hour", "day", "week", "month", "quarter", "year"], ..., "The size of the time window."]
  # Number of `timespan` units per aggregation window.
  timespan_multiplier: Annotated[int, ..., "The multiplier for the time window"]
  # Date range bounds, as YYYY-MM-DD strings.
  from_date: Annotated[str, ..., "The date to start pulling data from, YYYY-MM-DD format - ONLY include the year month and day"]
  to_date: Annotated[str, ..., "The date to stop pulling data, YYYY-MM-DD format - ONLY include the year month and day"]

# Polygon client shared by the aggregates tool below.
# NOTE(review): PolygonAPIWrapper() is called with no arguments, so the API
# key presumably comes from the environment (e.g. via load_dotenv) - confirm.
api_wrapper = PolygonAPIWrapper()
# Aggregates tool built on that client; it is invoked by ticker_tool and is
# also passed directly to the agent in its tools list.
polygon_aggregates = PolygonAggregates(api_wrapper=api_wrapper)

# Define stock ticker tool
def ticker_tool(query: TickerToolInput) -> str:
  """Pull data for the ticker."""
  # Thin wrapper: hand the structured query straight to the Polygon
  # aggregates tool and pass its output back unchanged.
  aggregates = polygon_aggregates.invoke(query)
  return aggregates

### Define Agent

In [8]:
from langchain.agents import create_agent
# Structured response schema passed to the agent as `response_format`.
# Every field is declared as Annotated[<type>, ..., <description>].
# NOTE(review): the description strings are presumably forwarded to the model
# as part of the output schema - confirm before rewording them.
class AgentOutputFormat(TypedDict):
    # Numeric result, or None when the question did not ask for a number.
    numeric_answer: Annotated[float | None, ..., "The numeric answer, if the user asked for one"]
    # Free-text result, or None when the question did not ask for text.
    text_answer: Annotated[str | None, ..., "The text answer, if the user asked for one"]
    # Explanation of how the answer was reached; this field is not optional.
    reasoning: Annotated[str, ..., "The reasoning behind the answer"]

# Assemble the agent under test: Gemini Flash as the LLM, the three tools
# defined above, and a structured final answer shaped like AgentOutputFormat.
agent = create_agent(
    model=model_gemini_flash,
    system_prompt="You are a financial expert. Respond to the users query accurately",
    tools=[
        code_tool,
        search_tool,
        polygon_aggregates,
    ],
    response_format=AgentOutputFormat,
)



In [9]:
from langsmith import testing as t
import pytest

@pytest.mark.langsmith
@pytest.mark.parametrize(
  # <-- Can still use all normal pytest markers
  "query",
  ["Hello!", "How are you doing?"],
)
def test_no_tools_on_offtopic_query(query: str) -> None:
  """Test that the agent does not use tools on offtopic queries.

  Only the LLM step of the agent is exercised (a single model call), not the
  full ReAct loop. The query, the expected (empty) tool-call list and the
  actual tool calls are logged to LangSmith.
  """
  # Log the test example
  t.log_inputs({"query": query})
  expected = []
  t.log_reference_outputs({"tool_calls": expected})
  # Call the agent's model node directly instead of running the ReACT loop.
  # Agents built with langchain's `create_agent` name that node "model";
  # the older langgraph `create_react_agent` named it "agent". Look up
  # whichever exists so the test does not die with a KeyError.
  nodes = agent.nodes
  model_node = nodes["model"] if "model" in nodes else nodes["agent"]
  result = model_node.invoke(
      {"messages": [{"role": "user", "content": query}]}
  )
  # This loop is only for activity tracing in the notebook cell output.
  for msg in result["messages"]:
    msg.pretty_print()

  # The node returns only its own update, so the first message is the
  # model's reply.
  actual = result["messages"][0].tool_calls
  t.log_outputs({"tool_calls": actual})
  # Check that no tool calls were made.
  assert actual == expected

### Test 2: Simple Tool Calling

For tool calling, we are going to verify that the agent calls the correct tool with the correct parameters.

In [None]:
@pytest.mark.langsmith
def test_searches_for_correct_ticker() -> None:
    """Test that the model looks up the correct ticker on simple query.

    Only the LLM step of the agent is exercised (a single model call). The
    test passes when the first tool call targets the Polygon aggregates tool
    with ticker "AAPL"; any other outcome (no tool call, a different tool, a
    missing ticker argument) is recorded as None and fails the assertion.
    """
    # Log the test example
    query = "What is the price of Apple?"
    t.log_inputs({"query": query})
    expected = "AAPL"
    t.log_reference_outputs({"ticker": expected})
    # Call the agent's model node directly instead of running the full ReACT
    # loop. Agents built with langchain's `create_agent` name that node
    # "model"; the older langgraph `create_react_agent` named it "agent".
    nodes = agent.nodes
    model_node = nodes["model"] if "model" in nodes else nodes["agent"]
    result = model_node.invoke(
        {"messages": [{"role": "user", "content": query}]}
    )

    # This loop is only for activity tracing in the notebook cell output.
    for msg in result["messages"]:
        msg.pretty_print()

    tool_calls = result["messages"][0].tool_calls

    # Guard against an empty list: if the model answered without calling a
    # tool, report that as a normal assertion failure, not an IndexError.
    if tool_calls and tool_calls[0]["name"] == polygon_aggregates.name:
        actual = tool_calls[0]["args"].get("ticker")
    else:
        actual = None

    # Record the observed ticker so the LangSmith run shows actual vs. expected.
    t.log_outputs({"ticker": actual})
    # Check that the right ticker was queried
    assert actual == expected