### AI Agent Tool Call Testing

In [None]:
from deepeval.tracing import observe, update_current_span

#### AI Agent with Tools

In [5]:
from langchain.tools import tool
from langchain_community.tools import DuckDuckGoSearchRun

# Shared DuckDuckGo search runner; search_tool below delegates to its run().
s_tool = DuckDuckGoSearchRun()

@tool
@observe(type="tool")
def search_tool(query: str):
    """Tool to search online for given query and return result"""
    # NOTE(review): the docstring wording is kept verbatim; @tool presumably
    # uses it as the tool description, so treat it as runtime text.
    # Delegate the lookup to the module-level DuckDuckGo runner.
    search_result = s_tool.run(query)
    return search_result

@tool
@observe(type="tool")
def add_numbers(a: int, b: int):
    """Add two numbers and returns result"""
    # NOTE(review): the docstring wording is kept verbatim; @tool presumably
    # uses it as the tool description, so treat it as runtime text.
    # Coerce both operands to int before adding them.
    left, right = int(a), int(b)
    return left + right

@tool
@observe(type="tool")
def subtract_numbers(a: int, b: int):
    """Subtract two numbers and returns result"""
    # NOTE(review): the docstring wording is kept verbatim; @tool presumably
    # uses it as the tool description, so treat it as runtime text.
    # Coerce both operands to int, then take the first minus the second.
    minuend, subtrahend = int(a), int(b)
    return minuend - subtrahend

# Tool registry shared by the model binding and the agent executor.
tools = [
    add_numbers,
    subtract_numbers,
    search_tool,
]

#### Local LLMs

In [6]:
from langchain_ollama import ChatOllama
# NOTE(review): this span wraps construction of the model object only, not the
# individual completions it later produces — confirm that is the intent.
@observe(type="llm",
         name="Local LLM",
         model="qwen2.5:latest")
def local_llm():
    """Build the local Ollama chat model with the notebook's tools bound.

    Returns:
        The result of ``ChatOllama(...).bind_tools(tools=tools)``, i.e. the
        chat model configured below with every entry of the module-level
        ``tools`` list attached.
    """
    chat_model = ChatOllama(
        base_url="http://localhost:11434",
        model="qwen2.5:latest",
        temperature=0.5,
        # ChatOllama caps generated tokens via ``num_predict``. The earlier
        # ``max_token`` keyword is not a ChatOllama field, so the intended
        # 250-token limit was not reaching the model.
        num_predict=250,
    )
    return chat_model.bind_tools(tools=tools)
# Build the tool-bound model once; main_ai_agent reads this module-level name.
llm = local_llm()

#### Create the chat prompt and the agent

In [None]:
from langchain_classic.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder
from deepeval.test_case import ToolCall
from deepeval.test_case import LLMTestCase
from deepeval.metrics import ToolCorrectnessMetric


# Chat prompt for the agent: a fixed system message, the prior conversation,
# the user's input, and finally a placeholder named "agent_scratchpad".
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant with access to tools."),
    # Filled from the "chat_history" key passed to invoke().
    MessagesPlaceholder("chat_history"),
    # Filled from the "input" key passed to invoke().
    ("human", "{input}"),
    # Presumably populated by the agent with its intermediate tool-call
    # messages — confirm against the LangChain agent documentation.
    MessagesPlaceholder("agent_scratchpad"),
])
@observe(type="agent",
         available_tools=["add_numbers", "subtract_numbers", "search_tool"])
def main_ai_agent(query):
    """Run one query through a freshly built tool-calling agent.

    A new agent and executor are assembled on every call from the
    module-level ``llm``, ``tools`` and ``prompt``. The executor is asked to
    return its intermediate steps, and the chat history is always empty.

    Args:
        query: Value supplied as the prompt's ``input``.

    Returns:
        Whatever ``AgentExecutor.invoke`` returns for this query.
    """
    executor_options = {
        "tools": tools,
        "verbose": True,
        "handle_parsing_errors": True,
        "return_intermediate_steps": True,
    }
    runner = AgentExecutor(
        agent=create_tool_calling_agent(llm, tools, prompt),
        **executor_options,
    )
    payload = {"input": query, "chat_history": []}
    return runner.invoke(payload)

#### Extract the tools called from the agent response

In [None]:
from typing import Any, List

from langchain_core.agents import AgentAction
def extract_tools_from_response(response: dict[str, Any]) -> List[ToolCall]:
    """Convert an agent run's intermediate steps into deepeval ``ToolCall`` objects.

    Args:
        response: Result dict from the agent run. Its optional
            ``"intermediate_steps"`` entry is read as a sequence of
            two-item pairs whose first item is the action taken.

    Returns:
        One ``ToolCall`` per step whose action is an ``AgentAction``, in the
        order the steps appear, carrying the action's tool name and tool
        input. Empty when the response has no intermediate steps.
    """
    steps = response.get("intermediate_steps", [])
    # Only AgentAction entries describe a tool invocation; anything else in
    # the first slot of a step is skipped.
    return [
        ToolCall(name=action.tool, input_parameters=action.tool_input)
        for action, _observation in steps
        if isinstance(action, AgentAction)
    ]

In [None]:
# Run the agent on an arithmetic prompt, then convert its intermediate steps
# into ToolCall objects (the bare expression is the cell's displayed value
# when this runs as a notebook cell).
# NOTE(review): despite the variable name, this query is arithmetic rather
# than a web search.
search_tool_response = main_ai_agent("What is 15 + 20?")
extract_tools_from_response(search_tool_response)

#### Evaluation (Tool Correctness Metric)

In [None]:

from deepeval.dataset import Golden
