#### Testing AI Agent Tool Calling with DeepEval

In [4]:
!pip install -qU duckduckgo-search


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [38]:
from langchain_ollama import ChatOllama

# Chat model backed by the Ollama server at localhost:11434.
# Note: `llm` is re-assigned with different settings in the agent cell below.
llm = ChatOllama(
    model="qwen2.5:latest",
    base_url="http://localhost:11434",
    temperature=0.5,
    num_predict=200,
)

#### Agent with Tools

In [None]:
!pip install -U ddgs

In [39]:
from langchain_ollama import ChatOllama
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_core.tools import tool
from langchain.agents import create_agent 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.messages import HumanMessage

# --- 1. Define Tools ---
@tool
def add_number(a: float, b: float) -> float:
    """Adds two numbers, a and b, and returns the result."""
    # NOTE(review): the docstring above presumably doubles as the tool
    # description shown to the model -- confirm before rewording it.
    total = a + b
    return total

@tool
def subtract_number(a: float, b: float) -> float:
    """Subtracts the second number, b, from the first number, a, and returns the result."""
    # NOTE(review): the docstring above presumably doubles as the tool
    # description shown to the model -- confirm before rewording it.
    difference = a - b
    return difference

# Tools the agent may call: the two arithmetic helpers plus a web search.
search_tool = DuckDuckGoSearchRun()
tools = [add_number, subtract_number, search_tool]


# --- 2. Initialize LLM and System Prompt ---

# temperature=0 keeps tool selection as repeatable as the model allows.
# NOTE(review): `timeout` and `stream` do not appear among ChatOllama's
# documented fields -- confirm they take effect. An HTTP timeout is normally
# passed through client_kwargs={"timeout": ...}.
llm = ChatOllama(
    model="qwen2.5:latest", 
    temperature=0,
    timeout=120, 
    num_ctx=8192, 
    stream=False
) 

system_instruction = (
    "You are an expert AI assistant that uses the available tools to answer questions. "
    "Use the tools for any calculation or external search. "
    "Always reason about your actions using the Thought, Action, and Observation loop. "
    "Your final output MUST be the Final Answer."
)

# --- 3. Build the Agent ---
agent_runnable = create_agent(
    model=llm, 
    tools=tools, 
    system_prompt=system_instruction
)

# Convenience chain: run the agent and keep only the text of its last message.
# Use final_chain.invoke(input_data) when only the answer string is needed.
final_chain = agent_runnable | (lambda x: x["messages"][-1].content) | StrOutputParser()


# --- 4. Run the Agent ---
print("\n--- Running Agent Example ---")
user_input_string = "What is the result of 850 minus 75 and what is the largest city in Portugal?"

# The conversation is passed under "messages", the only key present in the
# returned state. The former extra "input" key has been dropped.
input_data = {
    "messages": [HumanMessage(content=user_input_string)]
}

# Invoke the agent itself (not final_chain) so the full message history,
# including tool calls and tool results, is available to later cells as `data`.
data = agent_runnable.invoke(input=input_data)
data


--- Running Agent Example ---


{'messages': [HumanMessage(content='What is the result of 850 minus 75 and what is the largest city in Portugal?', additional_kwargs={}, response_metadata={}, id='5970ab65-d80e-4eef-ad6a-bba6923a6e2c'),
  AIMessage(content="Let's break this down into two parts. First, we'll perform the subtraction for 850 minus 75. Then, we'll use a web search to find the largest city in Portugal.\n", additional_kwargs={}, response_metadata={'model': 'qwen2.5:latest', 'created_at': '2025-11-25T15:58:13.420311Z', 'done': True, 'done_reason': 'stop', 'total_duration': 7421409541, 'load_duration': 96987666, 'prompt_eval_count': 359, 'prompt_eval_duration': 1188236000, 'eval_count': 97, 'eval_duration': 5465881330, 'logprobs': None, 'model_name': 'qwen2.5:latest', 'model_provider': 'ollama'}, id='lc_run--54ac0ab7-704e-42f1-bd70-b19267e3f56b-0', tool_calls=[{'name': 'subtract_number', 'args': {'a': 850, 'b': 75}, 'id': 'cb79a8a4-6f09-4a2b-b5bc-b9cce35fab5e', 'type': 'tool_call'}, {'name': 'duckduckgo_search

In [41]:
from langchain_core.messages import AIMessage, ToolMessage, HumanMessage

def get_all_agent_steps(messages: list, max_output_chars: int = 100) -> list:
    """
    Flatten an agent message history into an ordered list of tool steps.

    Each tool call found on an AIMessage becomes an "ACTION" step and each
    ToolMessage becomes an "OBSERVATION" step. All other messages are skipped.

    Args:
        messages: Message objects to scan, in conversation order.
        max_output_chars: Maximum number of characters of a tool result kept
            in the "output" preview. Longer results are cut and suffixed
            with "...".

    Returns:
        A list of dicts. Every dict has "step_type", a 1-based "sequence",
        "tool_name" and "tool_call_id". ACTION steps add "tool_input";
        OBSERVATION steps add "output".
    """
    steps = []

    for msg in messages:
        # 1. Look for the AI's Decision (Tool Call/Action)
        if isinstance(msg, AIMessage):
            # An AIMessage without tool calls contributes no steps.
            for call in getattr(msg, "tool_calls", None) or []:
                steps.append({
                    "step_type": "ACTION",
                    "sequence": len(steps) + 1,
                    "tool_name": call['name'],
                    "tool_input": call['args'],
                    "tool_call_id": call['id']
                })

        # 2. Look for the Tool's Result (Observation)
        elif isinstance(msg, ToolMessage):
            content = msg.content
            # Content is not guaranteed to be a string. Stringify anything
            # else first so slicing and the "..." suffix cannot raise.
            preview = content if isinstance(content, str) else str(content)
            if len(preview) > max_output_chars:
                preview = preview[:max_output_chars] + "..."
            steps.append({
                "step_type": "OBSERVATION",
                "sequence": len(steps) + 1,
                "tool_name": msg.name,
                "tool_call_id": msg.tool_call_id,
                "output": preview
            })

    return steps

# `data` is the state dict returned by agent_runnable.invoke(); its 'messages'
# list holds the whole conversation, including tool calls and tool results.
get_all_agent_steps(data["messages"])

[{'step_type': 'ACTION',
  'sequence': 1,
  'tool_name': 'subtract_number',
  'tool_input': {'a': 850, 'b': 75},
  'tool_call_id': 'cb79a8a4-6f09-4a2b-b5bc-b9cce35fab5e'},
 {'step_type': 'ACTION',
  'sequence': 2,
  'tool_name': 'duckduckgo_search',
  'tool_input': {'query': 'largest city in Portugal'},
  'tool_call_id': '0d119326-3942-499a-be4a-51973df05241'},
 {'step_type': 'OBSERVATION',
  'sequence': 3,
  'tool_name': 'subtract_number',
  'tool_call_id': 'cb79a8a4-6f09-4a2b-b5bc-b9cce35fab5e',
  'output': '775.0'},
 {'step_type': 'OBSERVATION',
  'sequence': 4,
  'tool_name': 'duckduckgo_search',
  'tool_call_id': '0d119326-3942-499a-be4a-51973df05241',
  'output': 'As the most important city in the heavily industrialized northwest, many of the largest Portuguese c...'}]