# Llama Stack - Agents Evaluation

In [66]:
import os
# from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
from llama_stack_client import LlamaStackClient

# Alternative setup, left disabled: build the client in-process through
# LlamaStackAsLibraryClient (needs the commented-out import above).
# client = LlamaStackAsLibraryClient(
#     "together", provider_data = {"tavily_search_api_key": os.environ['TAVILY_SEARCH_API_KEY']})

# Active setup: an HTTP client pointed at a server on localhost, port 8321.
client = LlamaStackClient(
    base_url="http://localhost:8321",
)
# NOTE(review): presumably only required for the library-client variant above -- confirm.
# _ = client.initialize()

#### Building a Search Agent

In [67]:
from llama_stack_client.types.agent_create_params import AgentConfig
from llama_stack_client.lib.agents.agent import Agent

# Model the agent is configured to use.
model_id = "meta-llama/Llama-3.3-70B-Instruct"

# Agent configuration with the built-in web search tool group enabled,
# top-p sampling, no input/output shields, and session persistence turned off.
agent_config_with_websearch = AgentConfig(
    model=model_id,
    instructions="You are a helpful assistant that can answer questions by searching the web.",
    sampling_params={
        "strategy": {
            "type": "top_p",
            "temperature": 1.0,
            "top_p": 0.9,
        },
    },
    toolgroups=["builtin::websearch"],
    tool_config={"tool_choice": "auto", "tool_prompt_format": "python_list"},
    input_shields=[],
    output_shields=[],
    enable_session_persistence=False,
)

In [68]:
import uuid
from itertools import islice

from datasets import load_dataset
from rich.pretty import pprint

# 1. Create an agent from the web-search configuration.
agent = Agent(client, agent_config_with_websearch)

# 2. Load the Simple QA dataset and keep the first `num_questions` rows of its
#    "train" split. Defaults to a single question; raise it to cover more of
#    the dataset.
num_questions = 1
ds = load_dataset("llamastack/evals", "evals__simpleqa")
rows = list(islice(ds["train"], num_questions))
questions = [row["input_query"] for row in rows]
answers = [row["expected_answer"] for row in rows]

# 3. Send each question to the agent in its own newly created session
#    (named with a random UUID) and print the non-streamed response.
for query in questions:
    session_id = agent.create_session(uuid.uuid4().hex)
    response = agent.create_turn(
        [
            {
                "role": "user",
                "content": query,
            }
        ],
        session_id=session_id,
        stream=False,
    )
    pprint(response)

### Examine agent logs

In [70]:
# Collect the agent's id and the ids of the sessions it has created.
agent_id = agent.agent_id
session_ids = agent.sessions
first_session_id = session_ids[0]

# Retrieve and print the first session.
session_response = client.agents.session.retrieve(
    agent_id=agent_id,
    session_id=first_session_id,
)
pprint(session_response)

# Retrieve and print the first turn of that session.
first_turn_id = session_response.turns[0].turn_id
turn_response = client.agents.turn.retrieve(
    agent_id=agent_id,
    session_id=first_session_id,
    turn_id=first_turn_id,
)
pprint(turn_response)

# Retrieve and print the first step of that turn.
step_response = client.agents.steps.retrieve(
    agent_id=agent_id,
    session_id=first_session_id,
    turn_id=first_turn_id,
    step_id=turn_response.steps[0].step_id,
)
pprint(step_response)