#### This notebook showcases how to build, run, and evaluate different **Flotorch ADK Agents** with SDK tracing and LLM-based evaluation.
- Sets up a FlotorchADK agent configured on the Flotorch Console
- Creates tools and connects them to the agent
- Sets up tracing alongside the agent
- Uses the trace to evaluate the agent's response on different metrics


In [None]:
!pip install pandas jinja2

In [None]:
import os
import time
from typing import List
from google.adk.runners import Runner
from flotorch.adk.agent import FlotorchADKAgent
from flotorch.adk.sessions import FlotorchADKSession
from flotorch.adk.memory import FlotorchMemoryService
from google.genai import types
from google.adk.tools import FunctionTool

# Evaluation
from flotorch_eval.agent_eval.metrics.llm_evaluators import (
    TrajectoryEvalWithLLM, 
    TrajectoryEvalWithLLMWithReference, 
    ToolCallAccuracy, 
    AgentGoalAccuracy
    )
from flotorch_eval.agent_eval.metrics.usage_metrics import UsageMetric
from flotorch_eval.agent_eval.metrics.latency_metrics import LatencyMetric
from flotorch_eval.agent_eval.core.client import FlotorchEvalClient
from evaluation_utils import display_evaluation_results


##### Set up the Flotorch API key and base URL
##### This is the only configuration you need to do as it will be used for the agents, tracing and evaluation.


In [None]:
# Gateway endpoint; passed as `base_url` to the agent, session, memory and evaluation clients below.
FLOTORCH_GATEWAY_BASE_URL = ""
# API key; passed as `api_key` to the same clients.
FLOTORCH_API_KEY = ""
# Passed as `name` to FlotorchMemoryService.
MEMORY_PROVIDER = ""
# Passed as `default_evaluator` to FlotorchEvalClient.
evaluation_llm_model = ""

### Creating a Runner Factory and a function to run the runner with a query

##### FlotorchADKAgent can be set up using the API key and base url. It can build the agent and the session it requires.

In [None]:
def create_runner(agent_name: str, tools: List[FunctionTool], enable_sdk_tracing: bool, app_name: str):
    """Build a Runner plus its FlotorchADKAgent client for the named agent.

    The tracing flag is written to the ``FLOTORCH_ENABLE_SDK_TRACING``
    environment variable as a lowercase string before any Flotorch object is
    constructed. The agent, a session service and a memory service are then
    created with the module-level gateway URL and API key and wired into a
    Runner.

    Args:
        agent_name: Name passed to FlotorchADKAgent.
        tools: Function tools handed to the agent as its custom tools.
        enable_sdk_tracing: Value written to the environment variable.
        app_name: Application name given to the Runner.

    Returns:
        A ``(runner, agent_client)`` tuple.
    """
    os.environ["FLOTORCH_ENABLE_SDK_TRACING"] = str(enable_sdk_tracing).lower()

    # Every Flotorch service below authenticates against the same gateway.
    gateway = {"base_url": FLOTORCH_GATEWAY_BASE_URL, "api_key": FLOTORCH_API_KEY}

    agent_client = FlotorchADKAgent(
        agent_name=agent_name,
        enable_memory=True,
        custom_tools=tools,
        tracer_config={"enabled": True, "sampling_rate": 1},
        **gateway,
    )
    adk_agent = agent_client.get_agent()

    sessions = FlotorchADKSession(**gateway)
    memory = FlotorchMemoryService(name=MEMORY_PROVIDER, **gateway)

    runner = Runner(
        agent=adk_agent,
        app_name=app_name,
        session_service=sessions,
        memory_service=memory,
    )
    return runner, agent_client

# Single Turn Query
def run_single_turn(runner: Runner, query: str, session_id: str, user_id: str) -> str:
    """Run one user message through the runner and return the agent's reply.

    The event stream is consumed lazily. The result is the text of the first
    part of the first final-response event that carries content parts; if no
    such event arrives, a fixed fallback message is returned.
    """
    user_message = types.Content(role="user", parts=[types.Part(text=query)])
    stream = runner.run(user_id=user_id, session_id=session_id, new_message=user_message)

    final_texts = (
        ev.content.parts[0].text
        for ev in stream
        if ev.is_final_response() and ev.content and ev.content.parts
    )
    return next(final_texts, "No response from agent.")

## Evaluation  

- The `FlotorchEvalClient` supports the following metrics:  
  - **TrajectoryEvalWithLLM**  
  - **TrajectoryEvalWithLLMWithReference**
  - **ToolCallAccuracy**  
  - **AgentGoalAccuracy**  
  - **UsageMetric**  
  - **LatencyMetric**  

- `FlotorchEvalClient` only requires an **API key** and **base URL**.  
- Evaluations can be triggered by simply providing a **trace ID**.  
- By default, all available metrics are evaluated.  
- **TrajectoryEvalWithLLMWithReference** is evaluated only if reference is provided.
- A default evaluator can be set on the client, which is used for all metrics that require an LLM.  
- To run specific metrics — or to use a particular model for a given metric — you can define them explicitly and pass them during evaluation.  

**Example:**  

```python
metrics = [
    TrajectoryEvalWithLLM(llm="flotorch/gpt-4o")
    ]
client.evaluate(trace_id, metrics)
```

In [None]:
async def evaluate_trajectory(trace_id, reference=None, reference_id=None):
    """Fetch a trace, evaluate it with FlotorchEvalClient and print the results.

    Args:
        trace_id: ID of the trace to fetch and evaluate.
        reference: Optional reference trajectory, forwarded to the client as
            ``reference``.
        reference_id: Optional ID of a reference trace, forwarded to the
            client as ``reference_trace_id``.

    Raises:
        ValueError: If both ``reference`` and ``reference_id`` are supplied.
    """
    # Validate first so a bad call fails before any client is constructed.
    # The message names this function's own parameters.
    if reference and reference_id:
        raise ValueError("Provide either 'reference' or 'reference_id', not both.")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments during a long evaluation.
    started = time.perf_counter()

    client = FlotorchEvalClient(
        api_key=FLOTORCH_API_KEY,
        base_url=FLOTORCH_GATEWAY_BASE_URL,
        default_evaluator=evaluation_llm_model,  # Default evaluator for all metrics that require an LLM.
    )

    traces = client.fetch_traces(trace_id)
    print(f"Traces: {traces}")
    results = await client.evaluate(  # Metrics can be optionally provided as a list or a single metric
        trace=traces,
        reference=reference,
        reference_trace_id=reference_id,
    )

    display_evaluation_results(results)
    print(results.model_dump_json(indent=4))
    time_taken = round(time.perf_counter() - started, 2)
    print(f"Time taken for evaluation: {time_taken} seconds")

## 1. Text Analysis Agent  
- This is a FlotorchADK agent that has access to a single tool.
- **Goal:** Analyze a sentence using the `sentence_breakdown` tool.  
- **Demo Flow:**  
  1. Run agent with a query.  
  2. Capture trace.  
  3. Evaluate trajectory against a reference.

In [None]:
# Text Analysis Tool
def sentence_breakdown(sentence: str) -> str:
    """
    Summarise a sentence as counts of words, characters, letters, digits and spaces.
    Args:
        sentence (str): The input sentence.
    Returns:
        str: A multi-line summary with one count per line.
    """
    # Label/value pairs in the order they appear in the report.
    counts = (
        ("Words", len(sentence.split())),
        ("Characters (including spaces)", len(sentence)),
        ("Letters", sum(map(str.isalpha, sentence))),
        ("Digits", sum(map(str.isdigit, sentence))),
        ("Spaces", sum(map(str.isspace, sentence))),
    )
    report = ["Sentence Breakdown:"]
    report.extend(f"- {label}: {value}" for label, value in counts)
    return "\n".join(report)



#### Here we're executing the text-analyzer-agent using the FlotorchADKAgent

In [None]:
# Main Async Execution
async def run_agent_in_sdk_mode():
    """Run the text-analyzer agent on one sentence and store the session in memory.

    Returns:
        A ``(agent_client, response)`` tuple: the FlotorchADKAgent client
        and the agent's reply text.
    """
    app_name = "text_analysis_app"
    user_id = "text_user_001"

    # NOTE(review): SDK tracing is switched off here although the banner
    # printed below says "SDK Tracing Mode" — confirm this is intended.
    runner, agent_client = create_runner(
        agent_name="text-analyzer-agent",  # Name of the agent as in the Flotorch Console
        tools=[FunctionTool(sentence_breakdown)],  # Tools available to the agent
        enable_sdk_tracing=False,
        app_name=app_name,
    )

    session = await runner.session_service.create_session(
        app_name=app_name,
        user_id=user_id,
    )

    answer = run_single_turn(
        runner,
        "The quick brown fox jumps over 13 lazy dogs.",
        session.id,
        user_id,
    )

    # Re-read the session after the turn and hand it to the memory service.
    finished = await runner.session_service.get_session(
        app_name=app_name,
        user_id=user_id,
        session_id=session.id,
    )
    await runner.memory_service.add_session_to_memory(finished)

    print("=== SDK Tracing Mode Response ===")
    print(answer)

    return agent_client, answer


async def main():
    agent_client, response = await run_agent_in_sdk_mode()
    return agent_client

agent_client = await main()


#### Create a reference trajectory that will be used as a reference for **TrajectoryEvalWithLLMWithReference**

You can create a reference in two ways:

- Manually: Write the reference yourself in the required format.

- Using an existing trace: If you already have a trace ID from a previous run, you can pass it to the evaluate method via the reference_trace_id parameter.

`Note`: You can provide either a manual reference or a trace ID, but not both at the same time.

In [None]:
# Reference trajectory for the text-analysis run: a single call to the
# `sentence_breakdown` tool followed by a final answer built from its output.
# The counts in the final response are what the tool reports for this sentence.

REFERENCE_TRAJECTORY = {
    "input": "The quick brown fox jumps over 13 lazy dogs.",
    "expected_steps": [
        {
            "thought": "The user has given me a sentence to analyze. I need to call the `sentence_breakdown` tool with the full sentence.",
            "tool_call": {
                "name": "sentence_breakdown",
                "arguments": {"sentence": "The quick brown fox jumps over 13 lazy dogs."}
            }
        },
        {
            "thought": "The tool has returned the counts. I will now present the breakdown to the user as the final answer.",
            "final_response": "The sentence contains 9 words, 44 characters (including spaces), 33 letters, 2 digits, and 8 spaces."
        }
    ]
}

# Build the schema object from the dict (presumably this validates its shape — confirm).
from flotorch_eval.agent_eval.core.schemas import ReferenceTrajectory
validated_ref = ReferenceTrajectory(**REFERENCE_TRAJECTORY)
print(validated_ref)


#### Evaluate the trajectory using the trace id and the reference trajectory
- A breakdown of the evaluation is displayed: each metric, its corresponding score (if relevant), and a justification for the score.

In [None]:
async def main():
    trace_ids = agent_client.get_tracer_ids() 
    for trace_id in trace_ids:
        if trace_id:
            print(f"Evaluating trace id: {trace_id}")
            await evaluate_trajectory(
                trace_id=trace_id,
                reference=REFERENCE_TRAJECTORY
            )

await main()  

## 2. Weather Report Simulator  
- This is a FlotorchADK agent that has access to multiple tools.
- **Goal:** Generate a weather report for a given city.  
- **Tools Used:**  
  - `get_temperature` → Random temperature.  
  - `get_conditions` → Random weather condition.  
  - `generate_advice` → Suggests advice based on condition.  
- **Demo Flow:**  
  1. Agent queries tools in sequence.  
  2. Response combines temperature + condition + advice.  
  3. Evaluated against reference trajectory.

In [None]:
import random

def get_temperature(city: str) -> int:
    """Simulate a temperature reading in Celsius for a city.

    The city name is accepted but not consulted; the value is drawn at
    random from -5 to 40 inclusive.

    Args:
        city (str): The name of the city.

    Returns:
        int: A random temperature in Celsius.
    """
    coldest, hottest = -5, 40
    return random.randint(coldest, hottest)

def get_conditions(city: str) -> str:
    """Simulate the current weather condition for a city.

    The city name is accepted but not consulted; one of five fixed
    conditions is picked at random.

    Args:
        city (str): The name of the city.

    Returns:
        str: A random weather condition.
    """
    possible = ("sunny", "rainy", "cloudy", "stormy", "snowy")
    return random.choice(possible)

def generate_advice(condition: str) -> str:
    """Return advice based on the weather condition.

    Matching ignores letter case and surrounding whitespace, so "Rainy" or
    " sunny " get the same advice as their lowercase forms. Unrecognised or
    non-string input gets a generic greeting.

    Args:
        condition (str): The weather condition.

    Returns:
        str: Advice based on the weather condition.
    """
    fallback = "Have a great day!"
    # The argument is supplied by the model at call time; tolerate a
    # non-string value instead of raising.
    if not isinstance(condition, str):
        return fallback

    advice_by_condition = {
        "rainy": "Carry an umbrella with you.",
        "sunny": "Wear sunscreen and stay hydrated.",
        "cloudy": "A light jacket might be useful.",
        "stormy": "Stay indoors if possible and be cautious.",
        "snowy": "Dress warmly and watch for icy roads.",
    }
    return advice_by_condition.get(condition.strip().lower(), fallback)


In [None]:

# Main Async Execution
async def run_agent_in_sdk_mode():
    """Run the weather-report agent on a single query with SDK tracing enabled.

    Returns:
        A ``(agent_client, response)`` tuple: the FlotorchADKAgent client
        and the agent's reply text.
    """
    app_name = "weather_report_app"
    user_id = "weather_user_001"
    weather_tools = [
        FunctionTool(fn)
        for fn in (get_temperature, get_conditions, generate_advice)
    ]

    runner, agent_client = create_runner(
        agent_name="weather-report-simulator",
        tools=weather_tools,
        enable_sdk_tracing=True,
        app_name=app_name,
    )
    session = await runner.session_service.create_session(
        app_name=app_name,
        user_id=user_id,
    )

    answer = run_single_turn(
        runner,
        "Generate today’s weather report for Paris",
        session.id,
        user_id,
    )

    print("=== SDK Tracing Mode Response ===")
    print(answer)
    return agent_client, answer

async def main():
    agent_client, response = await run_agent_in_sdk_mode()
    return agent_client

agent_client = await main()  

In [None]:
# Reference trajectory for the weather-report run: two tool-calling steps
# (temperature and conditions, then advice) followed by a final response.
# NOTE(review): this dict uses `context` / `goal` / `examples` keys, whereas
# the earlier reference in this notebook uses `input` / `expected_steps` and
# is passed through ReferenceTrajectory. Confirm the evaluator accepts this shape.
# NOTE(review): intermediate steps carry the string "null" as `response`,
# not None — confirm that is what the evaluator expects.
REFERENCE_TRAJECTORY = {
    "context": "What is the weather report for Paris?",
    "goal": "Generate a weather report for the requested city, including temperature, condition, and advice.",
    "examples": [
        {
            "thought": "The user wants a weather report for Paris. I need to get the temperature, the weather condition, and then generate advice based on the condition. I will call the tools in this order: `get_temperature`, `get_conditions`, and `generate_advice`.",
            "actions": [
                {
                    "tool_name": "get_temperature",
                    "parameters": {
                        "city": "Paris"
                    }
                },
                {
                    "tool_name": "get_conditions",
                    "parameters": {
                        "city": "Paris"
                    }
                }
            ],
            "response": "null"
        },
        {
            "thought": "I have the temperature (15°C) and the condition ('rainy') for Paris. Now I need to get the appropriate advice for a rainy day by calling the `generate_advice` tool.",
            "actions": [
                {
                    "tool_name": "generate_advice",
                    "parameters": {
                        "condition": "rainy"
                    }
                }
            ],
            "response": "null"
        },
        {
            "thought": "I have successfully gathered all the necessary information: the temperature in Paris is 15°C, the condition is rainy, and the advice is to carry an umbrella. Now I will combine this information into a comprehensive weather report for the user.",
            "actions": [],
            "response": "The weather in Paris is currently 15°C and rainy. Carry an umbrella with you."
        }
    ]
}


In [None]:
async def main():
    trace_ids = agent_client.get_tracer_ids() 
    for trace_id in trace_ids:
        if trace_id:
            print(f"Evaluating trace id: {trace_id}")
            await evaluate_trajectory(
                trace_id=trace_id,
                reference=REFERENCE_TRAJECTORY
            )

await main()  

## 3. Fun Fact Generator
- This is an agent that does not have access to any tools. If the agent is still able to answer the query without using any tools, it can be considered a successful run.
- **Goal:** Respond with a fun fact.  
- **Tools:** None (LLM-only reasoning).  
- **Demo Flow:**  
  1. Agent generates fun fact.  
  2. Evaluated without reference trajectory.

In [None]:

# Main Async Execution
async def run_agent_in_sdk_mode():
    """Run the tool-less fun-fact agent on a single query with SDK tracing enabled.

    Returns:
        A ``(agent_client, response)`` tuple: the FlotorchADKAgent client
        and the agent's reply text.
    """
    app_name = "fun_fact_app"
    user_id = "fun_fact_user_001"

    runner, agent_client = create_runner(
        agent_name="fun-fact-generator",
        tools=[],  # this agent answers without any tools
        enable_sdk_tracing=True,
        app_name=app_name,
    )
    session = await runner.session_service.create_session(
        app_name=app_name,
        user_id=user_id,
    )

    answer = run_single_turn(runner, "Tell me a fun fact.", session.id, user_id)

    print("=== SDK Tracing Mode Response ===")
    print(answer)
    return agent_client, answer

async def main():
    agent_client, response = await run_agent_in_sdk_mode()
    return agent_client

agent_client = await main()

In [None]:
async def main():
    trace_ids = agent_client.get_tracer_ids() 
    for trace_id in trace_ids:
        if trace_id:
            print(f"Evaluating trace id: {trace_id}")
            await evaluate_trajectory(
                trace_id=trace_id
            )

await main()  