## Task 5. Create and initialize a new Colab Enterprise notebook

In [1]:
# Show which pip version the kernel's environment currently uses.
%pip --version

pip 25.1.1 from /usr/local/lib/python3.11/dist-packages/pip (python 3.11)


In [2]:
# Pin pip to 24.0, replacing the preinstalled version reported by the previous cell.
# NOTE(review): the reason for the downgrade is not stated in this notebook —
# presumably a lab / dependency-resolution requirement; confirm before changing.
%pip install pip==24.0

Collecting pip==24.0
  Downloading pip-24.0-py3-none-any.whl.metadata (3.6 kB)
Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 25.1.1
    Uninstalling pip-25.1.1:
      Successfully uninstalled pip-25.1.1
Successfully installed pip-24.0


In [3]:
# Confirm that the pinned pip version is now the active one.
%pip --version

pip 24.0 from /usr/local/lib/python3.11/dist-packages/pip (python 3.11)


In [4]:
# Install/upgrade the Vertex AI SDK (agent_engines, evaluation, langchain and
# autologging extras) plus supporting libraries into the user site-packages.
# The session has to be restarted after this cell (see the note below the output).
# NOTE(review): "google-cloud-aiplatform" is listed three times with different
# extras; a single "google-cloud-aiplatform[agent_engines,evaluation,langchain,autologging]"
# entry should be equivalent — confirm before consolidating.
%pip install --upgrade --user --quiet "google-cloud-aiplatform[agent_engines,evaluation,langchain]" \
    "langchain_google_vertexai" \
    "google-cloud-aiplatform" \
    "google-cloud-logging" \
    "google-cloud-aiplatform[autologging]" \
    "cloudpickle==3.0.0" \
    "pydantic>=2.10" \
    "requests"

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.9/17.9 MB[0m [31m76.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m150.3/150.3 kB[0m [31m13.6 MB/s[0m eta [36m0:00:00[0m
[0m

==> Restart the session now.

In [1]:
# General
import random
import string
import google.cloud.logging
import logging

from IPython.display import HTML, Markdown, display
import pandas as pd

# Build agent
import vertexai
from google.cloud import aiplatform
from vertexai import agent_engines

# Evaluate agent
from vertexai.preview.evaluation import EvalTask
from vertexai.preview.evaluation.metrics import (
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
    TrajectorySingleToolUse,
)
from vertexai.preview.reasoning_engines import LangchainAgent

# Do not remove logging section
# Creates a Cloud Logging client and calls its setup_logging().
# NOTE(review): presumably this routes records from the standard `logging`
# module to Cloud Logging — confirm against the google-cloud-logging docs.
client = google.cloud.logging.Client()
client.setup_logging()

In [2]:
# Notebook-wide settings: target project, region, staging bucket and the
# experiment under which evaluation runs are recorded.
PROJECT_ID = "qwiklabs-gcp-03-d419223c1a66"
LOCATION = "us-central1"
BUCKET_URI = "gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket"
EXPERIMENT_NAME = "evaluate-agent"

import vertexai

# Point the Vertex AI SDK at the settings above for the rest of the session.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
    experiment=EXPERIMENT_NAME,
)

INFO:root:MetadataStore default not found.


## Task 6. Build agent and execute a query

In [3]:
# General
import random
import string

from IPython.display import HTML, Markdown, display

# Build agent
import vertexai
from google.cloud import aiplatform
from vertexai import agent_engines

from vertexai.preview.reasoning_engines import LangchainAgent

In [4]:
def get_product_details(product_name: str):
    """Gathers basic details about a product.

    The lookup ignores letter case and surrounding or repeated whitespace, so
    "Garden Furniture" and "  garden   furniture " both match the
    "garden furniture" entry.

    Args:
        product_name: Name of the product to look up, e.g. "oled tv".

    Returns:
        The product's description, or "Product details not found." when the
        name is not in the catalog or is not a string.
    """
    # NOTE(review): this function is passed to the agent as a tool; the docstring
    # is presumably what the model sees as the tool description — keep it accurate.
    details = {
        "mens blue shorts": "Elevate your summer style with these tailored blue dress shorts. Featuring a modern slim fit and breathable cotton-blend fabric, they deliver all-day comfort with a refined look. Finished with a flat front, belt loops, and sleek pockets, they're perfect for everything from rooftop brunches to casual office days.",
        "floral dress": "Stay cool and stylish in this lightweight floral midi dress, featuring a flattering cinched waist, flowing A-line skirt, and delicate ruffle details. Perfect for sunny brunches or sunset strolls, it blends feminine charm with effortless ease for any summer occasion.",
        "garden furniture": "Create your perfect outdoor retreat with this stylish, weather-resistant garden furniture set featuring plush cushions, timeless design, and all-day comfort — ideal for relaxing, entertaining, or enjoying the outdoors in style.",
        "oled tv": "Experience stunning 4K clarity, vibrant color, and perfect blacks with this OLED Smart TV — featuring AI-powered optimization, cinematic sound, and a sleek design for immersive, next-gen home entertainment.",
        "dishwasher": "The SmartWash Dishwasher delivers powerful, quiet cleaning with advanced spray tech, smart cycles, and app control—saving time, water, and energy while adding modern style to your kitchen..",
    }
    not_found = "Product details not found."
    if not isinstance(product_name, str):
        return not_found
    # Catalog keys are lowercase with single spaces; normalize the query the
    # same way so differently-cased or padded names still match.
    normalized_name = " ".join(product_name.split()).lower()
    return details.get(normalized_name, not_found)


def get_product_price(product_name: str):
    """Gathers price about a product.

    The lookup ignores letter case and surrounding or repeated whitespace, so
    "Floral Dress" and "  floral   dress " both match the "floral dress" entry.

    Args:
        product_name: Name of the product to look up, e.g. "dishwasher".

    Returns:
        The product's price as an integer, or the string
        "Product price not found." when the name is not in the catalog or is
        not a string.
    """
    # NOTE(review): this function is passed to the agent as a tool; the docstring
    # is presumably what the model sees as the tool description — keep it accurate.
    details = {
        "mens blue shorts": 50,
        "floral dress": 100,
        "garden furniture": 1000,
        "oled tv": 1500,
        "dishwasher": 400,
    }
    not_found = "Product price not found."
    if not isinstance(product_name, str):
        return not_found
    # Catalog keys are lowercase with single spaces; normalize the query the
    # same way so differently-cased or padded names still match.
    normalized_name = " ".join(product_name.split()).lower()
    return details.get(normalized_name, not_found)

In [5]:
# Model identifier handed to the agent below.
model_name = "gemini-2.0-flash"

In [6]:
# Build the agent locally: the chosen model plus the two product lookup
# functions registered as tools.
local_1p_agent = LangchainAgent(
    model=model_name,
    tools=[get_product_details, get_product_price],
    # Ask the executor to return intermediate steps along with the final output.
    # NOTE(review): presumably these steps are what the trajectory evaluations
    # later in the notebook compare against — confirm.
    agent_executor_kwargs={"return_intermediate_steps": True},
)

In [7]:
# Smoke-test the local agent with a product-details question and render the
# final answer as Markdown.
response = local_1p_agent.query(input="Get product details for garden furniture")
display(Markdown(response["output"]))

Ok. I have the following details about garden furniture: Create your perfect outdoor retreat with this stylish, weather-resistant garden furniture set featuring plush cushions, timeless design, and all-day comfort — ideal for relaxing, entertaining, or enjoying the outdoors in style.


In [8]:
# Smoke-test the local agent with a price question and render the final
# answer as Markdown.
response = local_1p_agent.query(input="Get product price for garden furniture")
display(Markdown(response["output"]))

The price of the garden furniture is 1000.


In [9]:
# Deploy the local agent as a remote Agent Engine instance. `requirements`
# is the package list written to requirements.txt in the staging bucket
# (see the log output below).
remote_1p_agent = agent_engines.create(
    local_1p_agent,
    requirements=[
        "google-cloud-aiplatform[agent_engines,langchain]",
        "langchain_google_vertexai",
        "cloudpickle==3.0.0",
        "pydantic>=2.10",
        "requests",
    ],
)

INFO:vertexai.agent_engines:Identified the following requirements: {'pydantic': '2.11.7', 'google-cloud-aiplatform': '1.100.0', 'cloudpickle': '3.0.0'}
INFO:vertexai.agent_engines:The final list of requirements: ['google-cloud-aiplatform[agent_engines,langchain]', 'langchain_google_vertexai', 'cloudpickle==3.0.0', 'pydantic>=2.10', 'requests']
INFO:vertexai.agent_engines:Using bucket qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket
INFO:vertexai.agent_engines:Wrote to gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/agent_engine/agent_engine.pkl
INFO:vertexai.agent_engines:Writing to gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/agent_engine/requirements.txt
INFO:vertexai.agent_engines:Creating in-memory tarfile of extra_packages
INFO:vertexai.agent_engines:Writing to gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/agent_engine/dependencies.tar.gz
INFO:vertexai.agent_engines:Creating AgentEngine
INFO:vertexai.agent_engines:Create AgentEngi

In [10]:
# Same product-details question as before, now sent to the deployed agent.
response = remote_1p_agent.query(input="Get product details for garden furniture")
display(Markdown(response["output"]))

The garden furniture set is stylish and weather-resistant, featuring plush cushions and a timeless design. It's designed for relaxation, entertaining, and enjoying the outdoors in style.


In [11]:
# Same price question as before, now sent to the deployed agent.
response = remote_1p_agent.query(input="Get product price for garden furniture")
display(Markdown(response["output"]))

The price of the garden furniture is 1000.


## Task 7. Evaluate the agent using Single Tool Selection

In [12]:
# Evaluation set: each prompt is paired, by position, with the ordered list of
# tool calls the agent is expected to make for it.
eval_data = {
    "prompt": [
        "Get price for mens blue shorts",
        "Get product details and price for floral dress",
    ],
    "reference_trajectory": [
        # Prompt 1: a single price lookup.
        [
            {"tool_name": "get_product_price", "tool_input": {"product_name": "mens blue shorts"}},
        ],
        # Prompt 2: details first, then price.
        [
            {"tool_name": "get_product_details", "tool_input": {"product_name": "floral dress"}},
            {"tool_name": "get_product_price", "tool_input": {"product_name": "floral dress"}},
        ],
    ],
}

# One row per prompt, with columns "prompt" and "reference_trajectory".
eval_sample_dataset = pd.DataFrame(eval_data)

In [13]:
# Run name under which this evaluation is recorded in the experiment.
# NOTE(review): a fixed run name may collide when the cell is re-run — confirm.
EXPERIMENT_RUN = "qwiklabs-gcp-03-d419223c1a66-single-tool-use"

# Single-tool-use metric targeting the `get_product_price` tool.
single_tool_usage_metrics = [
    TrajectorySingleToolUse(tool_name="get_product_price"),
]

# Evaluation task over the sample dataset; results go under the
# "single-metric-eval" prefix of the staging bucket.
single_tool_call_eval_task = EvalTask(
    dataset=eval_sample_dataset,
    metrics=single_tool_usage_metrics,
    experiment=EXPERIMENT_NAME,
    output_uri_prefix=f"{BUCKET_URI}/single-metric-eval",
)

# Query the deployed agent for every prompt and score the responses.
single_tool_call_eval_result = single_tool_call_eval_task.evaluate(
    runnable=remote_1p_agent,
    experiment_run_name=EXPERIMENT_RUN,
)

INFO:vertexai.preview.evaluation.eval_task:Logging Eval experiment evaluation metadata: {'output_file': 'gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/single-metric-eval/eval_results_2025-06-28-21-36-14-9cb3c.csv'}
100%|██████████| 2/2 [00:01<00:00,  1.45it/s]
INFO:vertexai.preview.evaluation._evaluation:All 2 responses are successfully generated from the runnable.
INFO:vertexai.preview.evaluation._evaluation:Computing metrics with a total of 2 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 2/2 [00:00<00:00, 12.19it/s]
INFO:vertexai.preview.evaluation._evaluation:All 2 metric requests are successfully computed.
INFO:vertexai.preview.evaluation._evaluation:Evaluation Took:0.17666605799968238 seconds


In [17]:
# NOTE(review): display_eval_report is defined in a LATER cell of this notebook
# (execution count 16, under Task 8). On a fresh kernel that cell must run
# before this one, or the definition should be moved above this cell.
display_eval_report(single_tool_call_eval_result)

### Summary Metrics

Unnamed: 0,row_count,trajectory_single_tool_use/mean,trajectory_single_tool_use/std,latency_in_seconds/mean,latency_in_seconds/std,failure/mean,failure/std
0,2.0,1.0,0.0,1.174266,0.236169,0.0,0.0


### Row-wise Metrics

Unnamed: 0,prompt,reference_trajectory,response,latency_in_seconds,failure,predicted_trajectory,trajectory_single_tool_use/score
0,Get price for mens blue shorts,"[{'tool_name': 'get_product_price', 'tool_inpu...",The price for mens blue shorts is $50.\n,1.007269,0,"[{'tool_name': 'get_product_price', 'tool_inpu...",1.0
1,Get product details and price for floral dress,"[{'tool_name': 'get_product_details', 'tool_in...",OK. I have the following information about the...,1.341263,0,"[{'tool_name': 'get_product_details', 'tool_in...",1.0


## Task 8. Evaluate the agent using Multiple Tool Selection (Trajectory) Evaluation

In [14]:
# Run name under which the trajectory evaluation is recorded in the experiment.
EXPERIMENT_RUN = "qwiklabs-gcp-03-d419223c1a66-agent-trajectory-evaluation"

# Trajectory metrics, referenced by name; each is computed per dataset row.
trajectory_metrics = [
    "trajectory_exact_match",
    "trajectory_in_order_match",
    "trajectory_any_order_match",
    "trajectory_recall",
]

trajectory_eval_task = EvalTask(
    dataset=eval_sample_dataset,
    metrics=trajectory_metrics,
    experiment=EXPERIMENT_NAME,
    # Keep these results under their own prefix. The prefix used to be
    # "/single-metric-eval" (copied from the single-tool evaluation), which
    # mixed both evaluations' files in one folder and did not match the
    # "multiple-metric-eval" path in the recorded output.
    output_uri_prefix=BUCKET_URI + "/multiple-metric-eval",
)

# Query the deployed agent for every prompt and score the predicted trajectories.
trajectory_eval_result = trajectory_eval_task.evaluate(
    runnable=remote_1p_agent,
    experiment_run_name=EXPERIMENT_RUN,
)

INFO:vertexai.preview.evaluation.eval_task:Logging Eval experiment evaluation metadata: {'output_file': 'gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/multiple-metric-eval/eval_results_2025-06-28-21-51-54-86d8d.csv'}
100%|██████████| 2/2 [00:01<00:00,  1.67it/s]
INFO:vertexai.preview.evaluation._evaluation:All 2 responses are successfully generated from the runnable.
INFO:vertexai.preview.evaluation._evaluation:Computing metrics with a total of 8 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 8/8 [00:00<00:00, 10.47it/s]
INFO:vertexai.preview.evaluation._evaluation:All 8 metric requests are successfully computed.
INFO:vertexai.preview.evaluation._evaluation:Evaluation Took:0.7770108800004891 seconds


In [16]:
def display_eval_report(eval_result: "vertexai.preview.evaluation.EvalResult") -> None:
    """Display the evaluation results.

    Renders a "Summary Metrics" heading followed by the summary metrics as a
    one-row table, then a "Row-wise Metrics" heading followed by the per-row
    metrics table.

    Args:
        eval_result: The object returned by ``EvalTask.evaluate``. Only its
            ``summary_metrics`` mapping and ``metrics_table`` attribute are
            read. It is a result object, not a DataFrame, which is why the
            annotation no longer says ``pd.DataFrame``.
    """
    # from_dict(orient="index") puts one metric per row; transpose so the
    # metrics become columns of a single-row frame.
    metrics_df = pd.DataFrame.from_dict(eval_result.summary_metrics, orient="index").T
    display(Markdown("### Summary Metrics"))
    display(metrics_df)

    display(Markdown("### Row-wise Metrics"))
    display(eval_result.metrics_table)

# Show the summary and row-wise metrics of the trajectory evaluation.
display_eval_report(trajectory_eval_result)

### Summary Metrics

Unnamed: 0,row_count,trajectory_exact_match/mean,trajectory_exact_match/std,trajectory_in_order_match/mean,trajectory_in_order_match/std,trajectory_any_order_match/mean,trajectory_any_order_match/std,trajectory_recall/mean,trajectory_recall/std,latency_in_seconds/mean,latency_in_seconds/std,failure/mean,failure/std
0,2.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.09125,0.148796,0.0,0.0


### Row-wise Metrics

Unnamed: 0,prompt,reference_trajectory,response,latency_in_seconds,failure,predicted_trajectory,trajectory_exact_match/score,trajectory_in_order_match/score,trajectory_any_order_match/score,trajectory_recall/score
0,Get price for mens blue shorts,"[{'tool_name': 'get_product_price', 'tool_inpu...",The price for mens blue shorts is $50.\n,0.986035,0,"[{'tool_name': 'get_product_price', 'tool_inpu...",1.0,1.0,1.0,1.0
1,Get product details and price for floral dress,"[{'tool_name': 'get_product_details', 'tool_in...",The floral dress is a lightweight floral midi ...,1.196465,0,"[{'tool_name': 'get_product_details', 'tool_in...",1.0,1.0,1.0,1.0


## Task 9. Evaluate the quality of the agent response to prompts

In [18]:
# Response-quality metrics, referenced by name.
response_metrics = [
    "safety",
    "coherence",
]

In [19]:
# Run name under which the response-quality evaluation is recorded.
EXPERIMENT_RUN = "qwiklabs-gcp-03-d419223c1a66-agent-response-evaluation"

# Evaluation task over the sample dataset; results go under the
# "response-metric-eval" prefix of the staging bucket.
response_eval_task = EvalTask(
    dataset=eval_sample_dataset,
    metrics=response_metrics,
    experiment=EXPERIMENT_NAME,
    output_uri_prefix=f"{BUCKET_URI}/response-metric-eval",
)

# Query the deployed agent for every prompt and score the final responses.
response_eval_result = response_eval_task.evaluate(
    runnable=remote_1p_agent,
    experiment_run_name=EXPERIMENT_RUN,
)


INFO:vertexai.preview.evaluation.eval_task:Logging Eval experiment evaluation metadata: {'output_file': 'gs://qwiklabs-gcp-03-d419223c1a66-experiments-staging-bucket/response-metric-eval/eval_results_2025-06-28-21-56-59-fcaba.csv'}
100%|██████████| 2/2 [00:01<00:00,  1.56it/s]
INFO:vertexai.preview.evaluation._evaluation:All 2 responses are successfully generated from the runnable.
INFO:vertexai.preview.evaluation._evaluation:Computing metrics with a total of 4 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 4/4 [00:00<00:00,  4.36it/s]
INFO:vertexai.preview.evaluation._evaluation:All 4 metric requests are successfully computed.
INFO:vertexai.preview.evaluation._evaluation:Evaluation Took:0.9361447480005154 seconds


In [20]:
# Show the per-row metrics table of the response-quality evaluation.
display(response_eval_result.metrics_table)

Unnamed: 0,prompt,reference_trajectory,response,latency_in_seconds,failure,predicted_trajectory,safety/explanation,safety/score,coherence/explanation,coherence/score
0,Get price for mens blue shorts,"[{'tool_name': 'get_product_price', 'tool_inpu...",The price for mens blue shorts is 50.\n,0.932998,0,"[{'tool_name': 'get_product_price', 'tool_inpu...",The response is safe because it does not conta...,1.0,The response is completely coherent as it prov...,5.0
1,Get product details and price for floral dress,"[{'tool_name': 'get_product_details', 'tool_in...",OK. The floral dress is a lightweight floral m...,1.281225,0,"[{'tool_name': 'get_product_details', 'tool_in...",The response is safe as it only provides produ...,1.0,The response is completely coherent because it...,5.0
