## Task 1. Initialize Vertex AI in a Colab Enterprise notebook

In [1]:
# Install/upgrade the Vertex AI SDK together with its "evaluation" extra.
# Requesting the extra already installs the base package, and quoting the
# requirement keeps the shell from treating the square brackets as a glob.
%pip install --upgrade --quiet "google-cloud-aiplatform[evaluation]"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/7.7 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/7.7 MB[0m [31m63.5 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m7.7/7.7 MB[0m [31m113.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m73.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m118.6/118.6 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m739.1/739.1 kB[0m [31m50.9 MB/s[0m eta [36m0:00:00[0m
[?25h

Restart the runtime so that the newly installed packages are picked up.

In [1]:
# imports & configuration
import datetime
import nest_asyncio
import pandas as pd
from IPython.display import display, Markdown, HTML

import vertexai
from vertexai.generative_models import GenerativeModel

pd.set_option('display.max_colwidth', None)

In [2]:
# Initialize the Vertex AI SDK for this lab's project and region.
# `vertexai` is already imported in the imports cell, so it is not
# re-imported here.
PROJECT_ID = "qwiklabs-gcp-02-095fe579c45b"  # lab-specific project ID; replace with your own
LOCATION = "us-west1"
vertexai.init(project=PROJECT_ID, location=LOCATION)

## Task 2. Explore example data and use it to generate content

In [3]:
# Download some example data: copy the apartment CSV from the Cloud Storage
# bucket into the current working directory (the trailing ".").
!gcloud storage cp gs://partner-genai-bucket/genai065/apartment_table.csv .

Copying gs://partner-genai-bucket/genai065/apartment_table.csv to file://./apartment_table.csv


In [4]:
# Peek at the raw CSV (header row plus the first records) before loading it.
!head apartment_table.csv

Address,Unit,Sqft,Bedrooms,Elevator,Washer & Dryer in Unit,Pets Allowed,Notable features
"123 West 14th Street, New York, NY 10014",2E,550,2,yes,no,yes,"doorman, pool in the building, shared roof deck with grills"
"456 East 57th Street, New York, NY 10022",1A,789,1,no,yes,no,"bike room, package service"
"789 Broadway, New York, NY 10003",C,999,3,yes,yes,yes,"excellent laundry room, great city views"
"1011 5th Avenue, New York, NY 10028",30,1024,2,no,no,yes,"great view of Central Park, high ceilings"
"2222 Park Avenue, New York, NY 10017",4F,1234,1,no,yes,no,right next to soccer fields at the park
"3333 7th Avenue, Brooklyn, NY 11201",3B,1456,1,yes,yes,no,great restaurant row nearby
"4444 Flatbush Avenue, Brooklyn, NY 11226",PHA,1678,1,yes,no,yes,"enormous bedrooms, new kitchen"
"5555 Amsterdam Avenue, New York, NY 10024",23,1800,4,no,yes,yes,private terrace with umbrellas
"6666 1st Avenue, New York, NY 10028",Unit C,1900,2,yes,yes,no,back yard


In [5]:
# Read the downloaded CSV into a pandas DataFrame and preview its first five rows.
apartment_df = pd.read_csv(filepath_or_buffer="apartment_table.csv")
apartment_df.head(n=5)

Unnamed: 0,Address,Unit,Sqft,Bedrooms,Elevator,Washer & Dryer in Unit,Pets Allowed,Notable features
0,"123 West 14th Street, New York, NY 10014",2E,550,2,yes,no,yes,"doorman, pool in the building, shared roof deck with grills"
1,"456 East 57th Street, New York, NY 10022",1A,789,1,no,yes,no,"bike room, package service"
2,"789 Broadway, New York, NY 10003",C,999,3,yes,yes,yes,"excellent laundry room, great city views"
3,"1011 5th Avenue, New York, NY 10028",30,1024,2,no,no,yes,"great view of Central Park, high ceilings"
4,"2222 Park Avenue, New York, NY 10017",4F,1234,1,no,yes,no,right next to soccer fields at the park


In [6]:
# Convert the DataFrame into a list of per-apartment dicts keyed by column
# name, then show the first record.
apartment_records = apartment_df.to_dict("records")
apartment_records[0]

{'Address': '123 West 14th Street, New York, NY 10014',
 'Unit': '2E',
 'Sqft': 550,
 'Bedrooms': 2,
 'Elevator': 'yes',
 'Washer & Dryer in Unit': 'no',
 'Pets Allowed': 'yes',
 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}

### Note: the "Gemini API" must be enabled before running the next cell

In [9]:
# Generate some text: instantiate the generative model, define the instruction
# prompt, and generate content for the first example record.
model = GenerativeModel(
    model_name="gemini-2.5-pro",
    generation_config=dict(temperature=0, top_p=0.4),
)

prompt = "Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: "

# Send the instructions plus the first record to the model and render the
# returned text as Markdown so it displays nicely in the notebook.
Markdown(
    model.generate_content(prompt + str(apartment_records[0])).text
)

Experience the ultimate New York lifestyle in this exceptional 2-bedroom home, perfectly situated at the vibrant crossroads of the West Village and Chelsea. Your new life at 123 West 14th Street includes the unparalleled luxury of a full-time doorman and an elevator, but the true showstoppers are the exclusive resident amenities. Spend your summers lounging by the stunning indoor pool, or host unforgettable evenings with friends on the spectacular shared roof deck, complete with grills and breathtaking city views. Inside your pet-friendly sanctuary, you'll find a smartly designed layout that offers the rare flexibility of a second bedroom, perfect for a home office or guest room. This isn't just an apartment; it's your all-access pass to the best of city living.

## Task 3. Configure and trigger a model-based evaluation

In [10]:
# Build the inputs for the evaluation dataset. Each example's prompt is the
# instruction text defined earlier followed by that apartment's context.

# "Context" is the supplemental, example-specific information the model needs
# in order to fulfill the instructions. Here it is the string form of each
# apartment record.
contexts = list(map(str, apartment_records))

# Prepend the shared prompt instructions to every context to obtain the
# full prompt for each apartment.
full_prompts = [prompt + context for context in contexts]

print(full_prompts[0])

Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}


In [11]:
# Rather than generating the responses yourself, create an evaluation dataset
# that contains only the prompts; the evaluation service will generate the
# responses for you as part of the evaluation task.

# Note: Because of Qwiklabs quota limitations, the evaluation dataset is
# limited to 5 examples. The best-practice recommendation would be to include
# around 100 examples (possibly 400-600 -- TODO confirm) covering the types of
# inputs your model might see.

eval_dataset = pd.DataFrame({"prompt": full_prompts[:5]})

In [12]:
# Display the evaluation dataset (a single "prompt" column).
eval_dataset

Unnamed: 0,prompt
0,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}"
1,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '456 East 57th Street, New York, NY 10022', 'Unit': '1A', 'Sqft': 789, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'bike room, package service'}"
2,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '789 Broadway, New York, NY 10003', 'Unit': 'C', 'Sqft': 999, 'Bedrooms': 3, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'yes', 'Notable features': 'excellent laundry room, great city views'}"
3,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '1011 5th Avenue, New York, NY 10028', 'Unit': '30', 'Sqft': 1024, 'Bedrooms': 2, 'Elevator': 'no', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'great view of Central Park, high ceilings'}"
4,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '2222 Park Avenue, New York, NY 10017', 'Unit': '4F', 'Sqft': 1234, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'right next to soccer fields at the park'}"


In [13]:
# Now you'll explore some of the classes available for evaluation. Run the following imports and display the list of available MetricPromptTemplateExamples:

from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PairwiseMetricPromptTemplate,
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
)

# The cell's last expression is shown as its output: the names of the example
# metrics, including the `pairwise_` variants.
MetricPromptTemplateExamples.list_example_metric_names()

['coherence',
 'fluency',
 'safety',
 'groundedness',
 'instruction_following',
 'verbosity',
 'text_quality',
 'summarization_quality',
 'question_answering_quality',
 'multi_turn_chat_quality',
 'multi_turn_safety',
 'pairwise_coherence',
 'pairwise_fluency',
 'pairwise_safety',
 'pairwise_groundedness',
 'pairwise_instruction_following',
 'pairwise_verbosity',
 'pairwise_text_quality',
 'pairwise_summarization_quality',
 'pairwise_question_answering_quality',
 'pairwise_multi_turn_chat_quality',
 'pairwise_multi_turn_safety']

In [14]:
# Instead of using the documentation to review the prompt as you did earlier, you can view the criteria and ratings for each metric within your code like so:
# (print() is used so the multi-line template is shown with its line breaks.)
print(MetricPromptTemplateExamples.get_prompt_template('groundedness'))


# Instruction
You are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.
We will provide you with the user input and an AI-generated response.
You should first read the user input carefully for analyzing the task, and then evaluate the quality of the responses based on the criteria provided in the Evaluation section below.
You will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step by step explanations for your rating, and only choose ratings from the Rating Rubric.


# Evaluation
## Metric Definition
You will be assessing groundedness, which measures the ability to provide or reference information included only in the user prompt.

## Criteria
Groundedness: The response contains information included only in the user prompt. The response does not reference any outside information.

## Rating Rubric
1: (Fully grounded). All aspects of the response are attributable to the context.
0: (Not fully grounde

In [15]:
# Create the EvalTask by pairing the prompt-only dataset with the selected
# metric (pointwise groundedness). Supplying an experiment name lets you track
# your evaluations in Vertex AI Experiments.

eval_task = EvalTask(
    experiment="apartment-listing-generation",
    metrics=[MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS],
    dataset=eval_dataset,
)

In [17]:
# Run the task's evaluate() method under a unique, timestamped run name.
# Because the model to evaluate is passed in, the EvalTask can generate the
# responses needed to complete the evaluation dataset.

run_ts = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
eval_result = eval_task.evaluate(
    experiment_run_name="apt-gen-" + run_ts,
    model=model,
)

# Keep track of the results in a list, which is used to plot the runs
# against each other later on.
eval_results_to_compare = [eval_result]

INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.5-pro', 'temperature': 0, 'top_p': 0.4}
INFO:vertexai.evaluation._evaluation:Generating a total of 5 responses from Gemini model gemini-2.5-pro.
100%|██████████| 5/5 [00:20<00:00,  4.02s/it]
INFO:vertexai.evaluation._evaluation:All 5 responses are successfully generated from Gemini model gemini-2.5-pro.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 20.10737628800007 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 5 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 5/5 [00:05<00:00,  1.02s/it]
INFO:vertexai.evaluation._evaluation:All 5 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:5.118706190000012 seconds


In [18]:
# The evaluation service's results consist of three parts: a complete table of results (metrics_table),
# some summary metrics with averages and standard deviation of the metrics (summary_metrics),
# and some metadata related to the run. First look at the summary metrics:
# (for this run: the row count plus the groundedness mean and standard deviation)

eval_result.summary_metrics

{'row_count': 5, 'groundedness/mean': np.float64(1.0), 'groundedness/std': 0.0}

In [19]:
# Next, view the complete results table: one row per prompt, with the generated
# response, the groundedness explanation, and the groundedness score.
eval_result.metrics_table

Unnamed: 0,prompt,response,groundedness/explanation,groundedness/score
0,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}","Escape the ordinary and embrace resort-style living in the absolute heart of New York City at 123 West 14th Street. This pet-friendly, doorman elevator building offers an unparalleled lifestyle, featuring a stunning resident-only pool for your daily escape and a magnificent shared roof deck complete with grills and panoramic city views, perfect for entertaining. Your new home, a charming and efficiently designed two-bedroom sanctuary, provides the perfect base to explore the vibrant crossroads of Greenwich Village, Chelsea, and the Meatpacking District. This isn't just an apartment; it's your all-access pass to the ultimate New York experience, blending incredible amenities with an unbeatable location.","All the information in the response is contained within the prompt, specifically the address, pet friendly, elevator, bedrooms, and amenities.",1.0
1,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '456 East 57th Street, New York, NY 10022', 'Unit': '1A', 'Sqft': 789, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'bike room, package service'}","Discover your personal oasis in the heart of Manhattan at the prestigious 456 East 57th Street. This exceptionally spacious 789 sqft one-bedroom apartment offers a rare combination of classic charm and modern convenience, creating the perfect city sanctuary. Step inside to find a thoughtfully designed layout that includes the ultimate urban luxury: your very own in-unit washer and dryer. Your seamless lifestyle is further enhanced by building amenities such as a secure package service and a dedicated bike room, perfect for exploring the vibrant neighborhood. This is not just an apartment; it's your opportunity to experience sophisticated New York living at its absolute finest.","All information provided in the response is found in the prompt, including the address, sqft, number of bedrooms, and amenities.",1.0
2,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '789 Broadway, New York, NY 10003', 'Unit': 'C', 'Sqft': 999, 'Bedrooms': 3, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'yes', 'Notable features': 'excellent laundry room, great city views'}","Experience the pinnacle of New York living in this spectacular and rarely available 3-bedroom, 999-square-foot home in the heart of the city at 789 Broadway. Step inside this expansive, pet-friendly residence and be captivated by breathtaking city views that provide a stunning backdrop to your daily life. Located in a desirable elevator building, convenience is paramount, highlighted by the ultimate urban luxury: your very own in-unit washer and dryer. This is more than just an apartment; it's a sprawling sanctuary offering an unparalleled lifestyle, combining modern amenities with an iconic, vibrant downtown address.","All of the information in the response is directly from the prompt, so it is fully grounded.",1.0
3,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '1011 5th Avenue, New York, NY 10028', 'Unit': '30', 'Sqft': 1024, 'Bedrooms': 2, 'Elevator': 'no', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'great view of Central Park, high ceilings'}","Experience the pinnacle of New York living from this spectacular Fifth Avenue residence, where breathtaking, direct views of Central Park serve as your daily masterpiece. Sunlight pours into this expansive 1024 sqft home, amplified by soaring high ceilings that create an airy and elegant atmosphere throughout the spacious two-bedroom layout. This is a rare opportunity to inhabit a true sanctuary above the city's most iconic street, offering a perfect blend of classic grandeur and an unbeatable location. Your furry friends are welcome to enjoy this quintessential New York dream with you, so don't miss your chance to call one of the world's most prestigious addresses your own.","The response is fully grounded, as it only uses the information provided in the prompt.",1.0
4,"Write a one paragraph apartment listing to promote this apartment. Make it sound amazing: {'Address': '2222 Park Avenue, New York, NY 10017', 'Unit': '4F', 'Sqft': 1234, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'right next to soccer fields at the park'}","Discover your personal sanctuary on prestigious Park Avenue in this exceptionally rare, sprawling one-bedroom home. Offering an incredible 1,234 square feet of living space, this apartment provides a scale and comfort seldom found in the city. Enjoy the ultimate urban luxury of your own in-unit washer and dryer, and step right outside to the vibrant energy of lush park-side soccer fields, your new backyard for recreation and relaxation. Nestled as a peaceful fourth-floor walk-up, this stunning residence is more than just an apartment—it's your private oasis in the heart of New York.",All aspects of the response are attributable to the context because the response references only information found in the prompt.,1.0


## Task 4. Improve the prompt to see scores improve

In [21]:
# Revise the prompt to improve the groundedness score, and build a new
# evaluation dataset from the revised prompt.

# The change is minor, but it should stop the model from inventing as many
# details about each apartment.
updated_prompt = "Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: "

updated_full_prompts = [
    updated_prompt + context for context in map(str, apartment_records)
]

updated_eval_dataset = pd.DataFrame({"prompt": updated_full_prompts[:5]})

In [22]:
# Display the updated evaluation dataset (a single "prompt" column).
updated_eval_dataset

Unnamed: 0,prompt
0,"Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: {'Address': '123 West 14th Street, New York, NY 10014', 'Unit': '2E', 'Sqft': 550, 'Bedrooms': 2, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'doorman, pool in the building, shared roof deck with grills'}"
1,"Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: {'Address': '456 East 57th Street, New York, NY 10022', 'Unit': '1A', 'Sqft': 789, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'bike room, package service'}"
2,"Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: {'Address': '789 Broadway, New York, NY 10003', 'Unit': 'C', 'Sqft': 999, 'Bedrooms': 3, 'Elevator': 'yes', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'yes', 'Notable features': 'excellent laundry room, great city views'}"
3,"Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: {'Address': '1011 5th Avenue, New York, NY 10028', 'Unit': '30', 'Sqft': 1024, 'Bedrooms': 2, 'Elevator': 'no', 'Washer & Dryer in Unit': 'no', 'Pets Allowed': 'yes', 'Notable features': 'great view of Central Park, high ceilings'}"
4,"Write a one paragraph apartment listing highlighting the best known features of this apartment. Use only the details included in the following information: {'Address': '2222 Park Avenue, New York, NY 10017', 'Unit': '4F', 'Sqft': 1234, 'Bedrooms': 1, 'Elevator': 'no', 'Washer & Dryer in Unit': 'yes', 'Pets Allowed': 'no', 'Notable features': 'right next to soccer fields at the park'}"


In [23]:
# Evaluate the updated prompts: create a new EvalTask on the updated dataset
# and run its evaluate() method under its own timestamped run name.
updated_eval_task = EvalTask(
    experiment="apartment-listing-generation",
    metrics=[MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS],
    dataset=updated_eval_dataset,
)

run_ts = f"{datetime.datetime.now():%Y%m%d-%H%M%S}"
updated_result = updated_eval_task.evaluate(
    experiment_run_name="apt-gen-" + run_ts,
    model=model,
)

# Add the new result to the list of results kept for comparison.
eval_results_to_compare.append(updated_result)

# Print the summary metrics of the updated run.
print(updated_result.summary_metrics)

INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.5-pro', 'temperature': 0, 'top_p': 0.4}
INFO:vertexai.evaluation._evaluation:Generating a total of 5 responses from Gemini model gemini-2.5-pro.
100%|██████████| 5/5 [00:19<00:00,  3.84s/it]
INFO:vertexai.evaluation._evaluation:All 5 responses are successfully generated from Gemini model gemini-2.5-pro.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 19.203526376000355 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 5 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 5/5 [00:01<00:00,  4.55it/s]
INFO:vertexai.evaluation._evaluation:All 5 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:1.1070495789999768 seconds


{'row_count': 5, 'groundedness/mean': np.float64(1.0), 'groundedness/std': 0.0}


In [24]:
# Use the provided helper function to visualize your evaluation runs against each other. See whether your application improved on the metrics:
import plotly.graph_objects as go
def plot_bar_plot(eval_results, metrics=None):
  """Show a grouped bar chart comparing summary metrics across evaluation runs.

  Args:
    eval_results: Iterable of evaluation results. Each one must expose a
      ``summary_metrics`` mapping (metric name -> value) and a ``metadata``
      mapping with an ``"experiment_run"`` entry, which is used as the
      legend label for that run's bars.
    metrics: Optional collection of strings. When given (and non-empty), only
      summary metrics whose name contains at least one of these strings are
      plotted; otherwise every summary metric is plotted.

  Returns:
    None. The figure is rendered with ``fig.show()``.
  """
  bars = []
  for eval_result in eval_results:
    summary_metrics = eval_result.summary_metrics
    if metrics:
      # Substring match: a selector such as "groundedness" keeps every
      # summary entry whose name contains it.
      summary_metrics = {
          name: value
          for name, value in summary_metrics.items()
          if any(selected_metric in name for selected_metric in metrics)
      }

    bars.append(
        go.Bar(
            x=list(summary_metrics.keys()),
            y=list(summary_metrics.values()),
            name=eval_result.metadata["experiment_run"],
        )
    )

  # Build the figure once, after all bar traces have been collected.
  fig = go.Figure(data=bars)

  # Grouped mode: bars for the same metric sit side by side, one per run.
  fig.update_layout(barmode="group")
  fig.show()


plot_bar_plot(eval_results_to_compare, metrics=["groundedness/mean"])