In [None]:
!pip install google-genai
#



In [5]:
# Create a Python function that uses Gemini to classify user questions into one of the following
# categories: Employment, General Information, Emergency Services, or Tax Related

# Using an LLM for classification or sentiment analysis are examples

import os
from google import genai
from google.genai import types

PROJECT_ID = "qwiklabs-gcp-01-c75658565206"
LOCATION = "us-west1"
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

def classify_user_question(prompt: str) -> str:
  """
  Uses the Gemini model to classify a user question into one of four categories:
  Employment, General Information, Emergency Services, or Tax Related.

  Args:
    prompt: The user's question as a string.

  Returns:
    The classified category as a string (e.g., 'Employment').
  """

  # Select a suitable model for fast classification (e.g., gemini-2.5-flash)
  model = 'gemini-2.5-flash'

  # The system instruction guides the model's behavior and output format.
  system_instruction = (
      "You are an expert classification system. "
      "Your sole task is to classify the user's message into one of the "
      "following four categories: 'Employment', 'General Information', "
      "'Emergency Services', or 'Tax Related'. "
      "You MUST output ONLY the name of the category."
  )

  response = client.models.generate_content(
      model=model,
      contents=f"Message: {prompt}",
      config=types.GenerateContentConfig(
          system_instruction=system_instruction,
          # Setting temperature to 0.0 encourages deterministic and strict classification
          temperature=0.0
      ),
  )

  # Clean up the response text and return the category
  return response.text.strip()


# --- Example Usage ---
print(classify_user_question("I need to know my W-2 status."))
print(classify_user_question("What is the capital of France?"))
print(classify_user_question("Is there a fire nearby?"))
print(classify_user_question("How do I apply for the open position?"))

Tax Related
General Information
Emergency Services
Employment


In [6]:
# Write unit tests for each function using pytest.
import unittest

class TestPositiveOrNegative(unittest.TestCase):

  def test_isTaxRelated(self):
    response = classify_user_question("I need to know my W-2 status.")
    self.assertEqual(response, "Tax Related")

  def test_isGeneralInformation(self):
    response = classify_user_question("What is the capital of France?")
    self.assertEqual(response, "General Information")

  def test_isEmergencyServices(self):
    response = classify_user_question("Is there a fire nearby?")
    self.assertEqual(response, "Emergency Services")

  def test_isEmployment(self):
    response = classify_user_question("How do I apply for the open position?")
    self.assertEqual(response, "Employment")

unittest.main(argv=[''], verbosity=2, exit=False)

test_isEmergencyServices (__main__.TestPositiveOrNegative.test_isEmergencyServices) ... ok
test_isEmployment (__main__.TestPositiveOrNegative.test_isEmployment) ... ok
test_isGeneralInformation (__main__.TestPositiveOrNegative.test_isGeneralInformation) ... ok
test_isTaxRelated (__main__.TestPositiveOrNegative.test_isTaxRelated) ... ok

----------------------------------------------------------------------
Ran 4 tests in 11.396s

OK


<unittest.main.TestProgram at 0x7c7c5ae8b110>

In [7]:
# Create a second function that generates social media posts for government announcements like
# weather emergencies, holidays, school closings, etc.


def generate_government_post(prompt: str) -> str:
  """
  Generates a concise, authoritative social media announcement (e.g., Tweet)
  for public service purposes based on the user's request.

  Args:
    prompt: The core information for the announcement (e.g., "All schools
            are closed due to snow today").

  Returns:
    The generated, formatted social media post.
  """
  # Using a fast model for text generation
  model = 'gemini-2.5-flash'

  # The system instruction defines the persona and rules the model must follow
  system_instruction = (
      "You are a public communications officer writing critical announcements "
      "for social media (like Twitter/X). Your tone must be authoritative, "
      "clear, and concise. All posts must be highly readable and actionable."
  )

  # Use few-shot prompting to guide the model on formatting and rules
  few_shot_prompt = (
      """
      # Rules:
      1. Max character limit is 240.
      2. Use ALL CAPS for critical information.
      3. Include relevant hashtags like #SafetyAlert, #Weather, or #Update.

      # Example Input:
      City Hall will be closed for the holiday tomorrow.

      # Example Output:
      REMINDER: City Hall and all non-essential municipal services will be CLOSED tomorrow, Dec 25, for the holiday. Normal operations resume Wednesday. #HolidayUpdate

      # Input:
      {0}

      # Output:
      """.format(prompt)
  )

  response = client.models.generate_content(
      model=model,
      contents=few_shot_prompt,
      config=types.GenerateContentConfig(
          system_instruction=system_instruction,
          temperature=0.7 # Allow for some creativity in phrasing
      ),
  )

  return response.text.strip()


# --- Example of how to use the function in your notebook ---
print("--- Weather Emergency Post ---")
print(generate_government_post("A severe thunderstorm warning is in effect until 8 PM. Seek shelter immediately."))

print("\n--- School Closing Post ---")
print(generate_government_post("All district schools are closed today, Tuesday, due to a power outage."))

--- Weather Emergency Post ---

--- School Closing Post ---
ATTENTION: ALL DISTRICT SCHOOLS ARE CLOSED TODAY, TUESDAY, due to a power outage. Student & staff safety is our top priority. Further updates will be shared as available. #SchoolClosure #PowerOutage #SafetyAlert


In [8]:
# write unit tests for second function

def does_tweet_follow_rules(tweet):
  model = 'gemini-2.5-flash'

  response = client.models.generate_content(
    model=model,
    contents=
    """Does the tweet follow the following rules:
    1. Keep your Tweets below 240 characters
    2. Relevant hashtags must be added to each output, ie: #SafetyAlert, #Weather, or #Update.

    Only return Yes or No
    Tweet: {0}
    Output: """.format(tweet)
  )
  return response.text.strip()



# import unittest
class TestTweetRules(unittest.TestCase):
  def test_tweet_results_1(self):
    generated_tweet = generate_government_post("Write a tweet about our Thanksgiving Day Special")
    correct = does_tweet_follow_rules(generated_tweet)
    self.assertEqual(correct, "Yes")

  def test_does_not_follow_rules(self):
    generated_tweet = "Thanksgiving feast at Luigi's! Enjoy our special menu."
    correct = does_tweet_follow_rules(generated_tweet)
    self.assertEqual(correct, "No")

unittest.main(argv=[''], verbosity=2, exit=False)

test_isEmergencyServices (__main__.TestPositiveOrNegative.test_isEmergencyServices) ... ok
test_isEmployment (__main__.TestPositiveOrNegative.test_isEmployment) ... ok
test_isGeneralInformation (__main__.TestPositiveOrNegative.test_isGeneralInformation) ... ok
test_isTaxRelated (__main__.TestPositiveOrNegative.test_isTaxRelated) ... ok
test_does_not_follow_rules (__main__.TestTweetRules.test_does_not_follow_rules) ... ok
test_tweet_results_1 (__main__.TestTweetRules.test_tweet_results_1) ... ok

----------------------------------------------------------------------
Ran 6 tests in 24.769s

OK


<unittest.main.TestProgram at 0x7c7c5ac41310>

In [41]:
# Use the Google Evaluation API to evaluate and compare Gemini responses from different
# prompts.

car_data = [
    {
        "manufacturer": "Toyota",
        "model": "Camry",
        "MSRP": 26420,
        "vehicle_classification": "Sedan",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "gas",
        "drive_train": "FWD"
    },
    {
        "manufacturer": "Honda",
        "model": "CR-V",
        "MSRP": 28410,
        "vehicle_classification": "SUV",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "gas",
        "drive_train": "AWD"
    },
    {
        "manufacturer": "Tesla",
        "model": "Model 3",
        "MSRP": 40240,
        "vehicle_classification": "Sedan",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "electric",
        "drive_train": "AWD"
    },
    {
        "manufacturer": "Ford",
        "model": "F-150",
        "MSRP": 34585,
        "vehicle_classification": "Pickup truck",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "gas",
        "drive_train": "RWD"
    },
    {
        "manufacturer": "BMW",
        "model": "X5",
        "MSRP": 61600,
        "vehicle_classification": "Luxury SUV",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "gas",
        "drive_train": "AWD"
    },
    {
        "manufacturer": "Hyundai",
        "model": "Elantra",
        "MSRP": 20950,
        "vehicle_classification": "Compact car",
        "num_doors": 4,
        "num_seats": 5,
        "fuel_type": "gas",
        "drive_train": "FWD"
    }
]

In [55]:
# create an evaluation dataset
import pandas as pd

prompt = "Write a one paragraph car listing known features of this car. Use only the details included in the following information: "
prompt_2 = "Using your imagination, use the cars properties as a baseline, and exagerate its features fantastically. You get bonus points for being creative, add in anything to make the car more desireable!"

contexts = [str(car) for car in car_data]
full_prompts = [prompt + str(car) for car in car_data]
full_prompts_2 = [prompt_2 + str(car) for car in car_data]

eval_dataset = pd.DataFrame(
  {
    "prompt": full_prompts,
    # "instruction": full_prompts,
    "context": contexts,
  }
)

eval_dataset_2 = pd.DataFrame(
  {
    "prompt": full_prompts_2,
    # "instruction": full_prompts,
    "context": contexts,
  }
)

In [56]:
# create an evaluation task
from vertexai.evaluation import (
    MetricPromptTemplateExamples,
    EvalTask,
    PairwiseMetric,
    PairwiseMetricPromptTemplate,
    PointwiseMetric,
    PointwiseMetricPromptTemplate,
)

qa_eval_task = EvalTask(
  dataset=eval_dataset,
  metrics=["instruction_following", "groundedness"],
  experiment="car-listing-generation",
)

qa_eval_task_2 = EvalTask(
  dataset=eval_dataset_2,
  metrics=["instruction_following", "groundedness"],
  experiment="car-listing-generation-two",
)

In [57]:
# run the evaluation
import datetime
from vertexai.generative_models import GenerativeModel
import vertexai
vertexai.init()

model_name_str = 'gemini-2.5-flash'
# --- FIX: Load the model using the GenerativeModel class ---
model_object = GenerativeModel(model_name_str)

run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
result = qa_eval_task.evaluate(
    model=model_object,
    experiment_run_name=f"apartment-listing-gen-{run_ts}"
  )

evaluation_results = []
evaluation_results.append(result)

run_ts = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
result = qa_eval_task_2.evaluate(
    model=model_object,
    experiment_run_name=f"apartment-listing-gen-two-{run_ts}"
  )

evaluation_results_2 = []
evaluation_results_2.append(result)

INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.5-flash'}
INFO:vertexai.evaluation._evaluation:Generating a total of 6 responses from Gemini model gemini-2.5-flash.
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]
INFO:vertexai.evaluation._evaluation:All 6 responses are successfully generated from Gemini model gemini-2.5-flash.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 6.502393101998678 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 12 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 12/12 [00:18<00:00,  1.54s/it]
INFO:vertexai.evaluation._evaluation:All 12 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:18.520138175001193 seconds


INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'model_name': 'publishers/google/models/gemini-2.5-flash'}
INFO:vertexai.evaluation._evaluation:Generating a total of 6 responses from Gemini model gemini-2.5-flash.
100%|██████████| 6/6 [00:27<00:00,  4.63s/it]
INFO:vertexai.evaluation._evaluation:All 6 responses are successfully generated from Gemini model gemini-2.5-flash.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 27.795867737000663 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 12 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 12/12 [00:19<00:00,  1.64s/it]
INFO:vertexai.evaluation._evaluation:All 12 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:19.638087234001432 seconds


In [59]:
# examine the results
def display_evaluation_results(results_list):
    """
    Takes a list of EvalTask results and prints the overall summary metrics
    and a DataFrame of the detailed instance results.
    """
    if not results_list:
        print("\nNo evaluation results available to display.")
        return

    first_result = results_list[0]

    # Display overall aggregated metrics
    print("\n" + "="*50)
    print(f"OVERALL METRICS SUMMARY for Run: {first_result.metadata}")
    print(f"OVERALL METRICS SUMMARY for Run: {first_result.metadata['experiment_run']}")
    print("="*50)

    # Use the result's summary_metrics attribute to show averaged scores
    for metric_name, score in first_result.summary_metrics.items():
        print(f"| {metric_name.ljust(25)} | {score:.4f} |")
    print("="*50)

    print("DETAILED INSTANCE RESULTS (First 5 rows):")
    print("-" * 50)

    results_df = first_result.metrics_table

    print(results_df[results_df.columns].head().to_markdown(index=False))



print("Grounded by context, fails to adhere to strictly information provided")
print("\n" + "="*50)
#
# in
#
#
#
display_evaluation_results(evaluation_results)
print("\n")
print("Creative Writing, follows instructions & grounded in context provided to it")
print("\n" + "="*50)

display_evaluation_results(evaluation_results_2)

Grounded by context, fails to adhere to strictly information provided


OVERALL METRICS SUMMARY for Run: {'experiment': 'car-listing-generation', 'experiment_run': 'apartment-listing-gen-20251203-192130'}
OVERALL METRICS SUMMARY for Run: apartment-listing-gen-20251203-192130
| row_count                 | 6.0000 |
| instruction_following/mean | 4.0000 |
| instruction_following/std | 0.8944 |
| groundedness/mean         | 0.0000 |
| groundedness/std          | 0.0000 |
DETAILED INSTANCE RESULTS (First 5 rows):
--------------------------------------------------
| prompt                                                                                                                                                                                                                                                                                                   | context                                                                                                                               