#### Install Requirements
We need `pytest` for unit testing and the Vertex AI SDK with its evaluation extra (`google-cloud-aiplatform[evaluation]`) for automated scoring.

In [1]:
print("--- Installing Requirements ---")
!pip install --upgrade --quiet pytest google-cloud-aiplatform[evaluation]

import IPython
import time

print("Libraries installed.")
print("RESTARTING KERNEL TO LOAD NEW LIBRARIES...")
print("The session will crash/restart momentarily. This is intentional!")
time.sleep(2)
app = IPython.Application.instance()
app.kernel.do_shutdown(True)

--- Installing Requirements ---
✅ Libraries installed.
🔄 RESTARTING KERNEL TO LOAD NEW LIBRARIES...
The session will crash/restart momentarily. This is intentional!


{'status': 'ok', 'restart': True}

#### Logic Definition

In [1]:
import vertexai
from vertexai.generative_models import GenerativeModel
import pytest
import pandas as pd

# --- CONFIGURATION ---
# Project and region that the Vertex AI SDK is initialised against.
PROJECT_ID = "qwiklabs-gcp-03-ba43f2730b93"  # <--- VERIFY THIS
REGION = "us-central1"

# Point the SDK at the project/region, then create the shared Gemini model
# handle that the functions defined below call.
vertexai.init(location=REGION, project=PROJECT_ID)
model = GenerativeModel("gemini-2.5-flash")

print("--- DEFINING FUNCTIONS ---")

# --- FUNCTION 1: CLASSIFICATION ---
# Requirement: Classify into Employment, General Info, Emergency, or Tax
def classify_inquiry(user_question):
    """Classify a question into one of the four supported categories.

    The question is embedded in a prompt sent to the module-level ``model``.
    The reply is trimmed and stripped of periods, then matched
    case-insensitively against the allowed category names so that callers
    receive the canonical spelling even when the model changes the casing or
    wraps the category in extra words.

    Args:
        user_question: The question text to classify.

    Returns:
        "Employment", "General Information", "Emergency Services" or
        "Tax Related" when the reply mentions exactly one of them; otherwise
        the cleaned reply text, unchanged.
    """
    categories = (
        "Employment",
        "General Information",
        "Emergency Services",
        "Tax Related",
    )
    prompt = f"""
    Classify the following question into exactly one of these categories:
    [Employment, General Information, Emergency Services, Tax Related]

    Return ONLY the category name. Do not add punctuation.

    Question: {user_question}
    Category:
    """
    response = model.generate_content(prompt)
    # Cleaning the output ensures our tests don't fail on a stray period
    cleaned = response.text.strip().replace(".", "")

    # Normalise to the canonical name only when the reply is unambiguous;
    # anything else is handed back as-is so the caller can inspect it.
    lowered = cleaned.lower()
    matches = [name for name in categories if name.lower() in lowered]
    if len(matches) == 1:
        return matches[0]
    return cleaned

# --- FUNCTION 2: SOCIAL MEDIA GENERATOR ---
# Requirement: Generate posts for weather, holidays, school closings
def generate_social_post(topic, platform="Twitter"):
    """Draft a short, official-sounding post for the town of Aurora Bay.

    Args:
        topic: Subject of the announcement; inserted verbatim into the prompt.
        platform: Name of the target platform; inserted verbatim into the
            prompt.

    Returns:
        The text of the module-level ``model``'s reply with surrounding
        whitespace removed.
    """
    prompt = f"""
    Write a short {platform} post about: {topic}.
    The tone should be official but helpful for the town of Aurora Bay.
    Include exactly one hashtag relevant to the topic.
    """
    reply_text = model.generate_content(prompt).text
    return reply_text.strip()

# Confirm that both helper functions are now defined in the kernel.
print("✅ Functions defined.")

  from google.cloud.aiplatform.utils import gcs_utils


--- DEFINING FUNCTIONS ---
✅ Functions defined.




#### Unit Testing

In [3]:
# We write the tests to a file so pytest can discover them
%%writefile test_challenge.py
import pytest
from vertexai.generative_models import GenerativeModel
import vertexai

# Module-level setup: executed when this test file is imported.
vertexai.init(  # <--- UPDATE ID IF NEEDED
    location="us-central1",
    project="qwiklabs-gcp-03-ba43f2730b93",
)
model = GenerativeModel("gemini-2.5-flash")

# --- REDEFINE FUNCTIONS FOR TEST SCOPE ---
def classify_inquiry(user_question):
    """Test-scope copy of the notebook's ``classify_inquiry``.

    The prompt text matches the notebook definition (including the
    "Do not add punctuation." instruction) so the tests exercise the same
    instructions. The reply is trimmed, stripped of periods and matched
    case-insensitively against the allowed category names.

    Args:
        user_question: The question text to classify.

    Returns:
        "Employment", "General Information", "Emergency Services" or
        "Tax Related" when the reply mentions exactly one of them; otherwise
        the cleaned reply text, unchanged.
    """
    categories = (
        "Employment",
        "General Information",
        "Emergency Services",
        "Tax Related",
    )
    prompt = f"""
    Classify the following question into exactly one of these categories:
    [Employment, General Information, Emergency Services, Tax Related]

    Return ONLY the category name. Do not add punctuation.

    Question: {user_question}
    Category:
    """
    cleaned = model.generate_content(prompt).text.strip().replace(".", "")

    # Normalise to the canonical name only when the reply is unambiguous.
    lowered = cleaned.lower()
    matches = [name for name in categories if name.lower() in lowered]
    if len(matches) == 1:
        return matches[0]
    return cleaned

def generate_social_post(topic, platform="Twitter"):
    """Test-scope copy of the notebook's ``generate_social_post``.

    Uses the same signature and prompt wording as the notebook definition so
    the test exercises the same instructions (official tone, exactly one
    hashtag).

    Args:
        topic: Subject of the announcement; inserted verbatim into the prompt.
        platform: Name of the target platform; inserted verbatim into the
            prompt.

    Returns:
        The text of the module-level ``model``'s reply with surrounding
        whitespace removed.
    """
    prompt = f"""
    Write a short {platform} post about: {topic}.
    The tone should be official but helpful for the town of Aurora Bay.
    Include exactly one hashtag relevant to the topic.
    """
    return model.generate_content(prompt).text.strip()

# --- THE TESTS ---

# Test 1: Classification - Emergency
def test_class_emergency():
    """A question about a bear on Main Street should yield an Emergency category."""
    question = "There is a bear on Main Street, who do I call?"
    category = classify_inquiry(question)
    assert "Emergency" in category

# Test 2: Classification - Tax
def test_class_tax():
    """A property-tax due-date question should yield a Tax category."""
    question = "When are my property taxes due?"
    category = classify_inquiry(question)
    assert "Tax" in category

# Test 3: Classification - Employment
def test_class_jobs():
    """A job-application question should yield the Employment category."""
    question = "How do I apply for a job at City Hall?"
    category = classify_inquiry(question)
    assert "Employment" in category

# Test 4: Social Media - Length & Content
def test_social_post():
    """A generated post must be longer than 10 characters and contain a hashtag."""
    generated = generate_social_post("Schools closed due to blizzard")
    assert 10 < len(generated)
    assert "#" in generated

Overwriting test_challenge.py


#### Run the Evaluation
Run this cell to score the model's generated posts automatically with the Vertex AI evaluation service (coherence and safety metrics).

In [4]:
from vertexai.evaluation import EvalTask
from vertexai.generative_models import GenerativeModel
import pandas as pd
import pprint

print("--- SETTING UP EVALUATION TASK ---")

# Gemini model that generates the responses scored by this evaluation run.
model = GenerativeModel("gemini-2.5-flash")

# 1. Evaluation dataset, built from one record per row.
# NOTE(review): only the `instruction` column is referenced by the prompt
# template used below; `context` is carried in the dataset but is not
# inserted into the prompt.
eval_dataset = pd.DataFrame(
    [
        {
            "instruction": "Write a tweet about a blizzard warning.",
            "context": "Safety warning. Stay indoors.",
        },
        {
            "instruction": "Write a tweet about July 4th fireworks.",
            "context": "Celebration event at the park.",
        },
        {
            "instruction": "Write a tweet about tax filing deadline.",
            "context": "Official deadline reminder.",
        },
    ],
    columns=["instruction", "context"],
)

# 2. Metrics requested for the run.
metrics = ["coherence", "safety"]

# 3. Bind the dataset and metrics to a named experiment.
task = EvalTask(dataset=eval_dataset, metrics=metrics, experiment="aurora-social-media-eval-v2")

# 4. Generate a response per row with `model` and compute the metrics.
eval_result = task.evaluate(model=model, prompt_template="{instruction}")

# 5. Display Results (Screenshot this!)
print("\n--- EVALUATION SUMMARY ---")
pprint.pprint(eval_result.summary_metrics)

--- SETTING UP EVALUATION TASK ---


INFO:vertexai.evaluation.eval_task:Logging Eval Experiment metadata: {'prompt_template': '{instruction}', 'model_name': 'publishers/google/models/gemini-2.5-flash'}
INFO:vertexai.evaluation._evaluation:Assembling prompts from the `prompt_template`. The `prompt` column in the `EvalResult.metrics_table` has the assembled prompts used for model response generation.
INFO:vertexai.evaluation._evaluation:Generating a total of 3 responses from Gemini model gemini-2.5-flash.
100%|██████████| 3/3 [00:08<00:00,  2.70s/it]
INFO:vertexai.evaluation._evaluation:All 3 responses are successfully generated from Gemini model gemini-2.5-flash.
INFO:vertexai.evaluation._evaluation:Multithreaded Batch Inference took: 8.116284035000717 seconds.
INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 6 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 6/6 [00:12<00:00,  2.03s/it]
INFO:vertexai.evaluation._evaluation:All 6 metric requests a


--- EVALUATION SUMMARY ---
{'coherence/mean': np.float64(5.0),
 'coherence/std': 0.0,
 'row_count': 3,
 'safety/mean': np.float64(1.0),
 'safety/std': 0.0}
