In [79]:
!pip install google-genai ipytest pytest unittest google-cloud-aiplatform google-cloud-aiplatform[evaluation]

[31mERROR: Could not find a version that satisfies the requirement unittest (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for unittest[0m[31m
[0m

In [86]:
import os
import json
from google import genai
from google.genai import types

import pytest
import ipytest
import unittest

from typing import Literal, Dict, Any

import datetime
import nest_asyncio
import pandas as pd
from IPython.display import display, Markdown, HTML

import vertexai
from vertexai.generative_models import GenerativeModel

import google.auth

from vertexai.evaluation import EvalTask, PointwiseMetric
from vertexai.evaluation.metrics import MetricPromptTemplateExamples

In [None]:
client = genai.Client(
        vertexai=True,
        api_key=os.environ.get("GOOGLE_CLOUD_API_KEY"),
    )

In [None]:

def classify_user_question(question: str) -> str:
    """
    Classifies a user question into one of the predefined categories using the Gemini API.

    Args:
        question: The user's input question string.

    Returns:
        The classification category as a string (e.g., "Employment").
    """

    # categories as a list and string for easier passing
    categories = ["Employment", "General Information", "Emergency Services", "Tax Related"]
    category_list_str = ", ".join(categories)

    # gemini generated this for me
    system_instruction = (
        "You are an expert classification system. Your sole task is to classify "
        "the user's provided question into one of the following four categories: "
        f"{category_list_str}. You must output a valid JSON object with a single "
        "key 'category' whose value is one of the four allowed categories. "
        "Do not include any other text, explanation, or conversational filler."
    )

    # gemini generated this for me
    json_schema = types.Schema(
        type=types.Type.OBJECT,
        properties={
            "category": types.Schema(
                type=types.Type.STRING,
                description=f"The classified category. Must be one of: {category_list_str}",
                enum=categories,
            )
        },
        required=["category"],
    )

    # gemini generated this for me
    config = types.GenerateContentConfig(
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=json_schema,
        temperature=0.0 # Set to 0.0 for deterministic classification
    )

    response = client.models.generate_content(
        model='gemini-2.5-flash', # Fast and capable model for classification
        contents=[question],
        config=config,
    )

    json_data = json.loads(response.text)
    return json_data.get('category', 'Classification Failed - Category Key Missing')

## some basic unit testing for the deterministic side

In [None]:
class TestQuestionClassification(unittest.TestCase):
  def test_classification_employment(self):
          """Test case for a clear Employment question."""
          question = "What is the process for applying for a job at the new municipal building?"
          expected_resp = "Employment"

          actual_resp = classify_user_question(question)

          self.assertEqual(actual_resp, expected_resp, f"Expected '{expected_resp}' but got '{actual_resp}' for question: {question}")

  def test_classification_general_information(self):
      """Test case for a clear General Information question."""
      question = "What are the hours of operation for the local library branch on Main Street?"
      expected_resp = "General Information"

      actual_resp = classify_user_question(question)

      self.assertEqual(actual_resp, expected_resp, f"Expected '{expected_resp}' but got '{actual_resp}' for question: {question}")

  def test_classification_emergency_services(self):
      """Test case for a clear Emergency Services question."""
      question = "There is a fire in my apartment building. What number should I call immediately?"
      expected_resp = "Emergency Services"

      actual_resp = classify_user_question(question)

      self.assertEqual(actual_resp, expected_resp, f"Expected '{expected_resp}' but got '{actual_resp}' for question: {question}")

  def test_classification_tax_related(self):
      """Test case for a clear Tax Related question."""
      question = "I need to know the deadline for filing my 1040-ES estimated taxes for this quarter."
      expected_resp = "Tax Related"

      actual_resp = classify_user_question(question)

      self.assertEqual(actual_resp, expected_resp, f"Expected '{expected_resp}' but got '{actual_resp}' for question: {question}")

In [None]:
unittest.main(argv=['first-arg-is-ignored'], exit=False)

....
----------------------------------------------------------------------
Ran 4 tests in 3.307s

OK


<unittest.main.TestProgram at 0x7d1627e9e390>

# Non-deterministic side


In [75]:
system_instruction = (
        f"You are a professional social media manager for a government entity, the City of Auora. "
        "Your task is to generate two versions of a public safety or general announcement based on the provided details. "
        "The tone must be clear, authoritative, and helpful. "
        "You must output a valid JSON object strictly following the provided schema. "
        "Ensure the posts are optimized for platforms like twitter and include appropriate hashtags."
    )

In [134]:
# some gemini generated code to generate social media posts
SocialMediaPost = Dict[str, Any]

def generate_social_post(
    event_type: Literal["Weather Emergency", "Holiday", "School Closing", "General Announcement"],
    details: str,
    target_platform: str = "X (Twitter) and Facebook"
) -> SocialMediaPost:
    json_schema = types.Schema(
        type=types.Type.OBJECT,
        properties={
            "post_urgent": types.Schema(
                type=types.Type.STRING,
                description="A short, urgent post (under 280 characters) suitable for Twitter/X or a headline. Use capital letters for emphasis."
            ),
            "post_informative": types.Schema(
                type=types.Type.STRING,
                description="A longer, detailed post (3-5 sentences) providing context, next steps, and official sources."
            ),
            "hashtags": types.Schema(
                type=types.Type.ARRAY,
                items=types.Schema(type=types.Type.STRING),
                description="A list of 3-5 highly relevant, official, and timely hashtags."
            )
        },
        required=["post_urgent", "post_informative", "hashtags"],
    )

    # 3. Construct the User Prompt
    prompt = (
        f"**Announcement Type:** {event_type}\n"
        f"**Core Details:** {details}\n"
        f"Generate the social media content now."
    )

    # 4. Configure the model
    config = types.GenerateContentConfig(
        system_instruction=system_instruction,
        response_mime_type="application/json",
        response_schema=json_schema,
        temperature=0.1 # Use a slightly higher temperature for creative variation
    )

    # 5. Call the Gemini API
    try:
        response = client.models.generate_content(
            model='gemini-2.5-flash',
            contents=[prompt],
            config=config,
        )

        # 6. Parse and return the structured result
        return json.loads(response.text)

    except Exception as e:
        print(f"An error occurred during the API call: {e}")
        return {"error": f"API Call Failed: {e}"}

In [136]:
weather_details = "Heavy snow expected from 8 PM tonight to 6 AM tomorrow morning. 6-10 inches of accumulation. All non-essential travel strongly discouraged."
weather_post = generate_social_post("Weather Emergency", weather_details)
print(json.dumps(weather_post, indent=2))

school_details = "All Exampleton Public Schools will be closed tomorrow, Friday, due to inclement weather. All after-school activities are also canceled."
school_post = generate_social_post("School Closing", school_details)
print(json.dumps(school_post, indent=2))

{
  "post_urgent": "WEATHER ALERT: HEAVY SNOW EXPECTED TONIGHT 8 PM - 6 AM. 6-10 INCHES ACCUMULATION. NON-ESSENTIAL TRAVEL STRONGLY DISCOURAGED. STAY HOME, STAY SAFE!",
  "post_informative": "The City of Auora is issuing a Weather Emergency Alert for heavy snowfall. Expect 6-10 inches of accumulation from 8 PM tonight until 6 AM tomorrow morning. We strongly advise all residents to avoid non-essential travel during this period to ensure your safety and allow emergency crews to work unimpeded. Please stay indoors, prepare for winter conditions, and check on vulnerable neighbors if it's safe to do so. For real-time updates and safety tips, visit Auora.gov/Weather.",
  "hashtags": [
    "#AuoraSnow",
    "#WeatherAlert",
    "#StaySafeAuora",
    "#CityOfAuora",
    "#SnowStorm"
  ]
}
{
  "post_urgent": "ATTENTION AUORA RESIDENTS: ALL Exampleton Public Schools are CLOSED tomorrow, Friday, due to inclement weather. All after-school activities CANCELED. Stay safe!",
  "post_informative": "T

# Evaluation API

In [137]:
credentials, project_id = google.auth.default()
vertexai.init(project=project_id, location="us-central1")

pd.set_option('display.max_colwidth', None)

In [138]:
# we are going to now use the eval API to see the groundedness of the model's
# response relative to what information it was given
model = GenerativeModel(
  "gemini-2.5-pro",
  generation_config={
      "temperature": 0
  },
)

contexts = [weather_details, school_details]
full_prompts = [system_instruction + item for item in contexts]
responses = [weather_post["post_informative"], school_post["post_informative"]]

eval_dataset = pd.DataFrame({
    # The 'context' is the source of truth for the post.
    "context": full_prompts,
    # The 'response' is the text generated by the LLM (your weather_post).
    "response": responses,
    # 'instruction' is the input prompt given to the generation model.
    "instruction": full_prompts
})

column_mapping = {
    "prompt": "instruction"
}

eval_task = EvalTask(
    dataset=eval_dataset,
    metrics=[MetricPromptTemplateExamples.Pointwise.GROUNDEDNESS],
    experiment="posts-exp",
    metric_column_mapping=column_mapping
)

In [140]:

groundedness_result = eval_task.evaluate(
    # model=model,
    experiment_run_name="post-exp-run10"
)

INFO:vertexai.evaluation._evaluation:Computing metrics with a total of 2 Vertex Gen AI Evaluation Service API requests.
100%|██████████| 2/2 [00:05<00:00,  2.82s/it]
INFO:vertexai.evaluation._evaluation:All 2 metric requests are successfully computed.
INFO:vertexai.evaluation._evaluation:Evaluation Took:5.646592990000499 seconds


In [141]:
groundedness_result.summary_metrics

{'row_count': 2, 'groundedness/mean': np.float64(0.0), 'groundedness/std': 0.0}

In [142]:
groundedness_result.metrics_table

Unnamed: 0,context,response,instruction,groundedness/explanation,groundedness/score
0,"You are a professional social media manager for a government entity, the City of Auora. Your task is to generate two versions of a public safety or general announcement based on the provided details. The tone must be clear, authoritative, and helpful. You must output a valid JSON object strictly following the provided schema. Ensure the posts are optimized for platforms like {target_platform} and include appropriate hashtags.Heavy snow expected from 8 PM tonight to 6 AM tomorrow morning. 6-10 inches of accumulation. All non-essential travel strongly discouraged.","The City of Auora is issuing a Weather Emergency Alert for heavy snowfall. Expect 6-10 inches of accumulation from 8 PM tonight until 6 AM tomorrow morning. We strongly advise all residents to avoid non-essential travel during this period to ensure your safety and allow emergency crews to work unimpeded. Please stay indoors, prepare for winter conditions, and check on vulnerable neighbors if it's safe to do so. For real-time updates and safety tips, visit Auora.gov/Weather.","You are a professional social media manager for a government entity, the City of Auora. Your task is to generate two versions of a public safety or general announcement based on the provided details. The tone must be clear, authoritative, and helpful. You must output a valid JSON object strictly following the provided schema. Ensure the posts are optimized for platforms like {target_platform} and include appropriate hashtags.Heavy snow expected from 8 PM tonight to 6 AM tomorrow morning. 6-10 inches of accumulation. All non-essential travel strongly discouraged.","The response includes information not present in the prompt, such as 'Weather Emergency Alert', the specific reasons for discouraging travel ('to ensure your safety and allow emergency crews to work unimpeded'), additional advice ('stay indoors, prepare for winter conditions, and check on vulnerable neighbors'), and a hypothetical website ('Auora.gov/Weather').",0.0
1,"You are a professional social media manager for a government entity, the City of Auora. Your task is to generate two versions of a public safety or general announcement based on the provided details. The tone must be clear, authoritative, and helpful. You must output a valid JSON object strictly following the provided schema. Ensure the posts are optimized for platforms like {target_platform} and include appropriate hashtags.All Exampleton Public Schools will be closed tomorrow, Friday, due to inclement weather. All after-school activities are also canceled.","The City of Auora announces that all Exampleton Public Schools will be closed tomorrow, Friday, due to anticipated inclement weather conditions. This decision prioritizes the safety of our students and staff. Additionally, all after-school activities scheduled for tomorrow are canceled. Please monitor official City of Auora channels and school district websites for further updates. We appreciate your understanding and cooperation as we work to ensure everyone's safety.","You are a professional social media manager for a government entity, the City of Auora. Your task is to generate two versions of a public safety or general announcement based on the provided details. The tone must be clear, authoritative, and helpful. You must output a valid JSON object strictly following the provided schema. Ensure the posts are optimized for platforms like {target_platform} and include appropriate hashtags.All Exampleton Public Schools will be closed tomorrow, Friday, due to inclement weather. All after-school activities are also canceled.","The response includes information not present in the user prompt, such as 'This decision prioritizes the safety of our students and staff,' 'Please monitor official City of Auora channels and school district websites for further updates,' and 'We appreciate your understanding and cooperation as we work to ensure everyone's safety.'",0.0


In [None]:
# notes
# the groundedness scores of 0 here are a bit concerning. the explanations do
# make sense though but they could be a bit more lax maybe

# I thought reducing the temperature of the LLM when we are creating the post
# would help but that didn't work...