In [4]:
import unittest
from google import genai
from google.genai import types
import base64
import pandas as pd
import datetime
import vertexai
from vertexai.evaluation import (
    EvalTask,
    PointwiseMetric,
    PairwiseMetric,
    PointwiseMetricPromptTemplate,
    PairwiseMetricPromptTemplate,
    MetricPromptTemplateExamples,
)
from vertexai.generative_models import (
    GenerativeModel,
    HarmCategory,
    HarmBlockThreshold,
)

# Gen AI client: routes google-genai calls through Vertex AI.
genai_client = genai.Client(
    vertexai=True,
    project="your-project-id",  # Replace with your project ID
    location="global",
)

# Gen AI model used by generate().
# The request config built in generate() asks for max_output_tokens=65535 and
# a thinking budget. Those settings match the Gemini 2.5 family; the previous
# "gemini-2.0-flash-exp" value caps output at 8,192 tokens and has no thinking
# support, so requests with that config are rejected.
model = "gemini-2.5-flash"

# Gen AI content generation function
def generate(system_prompt: str, user_input: str, *, temperature: float = 0.7) -> str:
    """Send a single user message to the configured model and return its text.

    Args:
        system_prompt: Text passed to the model as the system instruction.
        user_input: Text sent as the only user turn of the conversation.
        temperature: Sampling temperature (controls content creativity).
            Defaults to 0.7, the value this helper has always used; pass a
            lower value when more repeatable output is wanted.

    Returns:
        The text of the model response. An empty string is returned when the
        response carries no text (the SDK's ``text`` accessor is ``None`` in
        that case), so callers can always treat the result as a ``str``.
    """
    contents = [
        types.Content(
            role="user",
            parts=[
                types.Part.from_text(text=user_input)
            ]
        ),
    ]

    generate_content_config = types.GenerateContentConfig(
        temperature=temperature,  # Control the content creativity
        top_p=1,  # Control the probability of token selection
        max_output_tokens=65535,
        system_instruction=[types.Part.from_text(text=system_prompt)],
        thinking_config=types.ThinkingConfig(
            # -1 leaves the size of the thinking budget up to the model.
            thinking_budget=-1,
        ),
    )

    result = genai_client.models.generate_content(
        model=model,
        contents=contents,
        config=generate_content_config
    )
    # Normalise a missing text payload to "" instead of leaking None to callers
    # that compare or print the result as a string.
    return result.text or ""

# DETERMINISTIC FUNCTION - Classification Unit Test
def classify_question(question: str) -> str:
    """Classify a user question into one of the four supported categories.

    Args:
        question: The user question to classify.

    Returns:
        The model's answer with surrounding whitespace removed. The prompt
        asks for exactly one of 'Employment', 'General Information',
        'Emergency Services' or 'Tax Related'. An empty string is returned
        when the model produces no text.
    """
    system_prompt = """
    You are a helpful AI Assistant. Your task is to classify user questions into one of the following categories:

    1. Employment
    2. General Information
    3. Emergency Services
    4. Tax Related

    If a specific category cannot be determined, assign the category: 'General Information'.

    Your response must include **only** the category name — no additional text.

    Example:
    Question: What is the ambulance contact number?
    Answer: Emergency Services
    """
    raw_label = generate(system_prompt, question)
    # Model output frequently ends with a newline; callers compare the label
    # with exact equality, so strip it here. `or ""` guards a None response.
    return (raw_label or "").strip()

class TestQuestionClassifier(unittest.TestCase):
    """Exact-match checks on the category returned by classify_question."""

    def _assert_category(self, question, expected):
        # Single place where the classifier is invoked and compared.
        self.assertEqual(classify_question(question), expected)

    def test_employment_category(self):
        self._assert_category(
            "How do I apply for a government job?", "Employment"
        )

    def test_emergency_services_category(self):
        self._assert_category(
            "What is the ambulance contact number?", "Emergency Services"
        )

    def test_tax_related_category(self):
        self._assert_category(
            "How do I file my income tax returns?", "Tax Related"
        )

    def test_general_info_category(self):
        self._assert_category(
            "What are the office hours?", "General Information"
        )

    def test_default_to_general_information(self):
        # A question with no recognisable topic should land in the fallback.
        self._assert_category(
            "Can you help me with this?", "General Information"
        )

# NON-DETERMINISTIC FUNCTION - Social Media Post Generation
# System prompt passed by generate_post() to generate(). It fixes the length,
# tone, required content and hashtag rules for the drafted announcement, and
# is also stored as the "instruction" column of the evaluation dataset.
post_system_prompt = """
You are a communications assistant for a government agency. Your task is to draft clear, concise, and polite social media posts for official public announcements. These may include weather emergencies, public holidays, school closings, or other alerts affecting citizens.

### Instructions:
1. Keep the message under **200 words**.
2. Use a **calm, professional, and reassuring** tone — no slang or informal phrasing.
3. Clearly state:
  - What the announcement is about
  - Who it affects (e.g., residents, students, commuters)
  - When it is in effect and for how long
4. Include **basic safety tips or recommended actions**, if applicable.
5. Use **simple, accessible language** — avoid technical or bureaucratic terms.
6. End the post with **relevant hashtags** (e.g., #WeatherAlert, #SchoolClosure, #PublicNotice). Include 2–4 concise, meaningful hashtags that help categorize the message.
7. Do not use emojis.
8. Avoid adding contact numbers, links, or excessive formatting unless instructed.

### Output format:
Only return the post text with hashtags at the end. Do not include any commentary, metadata, or instructions.
"""

def generate_post(news):
    """Draft a social media post for the announcement text in `news`."""
    draft = generate(post_system_prompt, news)
    return draft

def evaluate_post(post: str) -> str:
    """Ask the model whether a social media post meets every review criterion.

    Args:
        post: The post text to review.

    Returns:
        The model's verdict with surrounding whitespace removed. The prompt
        requests the single word "Yes" or "No". An empty string is returned
        when the model produces no text.
    """
    evaluation_prompt = """
    You are a communications quality reviewer. Your task is to evaluate whether a social media post for a government announcement meets all the required criteria.

    Criteria:
    1. The message is clear and easy to understand.
    2. The message is within 200 words.
    3. The tone is polite, professional, and reassuring.
    4. It clearly states what the announcement is about, who it affects, and when it applies.
    5. It includes 2 to 4 relevant hashtags placed at the end.
    6. The language is accessible, without jargon or overly complex terms.

    Respond with **only one word**:
    - **Yes** — if the post satisfies **all** of the above criteria.
    - **No** — if the post fails to satisfy **any** of the criteria.

    Do not provide any explanation or additional text.
    """
    verdict = generate(evaluation_prompt, post)
    # Callers compare against "Yes"/"No" exactly, so drop stray whitespace or
    # a trailing newline. `or ""` guards a None response.
    return (verdict or "").strip()

class TestPostEvaluation(unittest.TestCase):
    """Checks the Yes/No verdict of evaluate_post on hand-written posts."""

    def _assert_verdict(self, post, expected):
        # Review the post once and require an exact verdict match.
        self.assertEqual(evaluate_post(post), expected)

    def test_valid_post(self):
        self._assert_verdict(
            "Due to heavy rainfall, all public schools will remain closed on Monday, July 10th. "
            "This applies to all grades in the city limits. Stay safe and avoid unnecessary travel. "
            "#SchoolClosure #WeatherAlert #StaySafe",
            "Yes",
        )

    def test_missing_hashtags(self):
        self._assert_verdict(
            "Due to heavy rainfall, all public schools will remain closed on Monday, July 10th. "
            "This applies to all grades in the city limits. Stay safe and avoid unnecessary travel.",
            "No",
        )

    def test_unclear_message(self):
        self._assert_verdict(
            "Important change to facilities schedule, more info soon. #Notice",
            "No",
        )

    def test_informal_tone(self):
        self._assert_verdict(
            "OMG guys! Schools are out Monday due to rain ☔️ Stay dry!! #RainDay #SchoolClosed",
            "No",
        )

    def test_too_many_hashtags(self):
        # Five hashtags, one more than the reviewer prompt allows.
        self._assert_verdict(
            "Public offices will be closed on January 26th in observance of Republic Day. "
            "We wish everyone a safe and respectful holiday. "
            "#RepublicDay #HolidayNotice #GovtClosed #StaySafe #Announcement",
            "No",
        )

# SENTIMENT ANALYSIS FUNCTION - Additional deterministic example
def analyze_sentiment(text: str) -> str:
    """Classify the sentiment of `text` using the model.

    Args:
        text: The text whose sentiment is classified.

    Returns:
        The model's label with surrounding whitespace removed. The prompt
        requests one of "Positive", "Negative" or "Neutral". An empty string
        is returned when the model produces no text.
    """
    system_prompt = """
    You are a sentiment analysis AI. Classify the given text as one of the following:
    - Positive
    - Negative
    - Neutral

    Respond with only the classification label, no additional text.
    """
    raw_label = generate(system_prompt, text)
    # Callers compare the label with exact equality, so drop stray whitespace
    # or a trailing newline. `or ""` guards a None response.
    return (raw_label or "").strip()

class TestSentimentAnalysis(unittest.TestCase):
    """Exact-match checks on the label returned by analyze_sentiment."""

    def _assert_sentiment(self, text, expected):
        # Classify once and compare against the expected label.
        self.assertEqual(analyze_sentiment(text), expected)

    def test_positive_sentiment(self):
        self._assert_sentiment(
            "I love this new policy! It will really help citizens.",
            "Positive",
        )

    def test_negative_sentiment(self):
        self._assert_sentiment(
            "This service is terrible and needs immediate improvement.",
            "Negative",
        )

    def test_neutral_sentiment(self):
        self._assert_sentiment(
            "The office will be open from 9 AM to 5 PM.",
            "Neutral",
        )

# EMAIL RESPONSE GENERATION - Non-deterministic function example
# System prompt passed by generate_email_response() to generate(). It sets the
# length limit, tone and structure expected of the drafted email body.
email_system_prompt = """
You are an AI assistant helping draft professional email responses for a government office.

### Instructions:
1. Keep responses under 150 words
2. Use formal, polite tone
3. Address the query directly
4. Include next steps if applicable
5. End with professional closing

### Output format:
Return only the email body text, no subject line or signature.
"""

def generate_email_response(query):
    """Draft an email reply to the citizen query given in `query`."""
    request = f"Please draft a response to this citizen query: {query}"
    return generate(email_system_prompt, request)

def evaluate_email_response(email_text: str) -> str:
    """Ask the model whether an email reply meets the communication criteria.

    Args:
        email_text: The email body to review.

    Returns:
        The model's verdict with surrounding whitespace removed. The prompt
        requests "Yes" or "No". An empty string is returned when the model
        produces no text.
    """
    evaluation_prompt = """
    Evaluate if this email response meets professional government communication standards:

    Criteria:
    1. Professional and polite tone
    2. Under 150 words
    3. Addresses the query clearly
    4. Uses appropriate formal language
    5. Includes helpful next steps

    Respond with only "Yes" or "No".
    """
    verdict = generate(evaluation_prompt, email_text)
    # Callers compare against "Yes"/"No" exactly, so drop stray whitespace or
    # a trailing newline. `or ""` guards a None response.
    return (verdict or "").strip()

class TestEmailGeneration(unittest.TestCase):
    """Generates an email reply, then has the model judge that reply."""

    def _assert_reply_accepted(self, query):
        # Two model calls: draft the reply, then review the draft.
        reply = generate_email_response(query)
        self.assertEqual(evaluate_email_response(reply), "Yes")

    def test_professional_email_response(self):
        self._assert_reply_accepted("How do I renew my driver's license?")

    def test_tax_inquiry_response(self):
        self._assert_reply_accepted("What documents do I need for tax filing?")

# VERTEX AI EVALUATION SETUP
def setup_vertex_evaluation(project: str = "your-project-id", location: str = "us-central1") -> None:
    """Initialize the Vertex AI SDK for the evaluation run.

    Args:
        project: Google Cloud project ID. The default is a placeholder;
            replace it or pass your own project ID.
        location: Vertex AI region passed to ``vertexai.init``.
    """
    vertexai.init(project=project, location=location)

# COMPREHENSIVE EVALUATION DATASET
def create_evaluation_dataset():
    """Build the DataFrame of sample announcements and posts to evaluate.

    Returns:
        A DataFrame with one row per sample and the columns "instruction"
        (the post system prompt), "context" (the announcement, prefixed with
        "announcement: ") and "response" (the post to be scored).
    """
    # (announcement, post) pairs, in the order they appear in the dataset.
    samples = [
        (
            "Flood warnings have been issued for low-lying areas in Mumbai from July 5 to July 7. Residents are advised to stay alert and avoid unnecessary travel.",
            "Flood warnings in Mumbai from July 5-7. Please stay alert and avoid travel unless necessary. Stay safe everyone! #MumbaiFloods #StaySafe",
        ),
        (
            "City hospitals will offer free vaccination camps on August 1 and August 2. Citizens are encouraged to participate.",
            "Free vaccines at city hospitals on Aug 1 & 2. Don't miss it! Protect yourself and loved ones. #VaccinationDrive #HealthFirst",
        ),
        (
            "All markets in Hyderabad will be closed on September 10 due to the Ganesh Chaturthi festival.",
            "Markets closed Sept 10 for Ganesh Chaturthi. Plan your shopping ahead. #GaneshChaturthi #Hyderabad",
        ),
        (
            "Severe air pollution levels are expected in Delhi over the next three days. People are advised to limit outdoor activities.",
            "Air pollution bad in Delhi for next 3 days. Don't go outside much. #DelhiPollution",
        ),
        (
            "Due to ongoing construction, Main Street in Pune will be closed from June 15 to June 20. Use alternative routes.",
            "Main Street Pune closed June 15-20. Find other roads. #PuneTraffic",
        ),
        (
            "An outbreak of seasonal flu has been reported in several schools across Kolkata. Students with symptoms should stay home and seek medical advice.",
            "Flu outbreak in Kolkata schools. Sick students must stay home and see a doctor. #KolkataFlu #HealthAlert",
        ),
        (
            "Power outage scheduled in Sector 5 and 6 of Chandigarh on July 18 from 9 AM to 5 PM for maintenance.",
            "POWER OUTAGE IN SECTOR 5 & 6 ON JULY 18. PREPARE ACCORDINGLY!!! #PowerOutage #Chandigarh",
        ),
        (
            "The annual city marathon will be held on October 10. Roads around the city center will be closed from 6 AM to 12 PM.",
            "City marathon on Oct 10. Road closures 6 AM-12 PM downtown. Plan alternate routes. #CityMarathon #RoadClosure",
        ),
    ]

    # Every row shares the same instruction text; only context/response vary.
    rows = [
        {
            "instruction": post_system_prompt,
            "context": f"announcement: {announcement}",
            "response": post,
        }
        for announcement, post in samples
    ]
    return pd.DataFrame(rows)

def run_vertex_evaluation(eval_dataset):
    """Score `eval_dataset` with four built-in pointwise metrics.

    The experiment name and the run name share one timestamped label, so
    each call uses a fresh name.

    Returns:
        Whatever ``EvalTask.evaluate`` returns for the run.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    run_label = f"social-media-post-{timestamp}"

    pointwise = MetricPromptTemplateExamples.Pointwise
    task = EvalTask(
        dataset=eval_dataset,
        metrics=[
            pointwise.GROUNDEDNESS,
            pointwise.VERBOSITY,
            pointwise.INSTRUCTION_FOLLOWING,
            pointwise.SAFETY,
        ],
        experiment=run_label,
    )

    # The placeholders name columns of the evaluation dataset.
    return task.evaluate(
        prompt_template="Instruction: {instruction}. Prompt: {context}. Post: {response}",
        experiment_run_name=run_label,
    )

# DEMONSTRATION FUNCTIONS
def run_classification_tests():
    """Run only the deterministic classification test cases.

    ``unittest.main(argv=[''])`` collects every TestCase found in
    ``__main__``, which would also execute the post and email generation
    tests. The suite is therefore built explicitly from the two
    classification test classes and run with a verbose text runner.
    """
    print("=== CLASSIFICATION TESTS ===")
    loader = unittest.TestLoader()
    suite = unittest.TestSuite(
        loader.loadTestsFromTestCase(case)
        for case in (TestQuestionClassifier, TestSentimentAnalysis)
    )
    # Running a suite directly never calls sys.exit, so this is notebook-safe.
    unittest.TextTestRunner(verbosity=2).run(suite)

def run_generation_tests():
    """Generate one sample post, have the model review it, and print both."""
    print("=== GENERATION TESTS ===")

    # Draft a post from a sample announcement.
    announcement = "Due to flight accident, all schools & colleges in Delhi are closed"
    draft = generate_post(announcement)
    print(f"Generated Post: {draft}")

    # Ask the reviewer prompt for its verdict on that draft.
    print(f"Post Evaluation: {evaluate_post(draft)}")

def run_comprehensive_evaluation():
    """Initialize Vertex AI, build the sample dataset and evaluate it.

    Returns:
        The result object produced by run_vertex_evaluation.
    """
    print("=== COMPREHENSIVE VERTEX AI EVALUATION ===")

    # The SDK must be initialized before the evaluation task is created.
    setup_vertex_evaluation()
    outcome = run_vertex_evaluation(create_evaluation_dataset())

    print("Evaluation completed. Check Vertex AI console for detailed results.")
    return outcome

# MAIN EXECUTION
if __name__ == "__main__":
    print("LLM Testing Framework", "=" * 50, sep="\n")

    # Each stage below only prints its banner: the call that does the work is
    # left commented out, so nothing is sent to the model until it is enabled.
    print()
    print("1. Running Classification Tests...")
    # Uncomment to run: run_classification_tests()

    print()
    print("2. Running Generation Tests...")
    # Uncomment to run: run_generation_tests()

    print()
    print("3. Running Comprehensive Evaluation...")
    # Uncomment to run: run_comprehensive_evaluation()

    print()
    print("All tests completed!")

# Run in notebook - uncomment these lines to execute
# unittest.main(argv=[''], verbosity=2, exit=False)

LLM Testing Framework

1. Running Classification Tests...

2. Running Generation Tests...

3. Running Comprehensive Evaluation...

All tests completed!
