<a href="https://colab.research.google.com/github/rashmisingh100-dev/Project-X/blob/main/GenAIOps_Framework_Module1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# GenAIOps Framework - Module 1: Foundation
# Opening banner; the cells below build the core GenAI components.
print("GenAIOps Framework - Starting Setup", "=" * 50, sep="\n")

GenAIOps Framework - Starting Setup


In [2]:
#Create Visual Directory Setup
import os
import json
from pathlib import Path

In [3]:
# Root of the working directory tree used by every later cell
base_dir = Path('/content/genaiops')
# parents=True: also succeeds when /content itself is missing (e.g. when the
# notebook is run outside Colab); exist_ok=True keeps the cell re-runnable.
base_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# Create the subdirectories under base_dir (safe to re-run)
for subdir_name in ('prompts', 'models', 'evaluations', 'logs'):
    (base_dir / subdir_name).mkdir(exist_ok=True)

In [5]:
# Tell the reader that the directory setup finished and where it lives
setup_summary = [
    "‚úÖ Environment ready!",
    f"üìÅ Base directory: {base_dir}",
    "",
    "üëâ You can now run the rest of the notebook",
]
print("\n".join(setup_summary))

‚úÖ Environment ready!
üìÅ Base directory: /content/genaiops

üëâ You can now run the rest of the notebook


In [6]:
# Verify that every expected subdirectory was created
import os
print("üîç Verifying GenAIOps directory structure...")
print()

for folder in ['prompts', 'models', 'evaluations', 'logs']:
    path = f'/content/genaiops/{folder}'
    # isdir rather than exists: a stray regular file with the same name must
    # not be reported as a correctly created directory.
    exists = os.path.isdir(path)
    status = "‚úÖ" if exists else "‚ùå"
    print(f"{status} {path}")

üîç Verifying GenAIOps directory structure...

‚úÖ /content/genaiops/prompts
‚úÖ /content/genaiops/models
‚úÖ /content/genaiops/evaluations
‚úÖ /content/genaiops/logs


In [7]:
# ========================================
# COMPONENT 1: Prompt Management System
# ========================================
print("üìù Building Prompt Management System...")
print()
# Prompt template v1.0 for customer support.
# The only placeholder is {customer_question}. The literal starts with a
# newline (right after the opening quotes) and ends directly after "Response:"
# with no trailing newline.
# NOTE(review): the brace placeholder looks intended for str.format(), but no
# formatting call is visible in this part of the notebook - confirm at the call site.
# NOTE(review): "Include next steps when applicable" and "Include 2-3 specific
# next steps" overlap; consider merging them in a future prompt version.
customer_support_prompt_v1 = """
You are a helpful customer service representative for Prudential Financial.

Customer Question:{customer_question}

Instructions:
- Be professional and empathetic
- Provide accurate information about policies, only factual and grounded answer with no hallucination
- If you don't know the answer, say so clearly
- Keep response under 150 words
- Include next steps when applicable
- Professional yet conversational tone
- Include 2-3 specific next steps
- Offer specialist escalation if complex

Safety Rules:
- Never provide medical advice
- Never make financial predictions
- Don't discuss other customers
- Escalate legal questions to compliance team
Response:"""


üìù Building Prompt Management System...



In [8]:
# Save this prompt to our prompts directory
prompt_file_path = '/content/genaiops/prompts/customer_support_v1.0.txt'

# Explicit UTF-8: without it open() uses the platform's default encoding, so a
# prompt containing non-ASCII text would be stored differently per machine.
with open(prompt_file_path, 'w', encoding='utf-8') as f:
    f.write(customer_support_prompt_v1)

print(f"‚úÖ Prompt saved to: {prompt_file_path}")
print()
print("üìÑ Prompt content:")
print("-" * 50)
print(customer_support_prompt_v1)


‚úÖ Prompt saved to: /content/genaiops/prompts/customer_support_v1.0.txt

üìÑ Prompt content:
--------------------------------------------------

You are a helpful customer service representative for Prudential Financial.

Customer Question:{customer_question}

Instructions:
- Be professional and empathetic
- Provide accurate information about policies, only factual and grounded answer with no hallucination
- If you don't know the answer, say so clearly
- Keep response under 150 words
- Include next steps when applicable
- Professional yet conversational tone
- Include 2-3 specific next steps
- Offer specialist escalation if complex

Safety Rules:
- Never provide medical advice
- Never make financial predictions
- Don't discuss other customers
- Escalate legal questions to compliance team
Response:


In [9]:
# Prompt Metadata (Governance)
# Builds the governance record for prompt v1.0 and stores it next to the
# prompt text as <prompt>_v<version>_metadata.json.
import json
from datetime import datetime

# Create metadata for our prompt
# NOTE(review): "prompt_id" embeds the version ("customer_support_v1.0") while
# the loader test calls load_prompt("customer_support", version="1.0") -
# confirm which form is the canonical ID.
# NOTE(review): "created_date" is the date this cell runs, whereas
# "approval_date" is a fixed literal, so the approval can predate the creation.
prompt_metadata = {
    "prompt_id": "customer_support_v1.0",
    "version": "1.0",
    "created_date": datetime.now().strftime("%Y-%m-%d"),
    "created_by": "Rashmi Singh",
    "status": "approved",
    "use_case": "Customer service chatbot",
    "model_compatibility": ["gemini-1.5-pro", "gemini-1.5-flash"],
    "approved_by": "Data & AI COE (Group)",
    "approval_date": "2024-02-14",
    "description": "Professional customer service prompt with empathy and accuracy focus",
    "test_pass_rate": 0.95,  # 95% of test cases passed
    "production_apps": ["CustomerSupportBot", "EmailAutomation"]
}

# Save metadata as JSON (2-space indent, same formatting as the echo below)
metadata_file = '/content/genaiops/prompts/customer_support_v1.0_metadata.json'

with open(metadata_file, 'w') as f:
    json.dump(prompt_metadata, f, indent=2)

print("‚úÖ Prompt metadata saved")
print()
print("üìã Metadata:")
print(json.dumps(prompt_metadata, indent=2))

‚úÖ Prompt metadata saved

üìã Metadata:
{
  "prompt_id": "customer_support_v1.0",
  "version": "1.0",
  "created_date": "2026-02-15",
  "created_by": "Rashmi Singh",
  "status": "approved",
  "use_case": "Customer service chatbot",
  "model_compatibility": [
    "gemini-1.5-pro",
    "gemini-1.5-flash"
  ],
  "approved_by": "Data & AI COE (Group)",
  "approval_date": "2024-02-14",
  "description": "Professional customer service prompt with empathy and accuracy focus",
  "test_pass_rate": 0.95,
  "production_apps": [
    "CustomerSupportBot",
    "EmailAutomation"
  ]
}


In [10]:
#Prompt Loader Function

def load_prompt(prompt_id, version="latest", prompts_dir='/content/genaiops/prompts'):
    """
    Load a prompt template and its metadata by ID and version.

    Files are looked up as ``<prompts_dir>/<prompt_id>_v<version>.txt`` and
    ``<prompts_dir>/<prompt_id>_v<version>_metadata.json``.

    Args:
        prompt_id: Name of the prompt (e.g., 'customer_support')
        version: Version number (e.g., '1.0') or 'latest'
        prompts_dir: Directory that holds the prompt files. Defaults to the
            notebook's prompts directory.

    Returns:
        dict with 'template' and 'metadata' on success.
        ``{"error": ...}`` when the prompt text file does not exist.
        'metadata' is ``{"warning": ...}`` when the metadata file is missing
        or does not contain valid JSON.
    """
    import os  # local import keeps the function usable on its own

    if version == "latest":
        # In a real system this would query a registry for the latest version.
        # For now "latest" is pinned to v1.0.
        version = "1.0"

    prompt_file = os.path.join(prompts_dir, f'{prompt_id}_v{version}.txt')
    metadata_file = os.path.join(prompts_dir, f'{prompt_id}_v{version}_metadata.json')

    # Load prompt template (explicit encoding so reads match on every platform)
    try:
        with open(prompt_file, 'r', encoding='utf-8') as f:
            template = f.read()
    except FileNotFoundError:
        return {"error": f"Prompt {prompt_id} v{version} not found"}

    # Load metadata; metadata problems degrade to a warning so the template
    # itself stays usable.
    try:
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata = json.load(f)
    except FileNotFoundError:
        metadata = {"warning": "No metadata found"}
    except json.JSONDecodeError as exc:
        # A corrupt metadata file is handled like a missing one instead of
        # crashing the caller.
        metadata = {"warning": f"Metadata file is not valid JSON: {exc}"}

    return {
        "template": template,
        "metadata": metadata
    }


# Test the loader
print("üß™ Testing prompt loader...")
print()

result = load_prompt("customer_support", version="1.0")

# load_prompt reports a missing prompt as {"error": ...} instead of raising.
# Stop here rather than print a success banner and then fail on
# result['template'] with an unrelated KeyError.
if "error" in result:
    raise FileNotFoundError(result["error"])

print("‚úÖ Prompt loaded successfully!")
print()
print("üìÑ Template:")
print(result['template'][:200] + "...")  # First 200 chars
print()
print("üìã Metadata:")
# .get(): metadata is only {"warning": ...} when the JSON file is absent
print(f"  Version: {result['metadata'].get('version', 'n/a')}")
print(f"  Status: {result['metadata'].get('status', 'n/a')}")
print(f"  Use Case: {result['metadata'].get('use_case', 'n/a')}")

üß™ Testing prompt loader...

‚úÖ Prompt loaded successfully!

üìÑ Template:

You are a helpful customer service representative for Prudential Financial.

Customer Question:{customer_question}

Instructions:
- Be professional and empathetic
- Provide accurate information about...

üìã Metadata:
  Version: 1.0
  Status: approved
  Use Case: Customer service chatbot


In [11]:
# Prompt Version Comparison Tool
# Create an improved version (v1.1). Placeholders: {customer_name},
# {policy_type}, {customer_since} and {customer_question}.
customer_support_prompt_v1_1 = """
You are an empathetic customer service representative for Prudential Financial with deep knowledge of our insurance products and policies.

Customer Profile:
- Name: {customer_name}
- Policy Type: {policy_type}
- Customer Since: {customer_since}

Customer Question:
{customer_question}

Instructions:
- Address customer by name to personalize the response
- Be professional, empathetic, and solution-oriented
- Reference their specific policy type when relevant
- Provide accurate information about Prudential policies
- If you don't know the answer, be honest and offer to connect them with a specialist
- Keep response under 150 words
- Always include clear next steps
- End with "Is there anything else I can help you with today?"
- Professional yet conversational tone
- Include 2-3 specific next steps
- Offer specialist escalation if complex

Safety Rules:
- Never provide medical advice
- Never make financial predictions
- Don't discuss other customers
- Escalate legal questions to compliance team

Response:
"""

# Save v1.1
# Explicit UTF-8: without it open() uses the platform's default encoding, so a
# prompt containing non-ASCII text would be stored differently per machine.
prompt_v1_1_path = '/content/genaiops/prompts/customer_support_v1.1.txt'
with open(prompt_v1_1_path, 'w', encoding='utf-8') as f:
    f.write(customer_support_prompt_v1_1)

# Create metadata for v1.1 (no approval fields yet: the prompt is still in testing)
metadata_v1_1 = {
    "prompt_id": "customer_support_v1.1",
    "version": "1.1",
    "created_date": datetime.now().strftime("%Y-%m-%d"),
    "created_by": "Rashmi Singh",
    "status": "testing",  # Not yet approved for production
    "use_case": "Customer service chatbot",
    "model_compatibility": ["gemini-1.5-pro", "gemini-1.5-flash"],
    "description": "Enhanced with personalization and policy-type awareness",
    "improvements_over_v1.0": [
        "Personalization with customer name",
        "Policy-type specific responses",
        "Customer tenure awareness",
        "Standardized closing question"
    ],
    "test_pass_rate": None,  # Not yet tested
    "production_apps": []  # Not yet deployed
}

metadata_v1_1_path = '/content/genaiops/prompts/customer_support_v1.1_metadata.json'
with open(metadata_v1_1_path, 'w') as f:
    json.dump(metadata_v1_1, f, indent=2)

# Hand-written summary of the differences between the two versions
print("‚úÖ Created prompt v1.1 (improved version)")
print()
print("üÜö Comparing v1.0 vs v1.1:")
print("-" * 60)
print("v1.0 (Production):")
print("  - Generic customer addressing")
print("  - No personalization")
print("  - Status: Approved ‚úÖ")
print()
print("v1.1 (Testing):")
print("  - Personalized with customer name")
print("  - Policy-type aware")
print("  - Customer tenure aware")
print("  - Standardized closing")
print("  - Status: Testing üß™")
print()
print("üìä Next step: A/B testing to compare quality")

‚úÖ Created prompt v1.1 (improved version)

üÜö Comparing v1.0 vs v1.1:
------------------------------------------------------------
v1.0 (Production):
  - Generic customer addressing
  - No personalization
  - Status: Approved ‚úÖ

v1.1 (Testing):
  - Personalized with customer name
  - Policy-type aware
  - Customer tenure aware
  - Standardized closing
  - Status: Testing üß™

üìä Next step: A/B testing to compare quality


In [16]:
# ========================================
# FEATURE 1: A/B Testing Framework
# ========================================

# Section banner followed by one blank line
print("üî¨ Building A/B Testing Framework for Prompts...", end="\n\n")

import hashlib
import random

class ABTestManager:
    """
    Manages A/B tests for prompt versions.

    Test configurations and request counters are held in memory in
    ``active_tests``. A JSON snapshot of each configuration is written to
    ``config_dir`` when the test is created; the snapshot is not refreshed
    when counters change afterwards.
    """

    def __init__(self, config_dir='/content/genaiops/prompts'):
        """
        Args:
            config_dir: Directory that receives the ``ab_test_<test_id>.json``
                snapshots. Defaults to the notebook's prompts directory.
        """
        self.config_dir = config_dir
        self.active_tests = {}
        # Not populated by any method of this class.
        self.test_results = {}

    def create_ab_test(self, test_id, prompt_id, variant_a_version,
                       variant_b_version, traffic_split=0.5):
        """
        Create a new A/B test

        Args:
            test_id: Unique test identifier
            prompt_id: Which prompt to test
            variant_a_version: Control version (e.g., "1.0")
            variant_b_version: Treatment version (e.g., "1.1")
            traffic_split: Share of traffic routed to variant B (0.0 to 1.0)

        Returns:
            The stored test configuration dict.

        Raises:
            ValueError: if traffic_split is outside 0.0..1.0.
        """
        import os
        from datetime import datetime

        # Reject impossible splits up front; otherwise variant A would be
        # stored with a negative (or >100%) traffic share.
        if not 0.0 <= traffic_split <= 1.0:
            raise ValueError(
                f"traffic_split must be between 0.0 and 1.0, got {traffic_split!r}"
            )

        self.active_tests[test_id] = {
            "test_id": test_id,
            "prompt_id": prompt_id,
            "variant_a": {
                "version": variant_a_version,
                "traffic": 1 - traffic_split,
                "requests": 0,
                "label": "Control (A)"
            },
            "variant_b": {
                "version": variant_b_version,
                "traffic": traffic_split,
                "requests": 0,
                "label": "Treatment (B)"
            },
            "status": "active",
            # Actual creation date, same format as the prompt metadata cells
            "created_date": datetime.now().strftime("%Y-%m-%d"),
            "total_requests": 0
        }

        # Save test configuration (create the directory if it is missing)
        os.makedirs(self.config_dir, exist_ok=True)
        test_file = os.path.join(self.config_dir, f'ab_test_{test_id}.json')
        with open(test_file, 'w') as f:
            json.dump(self.active_tests[test_id], f, indent=2)

        print(f"‚úÖ A/B Test Created: {test_id}")
        print(f"   Prompt: {prompt_id}")
        print(f"   Variant A (Control): v{variant_a_version} - {(1-traffic_split)*100:.0f}% traffic")
        print(f"   Variant B (Treatment): v{variant_b_version} - {traffic_split*100:.0f}% traffic")

        return self.active_tests[test_id]

    def assign_variant(self, test_id, user_id):
        """
        Assign a user to variant A or B and count the request.

        The assignment is derived from an MD5 hash of "<test_id>:<user_id>",
        so the same user always lands in the same variant of a given test.
        Every call increments the request counters, including repeat calls
        for the same user.

        Args:
            test_id: Which test
            user_id: User identifier (email, customer ID, etc.)

        Returns:
            dict with assigned variant info, or {"error": ...} for an
            unknown test_id
        """

        if test_id not in self.active_tests:
            return {"error": f"Test {test_id} not found"}

        test = self.active_tests[test_id]

        # Map the hash onto 100 buckets (0.00 to 0.99); the split is therefore
        # honoured with 1% granularity.
        hash_input = f"{test_id}:{user_id}".encode()
        hash_value = int(hashlib.md5(hash_input).hexdigest(), 16)
        user_hash = (hash_value % 100) / 100

        # Buckets below the treatment share go to B, the rest to A
        if user_hash < test["variant_b"]["traffic"]:
            assigned_variant = "B"
            variant_key = "variant_b"
        else:
            assigned_variant = "A"
            variant_key = "variant_a"

        version = test[variant_key]["version"]
        test[variant_key]["requests"] += 1
        test["total_requests"] += 1

        return {
            "test_id": test_id,
            "user_id": user_id,
            "assigned_variant": assigned_variant,
            "prompt_version": version,
            "prompt_id": test["prompt_id"]
        }

    def get_prompt_for_user(self, test_id, user_id):
        """
        Get the appropriate prompt version for a user in an A/B test

        Args:
            test_id: Which test
            user_id: User identifier

        Returns:
            dict with 'assignment' (see assign_variant) and 'prompt' (the
            result of load_prompt for the assigned version), or
            {"error": ...} for an unknown test_id
        """

        assignment = self.assign_variant(test_id, user_id)

        if "error" in assignment:
            return assignment

        # Load the prompt version the user was assigned to
        prompt_data = load_prompt(assignment["prompt_id"], assignment["prompt_version"])

        return {
            "assignment": assignment,
            "prompt": prompt_data
        }

    @staticmethod
    def _variant_stats(variant, total_requests):
        """Summarize one variant: version, request count and share of all requests in percent."""
        requests = variant["requests"]
        # Guard against division by zero before any request was routed
        percentage = (requests / total_requests * 100) if total_requests > 0 else 0
        return {
            "version": variant["version"],
            "requests": requests,
            "percentage": percentage
        }

    def get_test_stats(self, test_id):
        """
        Get statistics for an A/B test

        Returns:
            dict with test status, total request count and per-variant
            version/requests/percentage, or {"error": ...} for an unknown
            test_id
        """

        if test_id not in self.active_tests:
            return {"error": f"Test {test_id} not found"}

        test = self.active_tests[test_id]
        total_requests = test["total_requests"]

        return {
            "test_id": test_id,
            "status": test["status"],
            "total_requests": total_requests,
            "variant_a": self._variant_stats(test["variant_a"], total_requests),
            "variant_b": self._variant_stats(test["variant_b"], total_requests)
        }


# ========================================
# Exercise the A/B Testing Framework
# ========================================

AB_TEST_ID = "customer_support_feb_2024"
HEAVY_RULE = "=" * 70
LIGHT_RULE = "-" * 70

print("\n" + HEAVY_RULE)
print("üß™ Testing A/B Framework...")
print(HEAVY_RULE + "\n")

# One manager instance holds the test configuration and its counters
ab_manager = ABTestManager()

# Register the test: control v1.0 keeps 80% of traffic, treatment v1.1 gets 20%
test = ab_manager.create_ab_test(
    test_id=AB_TEST_ID,
    prompt_id="customer_support",
    variant_a_version="1.0",
    variant_b_version="1.1",
    traffic_split=0.2
)

print("\n" + LIGHT_RULE)
print("üìä Simulating 100 User Requests...")
print(LIGHT_RULE + "\n")

# Route 100 distinct synthetic users through the test
for i in range(100):
    user_id = f"user_{i}@prudential.com"
    result = ab_manager.get_prompt_for_user(AB_TEST_ID, user_id)

# Snapshot the counters before the repeat-assignment check below adds to them
stats = ab_manager.get_test_stats(AB_TEST_ID)
control_stats = stats['variant_a']
treatment_stats = stats['variant_b']

print("üìà A/B Test Results:")
print(LIGHT_RULE)
print(f"Test ID: {stats['test_id']}")
print(f"Status: {stats['status']}")
print(f"Total Requests: {stats['total_requests']}")
print()
print(f"Variant A (Control - v{control_stats['version']}):")
print(f"  Requests: {control_stats['requests']}")
print(f"  Percentage: {control_stats['percentage']:.1f}%")
print()
print(f"Variant B (Treatment - v{treatment_stats['version']}):")
print(f"  Requests: {treatment_stats['requests']}")
print(f"  Percentage: {treatment_stats['percentage']:.1f}%")
print()

# The same user must land in the same variant on every request
print(LIGHT_RULE)
print("üîí Testing Consistent Hashing (same user = same variant)...")
print(LIGHT_RULE + "\n")

test_user = "alice@prudential.com"
assignments = []

for i in range(5):
    result = ab_manager.assign_variant(AB_TEST_ID, test_user)
    assignments.append(result["assigned_variant"])

all_same_variant = len(set(assignments)) == 1
print(f"User: {test_user}")
print(f"Assignment (5 requests): {assignments}")
print(f"‚úÖ All same variant: {all_same_variant}")

print("\n" + HEAVY_RULE)
print("‚úÖ A/B Testing Framework Complete!")
print(HEAVY_RULE)

üî¨ Building A/B Testing Framework for Prompts...


üß™ Testing A/B Framework...

‚úÖ A/B Test Created: customer_support_feb_2024
   Prompt: customer_support
   Variant A (Control): v1.0 - 80% traffic
   Variant B (Treatment): v1.1 - 20% traffic

----------------------------------------------------------------------
üìä Simulating 100 User Requests...
----------------------------------------------------------------------

üìà A/B Test Results:
----------------------------------------------------------------------
Test ID: customer_support_feb_2024
Status: active
Total Requests: 100

Variant A (Control - v1.0):
  Requests: 82
  Percentage: 82.0%

Variant B (Treatment - v1.1):
  Requests: 18
  Percentage: 18.0%

----------------------------------------------------------------------
üîí Testing Consistent Hashing (same user = same variant)...
----------------------------------------------------------------------

User: alice@prudential.com
Assignment (5 requests): ['A', 'A', 'A', 'A'

In [12]:
# ========================================
# COMPONENT 2: Model Registry
# ========================================

print("ü§ñ Building Model Registry...")
print()

# Model catalog: one entry per model, keyed by model ID (each entry repeats
# the ID in "model_id").
# - "status" is "approved" or "deprecated" in the entries below; the summary
#   display further down also recognises "testing".
# - Approved entries carry "approved_date"/"approved_by"; the deprecated entry
#   carries "deprecated_date", "deprecated_reason" and "replacement_model"
#   instead.
# - The fine-tuned entry adds "base_model", "training_data" and
#   "use_case_restriction".
# NOTE(review): the inline comments describe "max_tokens" as the context
# window size, not an output-token limit - confirm the intended meaning.
model_catalog = {
    "gemini-1.5-pro": {
        "model_id": "gemini-1.5-pro",
        "display_name": "Gemini 1.5 Pro",
        "provider": "Google",
        "model_type": "foundation",
        "status": "approved",
        "tier": "premium",
        "capabilities": ["text-generation", "code-generation", "analysis"],
        "max_tokens": 2000000,  # 2M token context window
        "approved_date": "2024-11-01",
        "approved_by": "ML Governance Committee"
    },

    "gemini-1.5-flash": {
        "model_id": "gemini-1.5-flash",
        "display_name": "Gemini 1.5 Flash",
        "provider": "Google",
        "model_type": "foundation",
        "status": "approved",
        "tier": "standard",
        "capabilities": ["text-generation", "high-volume-tasks"],
        "max_tokens": 1000000,  # 1M token context window
        "approved_date": "2024-11-01",
        "approved_by": "ML Governance Committee"
    },

    "claude-3-opus": {
        "model_id": "claude-3-opus",
        "display_name": "Claude 3 Opus",
        "provider": "Anthropic",
        "model_type": "foundation",
        "status": "approved",
        "tier": "premium",
        "capabilities": ["text-generation", "analysis", "long-context"],
        "max_tokens": 200000,  # 200K token context
        "approved_date": "2024-10-15",
        "approved_by": "ML Governance Committee"
    },

    "customer-support-v1.2": {
        "model_id": "customer-support-v1.2",
        "display_name": "Customer Support Model v1.2",
        "provider": "Prudential (Fine-tuned Gemini)",
        "model_type": "fine-tuned",
        "base_model": "gemini-1.5-pro",
        "status": "approved",
        "tier": "custom",
        "capabilities": ["customer-service", "policy-questions"],
        "max_tokens": 2000000,
        "approved_date": "2024-11-15",
        "approved_by": "ML Governance Committee",
        "training_data": "gs://prudential-data/customer-support-conversations",
        "use_case_restriction": "customer-support-only"
    },

    "gpt-4": {
        "model_id": "gpt-4",
        "display_name": "GPT-4",
        "provider": "OpenAI",
        "model_type": "foundation",
        "status": "deprecated",
        "tier": "premium",
        "capabilities": ["text-generation"],
        "max_tokens": 128000,
        "deprecated_date": "2024-12-01",
        "deprecated_reason": "Security review failed - data residency concerns",
        "replacement_model": "gemini-1.5-pro"
    }
}

# Persist the catalog as indented JSON
catalog_file = '/content/genaiops/models/model_catalog.json'
with open(catalog_file, 'w') as f:
    f.write(json.dumps(model_catalog, indent=2))

print(f"‚úÖ Model catalog created with {len(model_catalog)} models", end="\n\n")

# One status marker per known state; anything else falls back to the cross
status_markers = {"approved": "‚úÖ", "testing": "‚ö†Ô∏è"}

# Display summary
print("üìä Model Summary:")
print("-" * 60)
for model_id, details in model_catalog.items():
    status_emoji = status_markers.get(details["status"], "‚ùå")
    print(f"{status_emoji} {details['display_name']}")
    print(f"   Status: {details['status']} | Provider: {details['provider']} | Tier: {details['tier']}", end="\n\n")

ü§ñ Building Model Registry...

‚úÖ Model catalog created with 5 models

üìä Model Summary:
------------------------------------------------------------
‚úÖ Gemini 1.5 Pro
   Status: approved | Provider: Google | Tier: premium

‚úÖ Gemini 1.5 Flash
   Status: approved | Provider: Google | Tier: standard

‚úÖ Claude 3 Opus
   Status: approved | Provider: Anthropic | Tier: premium

‚úÖ Customer Support Model v1.2
   Status: approved | Provider: Prudential (Fine-tuned Gemini) | Tier: custom

‚ùå GPT-4
   Status: deprecated | Provider: OpenAI | Tier: premium



In [13]:
# ========================================
# Add Cost Information to Model Registry
# ========================================

print("üí∞ Adding cost tracking to Model Registry...")
print()

# Cost data for each model, keyed by the same model IDs as model_catalog.
# Prices are in dollars per 1K tokens, split into input and output tokens.
# NOTE(review): "cost_tier" uses a different vocabulary from the catalog's
# "tier" (e.g. Gemini 1.5 Flash is "budget" here but "standard" in the
# catalog) - confirm the two fields are meant to differ.
# NOTE(review): "training_cost" appears only on the fine-tuned model and is not
# read by the cost code visible in this notebook section.
model_costs = {
    "gemini-1.5-pro": {
        "input_cost_per_1k": 0.00125,   # $0.00125 per 1K input tokens
        "output_cost_per_1k": 0.005,    # $0.005 per 1K output tokens
        "cost_tier": "premium",
        "notes": "Best quality, highest cost"
    },

    "gemini-1.5-flash": {
        "input_cost_per_1k": 0.000075,  # $0.000075 per 1K input tokens
        "output_cost_per_1k": 0.0003,   # $0.0003 per 1K output tokens
        "cost_tier": "budget",
        "notes": "Fast and cheap, good for high-volume"
    },

    "claude-3-opus": {
        "input_cost_per_1k": 0.015,     # $0.015 per 1K input tokens
        "output_cost_per_1k": 0.075,    # $0.075 per 1K output tokens
        "cost_tier": "premium-plus",
        "notes": "Most expensive, best for complex tasks"
    },

    "customer-support-v1.2": {
        "input_cost_per_1k": 0.00125,   # Same as base Gemini Pro
        "output_cost_per_1k": 0.005,
        "cost_tier": "premium",
        "training_cost": 850.00,        # One-time training cost
        "notes": "Fine-tuned model, training cost already paid"
    },

    "gpt-4": {
        "input_cost_per_1k": 0.03,      # Expensive (deprecated)
        "output_cost_per_1k": 0.06,
        "cost_tier": "deprecated",
        "notes": "Deprecated - do not use"
    }
}

# Persist the cost table as indented JSON
cost_file = '/content/genaiops/models/model_costs.json'
with open(cost_file, 'w') as f:
    f.write(json.dumps(model_costs, indent=2))

print("‚úÖ Cost tracking added for all models", end="\n\n")

# Per-1K-token price table for every catalogued, non-deprecated model
table_rule = "-" * 80
print("üíµ Cost Comparison (per 1K tokens):")
print(table_rule)
print(f"{'Model':<30} {'Input Cost':>12} {'Output Cost':>12} {'Tier':<15}")
print(table_rule)

for model_id, costs in model_costs.items():
    # Skip models the catalog does not know, and deprecated ones
    if model_id not in model_catalog:
        continue
    if model_catalog[model_id]["status"] == "deprecated":
        continue

    input_cost = "${:.6f}".format(costs['input_cost_per_1k'])
    output_cost = "${:.6f}".format(costs['output_cost_per_1k'])
    tier = costs['cost_tier']

    model_name = model_catalog[model_id]["display_name"]
    print(f"{model_name:<30} {input_cost:>12} {output_cost:>12} {tier:<15}")

print()

# Worked example: 1M input tokens plus 500K output tokens per model
print("üìä Example Cost Calculation:")
print("Scenario: Process 1M input tokens + generate 500K output tokens")
print("-" * 80)

example_input_tokens = 1000000  # 1M tokens
example_output_tokens = 500000  # 500K tokens

# Totals are kept so the closing insight is computed rather than hard-coded
example_totals = {}

for model_id in ["gemini-1.5-pro", "gemini-1.5-flash", "claude-3-opus"]:
    if model_id in model_costs:
        costs = model_costs[model_id]

        # Prices are per 1K tokens, hence the division by 1000
        input_cost = (example_input_tokens / 1000) * costs['input_cost_per_1k']
        output_cost = (example_output_tokens / 1000) * costs['output_cost_per_1k']
        total_cost = input_cost + output_cost
        example_totals[model_id] = total_cost

        model_name = model_catalog[model_id]["display_name"]
        print(f"{model_name:<30} Total Cost: ${total_cost:,.2f}")

print()

# The previous hard-coded "94%" did not match the figures above: 94% is the
# saving of Flash versus Gemini Pro, while versus Claude Opus it is ~99.6%.
flash_total = example_totals.get("gemini-1.5-flash")
opus_total = example_totals.get("claude-3-opus")
if flash_total is not None and opus_total:
    savings_pct = (1 - flash_total / opus_total) * 100
    print(f"üí° Insight: Gemini Flash is {savings_pct:.1f}% cheaper than Claude Opus for high-volume tasks!")

üí∞ Adding cost tracking to Model Registry...

‚úÖ Cost tracking added for all models

üíµ Cost Comparison (per 1K tokens):
--------------------------------------------------------------------------------
Model                            Input Cost  Output Cost Tier           
--------------------------------------------------------------------------------
Gemini 1.5 Pro                    $0.001250    $0.005000 premium        
Gemini 1.5 Flash                  $0.000075    $0.000300 budget         
Claude 3 Opus                     $0.015000    $0.075000 premium-plus   
Customer Support Model v1.2       $0.001250    $0.005000 premium        

üìä Example Cost Calculation:
Scenario: Process 1M input tokens + generate 500K output tokens
--------------------------------------------------------------------------------
Gemini 1.5 Pro                 Total Cost: $3.75
Gemini 1.5 Flash               Total Cost: $0.22
Claude 3 Opus                  Total Cost: $52.50

üí° Insight: Gemini 

In [14]:
# ========================================
# Model Loader Function
# ========================================

def load_model_info(model_id):
    """
    Look up a model in the registry and report whether it may be used.

    Args:
        model_id: ID of the model (e.g., 'gemini-1.5-pro')

    Returns:
        On success, a dict with 'model_id', 'details' (the catalog entry),
        'costs', 'status' ("ready") and a human-readable 'message'.
        Otherwise a dict with an 'error' key: unknown models also get
        'available_models', deprecated models get the deprecation reason and
        replacement, and any other non-approved status is reported as not
        approved.
    """

    # Unknown ID: list what the registry does contain
    if model_id not in model_catalog:
        return {
            "error": f"Model '{model_id}' not found in registry",
            "available_models": list(model_catalog)
        }

    entry = model_catalog[model_id]
    status = entry["status"]

    # Deprecated models are rejected with the reason and a suggested replacement
    if status == "deprecated":
        return {
            "error": f"Model '{model_id}' is deprecated",
            "status": "deprecated",
            "deprecated_reason": entry.get("deprecated_reason", "Unknown"),
            "replacement": entry.get("replacement_model", "Contact ML team")
        }

    # Every remaining status except "approved" is rejected as well
    if status != "approved":
        return {
            "error": f"Model '{model_id}' is not approved for use",
            "status": status,
            "message": "Only approved models can be used in production"
        }

    # Placeholder used when the model has no entry in model_costs
    unknown_costs = {
        "input_cost_per_1k": "Unknown",
        "output_cost_per_1k": "Unknown",
        "cost_tier": "Unknown"
    }

    return {
        "model_id": model_id,
        "details": entry,
        "costs": model_costs.get(model_id, unknown_costs),
        "status": "ready",
        "message": f"‚úÖ {entry['display_name']} is approved and ready to use"
    }


# ========================================
# Test the Model Loader
# ========================================

print("üß™ Testing Model Loader Function...")
print()

# Test 1: Load approved model
print("Test 1: Load Gemini 1.5 Flash (approved)")
print("-" * 60)
result1 = load_model_info("gemini-1.5-flash")

if "error" not in result1:
    # The message already starts with the status marker; adding another one
    # here produced a doubled marker in the output.
    print(result1['message'])
    print(f"   Provider: {result1['details']['provider']}")
    print(f"   Tier: {result1['details']['tier']}")
    print(f"   Input Cost: ${result1['costs']['input_cost_per_1k']:.6f} per 1K tokens")
    print(f"   Output Cost: ${result1['costs']['output_cost_per_1k']:.6f} per 1K tokens")
else:
    print(f"‚ùå {result1['error']}")

print()

# Test 2: Try to load deprecated model
print("Test 2: Try to load GPT-4 (deprecated)")
print("-" * 60)
result2 = load_model_info("gpt-4")

if "error" in result2:
    print(f"‚ùå {result2['error']}")
    print(f"   Reason: {result2.get('deprecated_reason', 'N/A')}")
    print(f"   Use instead: {result2.get('replacement', 'N/A')}")
else:
    print(f"‚úÖ Model loaded")

print()

# Test 3: Try to load non-existent model
print("Test 3: Try to load non-existent model")
print("-" * 60)
result3 = load_model_info("gpt-5-turbo")

if "error" in result3:
    print(f"‚ùå {result3['error']}")
    print(f"   Available models: {', '.join(result3['available_models'][:3])}...")
else:
    print(f"‚úÖ Model loaded")

# Only claim success when the three scenarios behaved as expected; the banner
# below used to print regardless of the results.
assert result1.get("status") == "ready", result1
assert result2.get("status") == "deprecated", result2
assert "available_models" in result3, result3

print()
print("=" * 60)
print("‚úÖ Model Loader function is working correctly!")

üß™ Testing Model Loader Function...

Test 1: Load Gemini 1.5 Flash (approved)
------------------------------------------------------------
‚úÖ ‚úÖ Gemini 1.5 Flash is approved and ready to use
   Provider: Google
   Tier: standard
   Input Cost: $0.000075 per 1K tokens
   Output Cost: $0.000300 per 1K tokens

Test 2: Try to load GPT-4 (deprecated)
------------------------------------------------------------
‚ùå Model 'gpt-4' is deprecated
   Reason: Security review failed - data residency concerns
   Use instead: gemini-1.5-pro

Test 3: Try to load non-existent model
------------------------------------------------------------
‚ùå Model 'gpt-5-turbo' not found in registry
   Available models: gemini-1.5-pro, gemini-1.5-flash, claude-3-opus...

‚úÖ Model Loader function is working correctly!


In [15]:
# ========================================
# Cost Calculator Function
# ========================================

def calculate_cost(model_id, input_tokens, output_tokens):
    """
    Calculate cost for using a specific model

    Args:
        model_id: ID of the model
        input_tokens: Number of input tokens
        output_tokens: Number of output tokens

    Returns:
        dict with 'model_id', 'model_name', a 'breakdown' of token counts and
        costs, the total 'formatted' as a dollar string and the 'cost_tier'.
        When the model cannot be used, or has no numeric pricing, a dict with
        'error' and 'suggestion' is returned instead.
    """

    # Load model info first (includes validation of approval status)
    model_info = load_model_info(model_id)

    # Unknown, deprecated or unapproved model: pass the reason through
    if "error" in model_info:
        return {
            "error": model_info["error"],
            "suggestion": model_info.get("replacement", "Choose an approved model")
        }

    costs = model_info["costs"]
    input_rate = costs.get("input_cost_per_1k")
    output_rate = costs.get("output_cost_per_1k")

    # load_model_info substitutes the string "Unknown" for models without a
    # pricing entry. Multiplying by it would raise TypeError, so report the
    # gap in the same error shape used above.
    if not all(isinstance(rate, (int, float)) for rate in (input_rate, output_rate)):
        return {
            "error": f"No pricing data available for model '{model_id}'",
            "suggestion": "Add pricing for this model before estimating cost"
        }

    # Prices are per 1K tokens, hence the division by 1000
    input_cost = (input_tokens / 1000) * input_rate
    output_cost = (output_tokens / 1000) * output_rate
    total_cost = input_cost + output_cost

    return {
        "model_id": model_id,
        "model_name": model_info["details"]["display_name"],
        "breakdown": {
            "input_tokens": input_tokens,
            "input_cost": input_cost,
            "output_tokens": output_tokens,
            "output_cost": output_cost,
            "total_cost": total_cost
        },
        "formatted": f"${total_cost:.4f}",
        "cost_tier": costs.get("cost_tier", "Unknown")
    }


def compare_model_costs(input_tokens, output_tokens, models=None):
    """
    Price the same workload on several models and rank them by total cost.

    Args:
        input_tokens: Number of input tokens in the workload.
        output_tokens: Number of output tokens in the workload.
        models: Iterable of model IDs to price. When ``None``, every entry of
            ``model_catalog`` whose ``status`` is ``"approved"`` is used.

    Returns:
        List of ``calculate_cost`` results, cheapest first. Models for which
        ``calculate_cost`` reports an error are left out.
    """
    if models is None:
        candidates = [
            catalog_id
            for catalog_id, entry in model_catalog.items()
            if entry["status"] == "approved"
        ]
    else:
        candidates = models

    # Price every candidate in order, then keep only the successful quotes.
    quotes = (
        calculate_cost(candidate, input_tokens, output_tokens)
        for candidate in candidates
    )
    priced = [quote for quote in quotes if "error" not in quote]

    # sorted() is stable, so equally priced models keep their input order.
    return sorted(priced, key=lambda quote: quote["breakdown"]["total_cost"])


# ========================================
# Test Cost Calculator
# ========================================

print("üß™ Testing Cost Calculator...")
print()

# Test 1: Calculate cost for single model
print("Test 1: Cost for 10,000 customer support messages")
print("Assumptions: 100 input tokens + 150 output tokens per message")
print("-" * 70)

messages = 10000
input_per_message = 100
output_per_message = 150

total_input = messages * input_per_message   # 1M tokens
total_output = messages * output_per_message # 1.5M tokens

result = calculate_cost("gemini-1.5-flash", total_input, total_output)

if "error" not in result:
    print(f"Model: {result['model_name']}")
    print(f"  Input: {result['breakdown']['input_tokens']:,} tokens ‚Üí ${result['breakdown']['input_cost']:.2f}")
    print(f"  Output: {result['breakdown']['output_tokens']:,} tokens ‚Üí ${result['breakdown']['output_cost']:.2f}")
    print(f"  Total Cost: ${result['breakdown']['total_cost']:.2f}")
    print(f"  Cost per message: ${result['breakdown']['total_cost'] / messages:.4f}")

print()

# Test 2: Compare all approved models
# Reuses the Test 1 workload (total_input / total_output) and prints a
# cheapest-first ranking followed by the spread between the two extremes.
print("Test 2: Compare costs across all approved models")
print(f"Scenario: {total_input:,} input tokens + {total_output:,} output tokens")
print("-" * 70)

comparisons = compare_model_costs(total_input, total_output)

print(f"{'Model':<30} {'Total Cost':>12} {'Cost Tier':<15}")
print("-" * 70)

for i, comp in enumerate(comparisons, 1):
    model_name = comp["model_name"]
    total_cost = comp["breakdown"]["total_cost"]
    tier = comp["cost_tier"]

    # Medal for the three cheapest models; two spaces keep the columns aligned
    # for everything else.
    rank_emoji = "ü•á" if i == 1 else "ü•à" if i == 2 else "ü•â" if i == 3 else "  "
    print(f"{rank_emoji} {model_name:<28} ${total_cost:>10,.2f} {tier:<15}")

print()

# Calculate savings
# Only meaningful with at least two models; the list is sorted cheapest-first,
# so the extremes are the first and last entries.
if len(comparisons) > 1:
    cheapest = comparisons[0]["breakdown"]["total_cost"]
    most_expensive = comparisons[-1]["breakdown"]["total_cost"]
    savings = most_expensive - cheapest
    # Guard against ZeroDivisionError when the priciest scenario costs nothing
    # (e.g. a zero-token workload).
    savings_percent = (savings / most_expensive) * 100 if most_expensive else 0.0

    print("üí° Insight:")
    print(f"   Cheapest: {comparisons[0]['model_name']} (${cheapest:,.2f})")
    print(f"   Most expensive: {comparisons[-1]['model_name']} (${most_expensive:,.2f})")
    print(f"   Potential savings: ${savings:,.2f} ({savings_percent:.1f}% cheaper)")

print()

# Test 3: Budget planning
# Prices one month of traffic on every approved model and flags which of them
# fit inside a fixed monthly spend.
divider = "-" * 70

print("Test 3: Budget Planning - What can I afford?")
print(divider)

monthly_budget = 1000  # $1,000/month
messages_per_month = 50000

input_per_msg = 100
output_per_msg = 150

# Monthly token volume = per-message size x message count.
total_input_monthly = input_per_msg * messages_per_month
total_output_monthly = output_per_msg * messages_per_month

# Scenario summary, followed by one blank line.
print(
    f"Budget: ${monthly_budget}/month",
    f"Expected volume: {messages_per_month:,} messages/month",
    f"Tokens per message: {input_per_msg} input + {output_per_msg} output",
    "",
    sep="\n",
)

comparisons = compare_model_costs(total_input_monthly, total_output_monthly)

# Table header: column widths match the row format used in the loop below.
print("Model".ljust(30), "Monthly Cost".rjust(12), "Within Budget?".ljust(15))
print(divider)

for quote in comparisons:
    label = quote["model_name"]
    spend = quote["breakdown"]["total_cost"]

    # Spending exactly the budget still counts as within budget.
    if spend <= monthly_budget:
        verdict = "‚úÖ Yes"
    else:
        verdict = "‚ùå Over budget"

    print(f"{label:<30} ${spend:>10,.2f} {verdict:<15}")

# Closing banner: blank line, rule, confirmation message.
print("", "=" * 70, "‚úÖ Cost Calculator is working!", sep="\n")


üß™ Testing Cost Calculator...

Test 1: Cost for 10,000 customer support messages
Assumptions: 100 input tokens + 150 output tokens per message
----------------------------------------------------------------------
Model: Gemini 1.5 Flash
  Input: 1,000,000 tokens ‚Üí $0.07
  Output: 1,500,000 tokens ‚Üí $0.45
  Total Cost: $0.52
  Cost per message: $0.0001

Test 2: Compare costs across all approved models
Scenario: 1,000,000 input tokens + 1,500,000 output tokens
----------------------------------------------------------------------
Model                            Total Cost Cost Tier      
----------------------------------------------------------------------
ü•á Gemini 1.5 Flash             $      0.52 budget         
ü•à Gemini 1.5 Pro               $      8.75 premium        
ü•â Customer Support Model v1.2  $      8.75 premium        
   Claude 3 Opus                $    127.50 premium-plus   

üí° Insight:
   Cheapest: Gemini 1.5 Flash ($0.52)
   Most expensive: Claude 3 O