# GAIA Agent Checker
## Design and test agent architecture

**Objective:** Build a 4-agent system with proper GAIA formatting  
**Output:** Working agents with routing and format compliance

---

In [None]:
# Setup and imports
import json
import re
import random
from typing import Dict, List, Optional

# Notebook banner: title and goal, each followed by a 40-character rule.
print(
    "ü§ñ GAIA Agent Checker",
    "=" * 40,
    "Goal: Design and test agent architecture",
    "=" * 40,
    sep="\n",
)

# Section 4: Agent Architecture Design

In [None]:
def design_agent_architecture():
    """Return the specs of the four GAIA agents and print an overview.

    The result maps each agent name to a dict with the keys ``role``,
    ``tools``, ``triggers`` and ``priority``. The printed overview lists
    only the first three triggers of each agent.
    """

    def describe(role, tools, triggers, priority):
        # Fixed key order so every spec dict is laid out the same way.
        return {
            "role": role,
            "tools": tools,
            "triggers": triggers,
            "priority": priority,
        }

    agents = {
        "data_analyst": describe(
            "Handle calculations, spreadsheets, math",
            ["calculator", "excel_reader", "csv_processor"],
            ["calculate", "total", "average", "sum", ".xlsx", ".csv"],
            "HIGH - handles most file + math tasks",
        ),
        "web_researcher": describe(
            "Search web, find information online",
            ["web_search", "wikipedia", "google_search"],
            ["search", "according to", "website", "find information"],
            "HIGH - handles most web research",
        ),
        "document_reader": describe(
            "Read PDFs, images, text files",
            ["pdf_reader", "ocr", "text_extractor"],
            [".pdf", ".png", ".jpg", ".txt", ".docx"],
            "MEDIUM - handles document processing",
        ),
        "general_helper": describe(
            "Everything else, reasoning, fallback",
            ["basic_reasoning"],
            ["explain", "why", "how", "default"],
            "MEDIUM - handles reasoning tasks",
        ),
    }

    print("üèóÔ∏è  Agent Architecture:")
    for agent_name, details in agents.items():
        shown_triggers = details["triggers"][:3]
        overview = (
            f"\n  ü§ñ {agent_name.upper()}",
            f"     Role: {details['role']}",
            f"     Tools: {', '.join(details['tools'])}",
            f"     Triggers: {', '.join(shown_triggers)}...",
            f"     Priority: {details['priority']}",
        )
        print("\n".join(overview))

    return agents

def create_routing_logic():
    """Build the question router and print a summary of its rules.

    Returns:
        The function ``route_to_agent(question, file_attachment=None)``,
        which returns one of ``"data_analyst"``, ``"web_researcher"``,
        ``"document_reader"`` or ``"general_helper"``.
    """

    # Attachments are matched on the END of the name. A substring test would
    # let a dotted token in the middle of the name (e.g. "export.csv.pdf")
    # win over the real extension. The document list also covers ".txt" and
    # ".docx", which the agent architecture declares as document_reader
    # triggers.
    spreadsheet_suffixes = ('.xlsx', '.csv')
    document_suffixes = ('.pdf', '.png', '.jpg', '.txt', '.docx')

    # Checked in order; the first rule with a keyword present wins.
    # NOTE: keywords are plain substring matches on the lower-cased question,
    # so e.g. 'sum' also fires on "summary".
    keyword_rules = (
        ("data_analyst", ('calculate', 'total', 'sum', 'average')),
        ("web_researcher", ('search', 'find', 'website', 'according')),
        ("document_reader", ('read', 'extract', 'text')),
    )

    def route_to_agent(question, file_attachment=None):
        """Pick the agent for a question.

        Args:
            question: The question text; matched case-insensitively.
            file_attachment: Optional file name or path of an attachment.

        Returns:
            The name of the selected agent.
        """

        question = question.lower()

        # File-based routing (highest priority)
        if file_attachment:
            filename = file_attachment.lower().strip()
            if filename.endswith(spreadsheet_suffixes):
                return "data_analyst"
            if filename.endswith(document_suffixes):
                return "document_reader"
            # Unknown extension: fall through to keyword routing.

        # Keyword-based routing
        for agent, keywords in keyword_rules:
            if any(word in question for word in keywords):
                return agent

        return "general_helper"  # Default fallback

    print("üîÄ Routing Logic:")
    print("  1. Check file extension ‚Üí route to appropriate agent")
    print("  2. Check keywords ‚Üí route based on content")
    print("  3. Default ‚Üí general_helper")
    print("  4. Simple and effective!")

    return route_to_agent

# Build the agent specs, then the router function. Both names are
# module-level; `route_question` is called by create_mock_agent_responses().
agents = design_agent_architecture()
route_question = create_routing_logic()

print("\n‚úÖ Agent architecture designed!")

# Section 5: System Prompts & Configuration

In [None]:
def create_agent_prompts():
    """Assemble the per-agent system prompts, save them as JSON and return them.

    Each prompt is the shared GAIA answer-format preamble, a blank line, and
    the agent's own specialty text. The mapping is printed as a size summary
    and written to ``gaia_agent_prompts.json`` in the working directory.
    """

    # Shared GAIA preamble (mandatory answer format), one entry per line.
    gaia_preamble = "\n".join((
        "You are a general AI assistant. Report your thoughts, and finish with: FINAL ANSWER: [YOUR FINAL ANSWER].",
        "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list.",
        "- Numbers: no commas, no units ($ %) unless specified",
        "- Strings: no articles (the, a, an), no abbreviations, digits as text unless specified  ",
        "- Lists: apply above rules to each element",
        "",
        "Think through the problem step by step.",
    ))

    # Agent-specific tail appended to the preamble.
    specialties = {
        "data_analyst": "You specialize in calculations, spreadsheets, and data analysis. Use mathematical reasoning and precise calculations.",
        "web_researcher": "You specialize in finding information online and web research. Search thoroughly and verify facts.",
        "document_reader": "You specialize in reading and extracting information from documents, PDFs, and images.",
        "general_helper": "You handle general questions and reasoning tasks with clear logical thinking.",
    }
    agent_prompts = {
        name: f"{gaia_preamble}\n\n{focus}"
        for name, focus in specialties.items()
    }

    print("üìã GAIA-Compliant System Prompts:")
    for name, text in agent_prompts.items():
        print(f"  ‚îú‚îÄ‚îÄ {name}: {len(text)} chars")

    # Persist the prompts for later use.
    serialized = json.dumps(agent_prompts, indent=2)
    with open('gaia_agent_prompts.json', 'w') as out_file:
        out_file.write(serialized)

    print("üíæ Saved to gaia_agent_prompts.json")

    return agent_prompts

def create_agent_config():
    """Return the agent configuration dict and print a short summary.

    The configuration has three sections: ``model_settings``,
    ``tools_priority`` and ``routing_weights``.
    """

    # Low temperature for consistent answers.
    model_settings = {"temperature": 0.1, "max_tokens": 1000, "top_p": 0.9}

    tools_priority = {
        "essential": ["web_search", "calculator", "file_reader"],
        "important": ["excel_processor", "pdf_reader", "image_ocr"],
        "optional": ["wikipedia", "speech_recognition"],
    }

    # File extension carries the largest weight, the default fallback the smallest.
    routing_weights = {
        "file_extension": 0.8,
        "keywords": 0.6,
        "rag_similarity": 0.4,
        "default_fallback": 0.2,
    }

    config = {
        "model_settings": model_settings,
        "tools_priority": tools_priority,
        "routing_weights": routing_weights,
    }

    summary = [
        "‚öôÔ∏è  Agent Configuration:",
        f"  ‚îú‚îÄ‚îÄ Model temp: {model_settings['temperature']}",
        f"  ‚îú‚îÄ‚îÄ Max tokens: {model_settings['max_tokens']}",
        f"  ‚îú‚îÄ‚îÄ Essential tools: {len(tools_priority['essential'])}",
        "  ‚îî‚îÄ‚îÄ Routing weights configured",
    ]
    for line in summary:
        print(line)

    return config

# Build the prompts and the configuration at module level.
# create_agent_prompts() also writes gaia_agent_prompts.json as a side effect.
system_prompts = create_agent_prompts()
agent_config = create_agent_config()

# Section 6: GAIA Format Testing & Compliance

In [None]:
def create_gaia_formatter():
    """Create the GAIA answer cleaner and the GAIA format validator.

    Returns:
        A ``(clean_gaia_answer, validate_gaia_format)`` tuple of functions.
    """

    marker = "FINAL ANSWER:"

    def clean_gaia_answer(text):
        """Normalize an answer according to the GAIA formatting rules.

        Args:
            text: A full model response or a bare answer string.

        Returns:
            The text after the last ``FINAL ANSWER:`` marker (or the whole
            text when the marker is absent) with articles, commas between
            digits, ``$`` and ``%`` removed and whitespace collapsed.
        """

        # Extract final answer if present
        if marker in text:
            text = text.split(marker)[-1].strip()

        # Remove articles (the, a, an) together with the whitespace after them
        text = re.sub(r'\b(the|a|an)\b\s*', '', text, flags=re.IGNORECASE)

        # Remove commas that sit directly between two digits
        text = re.sub(r'(\d),(\d)', r'\1\2', text)

        # Strip currency and percent signs unconditionally.
        # This is simplified - real implementation needs context awareness
        text = text.replace('$', '').replace('%', '')

        # Clean extra whitespace
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    def validate_gaia_format(response):
        """Check a response against the GAIA answer format.

        Args:
            response: The full model response text.

        Returns:
            ``(is_valid, errors)`` where ``errors`` is a list of
            human-readable violation messages (empty when valid).
        """

        errors = []

        if marker not in response:
            errors.append("Missing 'FINAL ANSWER:' prefix")
        else:
            # Only the text after the last marker is inspected.
            answer = response.split(marker)[-1].strip()

            if re.search(r'\b(the|a|an)\b', answer, re.IGNORECASE):
                errors.append("Contains articles (the, a, an)")

            if re.search(r'\d,\d', answer):
                errors.append("Contains commas in numbers")

            if len(answer.split()) > 10:  # Arbitrary threshold
                errors.append("Answer too verbose - should be as few words as possible")

        return len(errors) == 0, errors

    return clean_gaia_answer, validate_gaia_format

def test_gaia_compliance():
    """Run sample answers through the GAIA cleaner and validator and print a report.

    A case counts as passed when the cleaned text equals the expected text
    and the validator's verdict on the wrapped raw answer equals the
    expected verdict.

    Returns:
        The ``(clean_answer, validate_format)`` pair obtained from
        ``create_gaia_formatter()``.
    """

    clean_answer, validate_format = create_gaia_formatter()

    # Each case: (raw answer, expected cleaned text, expected validator verdict)
    test_cases = [
        ("The answer is 1,234", "1234", True),
        ("Total: $25.50", "25.50", True),
        ("The city is Paris", "Paris", True),
        ("Cities: New York, Boston", "New York,Boston", True),
        ("I think the answer is definitely 42", "I think answer is definitely 42", False),  # Too verbose
    ]

    print("üß™ GAIA Compliance Testing:")
    print("=" * 50)

    outcomes = []
    for case_no, (raw, wanted, expect_valid) in enumerate(test_cases, 1):
        # Cleaning is checked on the bare answer ...
        cleaned = clean_answer(raw)

        # ... validation on the answer wrapped in a full response.
        is_valid, errors = validate_format(f"Let me think... FINAL ANSWER: {raw}")

        case_ok = cleaned == wanted and is_valid == expect_valid
        outcomes.append(case_ok)

        status = "‚úÖ" if case_ok else "‚ùå"
        print(f"{case_no}. {status} '{raw}'")
        print(f"   Cleaned: '{cleaned}' (expected: '{wanted}')")
        print(f"   Valid: {is_valid} | Errors: {errors or 'None'}")
        print()

    passed = sum(outcomes)
    total = len(test_cases)
    print(f"üìä Compliance Results: {passed}/{total} ({passed/total*100:.0f}%) passed")

    return clean_answer, validate_format

# Run the compliance report and keep the cleaner/validator pair it returns;
# `gaia_validator` is used later by create_mock_agent_responses().
gaia_cleaner, gaia_validator = test_gaia_compliance()

In [None]:
def create_mock_agent_responses():
    """Route sample questions, fabricate agent responses and check their format.

    Uses the module-level ``route_question`` and ``gaia_validator``. For each
    sample the selected agent is compared with the expected one, a mock
    response ending in ``FINAL ANSWER: <expected answer>`` is built for the
    selected agent, and that response is validated. A report is printed.

    Returns:
        True only when every sample was routed as expected and every mock
        response passed validation.
    """

    def sample(question, file, expected_agent, expected_answer):
        return {
            "question": question,
            "file": file,
            "expected_agent": expected_agent,
            "expected_answer": expected_answer,
        }

    test_questions = [
        sample("Calculate 15% of 2500", None, "data_analyst", "375"),
        sample("What is the population of Tokyo in 2023?", None, "web_researcher", "37194000"),
        sample("Extract the total from this spreadsheet", "data.xlsx", "data_analyst", "1250"),
        sample("Read the text in this PDF", "document.pdf", "document_reader", "extracted text content"),
        sample("Explain the concept of entropy", None, "general_helper", "measure of disorder in system"),
    ]

    # Sentence appended to the mock reasoning, per agent; anything else gets
    # the general-reasoning sentence.
    approach_by_agent = {
        "data_analyst": " I'll use mathematical calculations to solve this.",
        "web_researcher": " I'll search for the most current information online.",
        "document_reader": " I'll extract and process the document content.",
    }
    default_approach = " I'll use general reasoning to explain this concept."

    def mock_agent_response(agent_type, question, expected_answer):
        """Build a canned response for ``agent_type`` ending in the final answer."""
        intro = f"I am the {agent_type} agent. Let me process this question: {question[:50]}..."
        reasoning = intro + approach_by_agent.get(agent_type, default_approach)
        return f"{reasoning}\n\nFINAL ANSWER: {expected_answer}"

    print("üîÑ Testing Agent Routing & Responses:")
    print("=" * 50)

    total_tests = len(test_questions)
    routing_hits = []
    format_hits = []

    for number, case in enumerate(test_questions, 1):
        # Routing check
        selected_agent = route_question(case["question"], case["file"])
        routing_ok = selected_agent == case["expected_agent"]
        routing_hits.append(routing_ok)

        # Mock response from whichever agent was selected
        response = mock_agent_response(selected_agent, case["question"], case["expected_answer"])

        # GAIA format check
        is_valid, errors = gaia_validator(response)
        format_hits.append(bool(is_valid))

        route_status = "‚úÖ" if routing_ok else "‚ùå"
        format_status = "‚úÖ" if is_valid else "‚ùå"
        format_detail = 'Valid' if is_valid else f'Errors: {errors}'

        print(f"{number}. Q: '{case['question'][:40]}...'")
        print(f"   File: {case['file'] or 'None'}")
        print(f"   {route_status} Routing: {selected_agent} (expected: {case['expected_agent']})")
        print(f"   {format_status} Format: {format_detail}")
        print(f"   Response: {response[-50:]}...")  # Tail of the response only
        print()

    routing_correct = sum(routing_hits)
    format_correct = sum(format_hits)
    overall = min(routing_correct, format_correct)

    print("üìä Test Results:")
    print(f"  ‚îú‚îÄ‚îÄ Routing: {routing_correct}/{total_tests} ({routing_correct/total_tests*100:.0f}%)")
    print(f"  ‚îú‚îÄ‚îÄ Format: {format_correct}/{total_tests} ({format_correct/total_tests*100:.0f}%)")
    print(f"  ‚îî‚îÄ‚îÄ Overall: {overall}/{total_tests} ({overall/total_tests*100:.0f}%)")

    return routing_correct == total_tests and format_correct == total_tests

# Run the routing/format checks, then print the closing summary.
all_tests_passed = create_mock_agent_responses()

print(
    "\n" + "=" * 50,
    "üéâ AGENT CHECKER COMPLETE!",
    "=" * 50,
    "‚úÖ 4-agent architecture designed",
    "‚úÖ GAIA-compliant system prompts created",
    "‚úÖ Format compliance tested",
    "‚úÖ Routing logic validated",
    f"‚úÖ All tests passed: {all_tests_passed}",
    "=" * 50,
    sep="\n",
)