jwlai-cloud · codegen-sh · Jun 23, 2025
diff --git a/multi_agent_document_translator/.env.example b/multi_agent_document_translator/.env.example
@@ -0,0 +1,28 @@
+# Google Cloud Configuration
+GOOGLE_CLOUD_PROJECT=your-project-id
+GOOGLE_APPLICATION_CREDENTIALS=path/to/your/service-account.json
+GEMINI_API_KEY=your-gemini-api-key
+
+# Agent Configuration
+MAX_CONCURRENT_AGENTS=3
+AGENT_TIMEOUT=300
+
+# Document Processing
+MAX_FILE_SIZE_MB=50
+IMAGE_DPI=300
+IMAGE_FORMAT=PNG
+
+# Translation Settings
+DEFAULT_SOURCE_LANGUAGE=auto
+MIN_TRANSLATION_CONFIDENCE=0.8
+LAYOUT_SIMILARITY_THRESHOLD=0.85
+
+# Paths
+TEMP_DIR=temp
+OUTPUT_DIR=output
+LOGS_DIR=logs
+
+# API Configuration
+API_HOST=0.0.0.0
+API_PORT=8000
+
diff --git a/multi_agent_document_translator/README.md b/multi_agent_document_translator/README.md
@@ -0,0 +1,59 @@
+# Multi-Agent Document Translation App
+
+A sophisticated document translation system that preserves layout integrity using Google's Agent Development Kit (ADK) and A2A protocol.
+
+## Overview
+
+This application solves the common problem of layout destruction in document translation by using a multi-agent architecture where each agent performs specialized tasks while maintaining the original document's visual integrity.
+
+## Architecture
+
+### Agent 1: Document-to-Image Converter
+- Converts PDF pages to high-quality images
+- Maintains original resolution and formatting
+- Handles various document formats
+
+### Agent 2: Multimodal Translation Agent
+- Uses Google Gemini Vision API for image-based translation
+- Preserves layout, fonts, and visual elements
+- Translates text while maintaining spatial relationships
+
+### Agent 3: Quality Validation Agent
+- Validates translation accuracy
+- Checks layout preservation
+- Provides quality metrics and feedback
+
+## Features
+
+- **Layout Preservation**: Maintains original document formatting
+- **Multi-format Support**: PDF, DOCX, and other document formats
+- **Quality Assurance**: Built-in validation and quality checks
+- **Scalable Architecture**: Agent-based system for easy extension
+- **Google AI Integration**: Leverages Gemini's multimodal capabilities
+
+## Installation
+
+```bash
+pip install -r requirements.txt
+```
+
+## Usage
+
+```python
+from multi_agent_translator import MultiAgentTranslator
+
+translator = MultiAgentTranslator()
+result = translator.translate_document(
+    document_path="input.pdf",
+    target_language="Spanish",
+    output_path="translated_output.pdf"
+)
+```
+
+## Requirements
+
+- Python 3.8+
+- Google Cloud credentials
+- Gemini API access
+- Required Python packages (see requirements.txt)
+
diff --git a/multi_agent_document_translator/__init__.py b/multi_agent_document_translator/__init__.py
@@ -0,0 +1,25 @@
+"""Multi-Agent Document Translation System."""
+
+from .orchestrator import orchestrator, TranslationOrchestrator
+from .config import settings
+from .agents import (
+    BaseAgent,
+    DocumentConverterAgent,
+    TranslationAgent,
+    ValidationAgent
+)
+
+__version__ = "1.0.0"
+__author__ = "Multi-Agent Translation Team"
+__description__ = "AI-powered document translation with layout preservation using Google's ADK and A2A protocol"
+
+__all__ = [
+    'orchestrator',
+    'TranslationOrchestrator',
+    'settings',
+    'BaseAgent',
+    'DocumentConverterAgent', 
+    'TranslationAgent',
+    'ValidationAgent'
+]
+
diff --git a/multi_agent_document_translator/agents/__init__.py b/multi_agent_document_translator/agents/__init__.py
@@ -0,0 +1,16 @@
+"""Multi-Agent Document Translation System - Agents Module."""
+
+from .base_agent import BaseAgent, AgentMessage, AgentResult
+from .document_converter_agent import DocumentConverterAgent
+from .translation_agent import TranslationAgent
+from .validation_agent import ValidationAgent
+
+__all__ = [
+    'BaseAgent',
+    'AgentMessage', 
+    'AgentResult',
+    'DocumentConverterAgent',
+    'TranslationAgent',
+    'ValidationAgent'
+]
+
diff --git a/multi_agent_document_translator/agents/base_agent.py b/multi_agent_document_translator/agents/base_agent.py
@@ -0,0 +1,135 @@
+"""Base agent class for the multi-agent document translation system."""
+
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Dict, Optional
+from datetime import datetime
+from pathlib import Path
+
+from pydantic import BaseModel
+
+
+class AgentMessage(BaseModel):
+    """Message structure for agent communication."""
+
+    agent_id: str
+    message_type: str
+    content: Dict[str, Any]
+    timestamp: datetime
+    correlation_id: Optional[str] = None
+
+
+class AgentResult(BaseModel):
+    """Result structure for agent operations."""
+
+    success: bool
+    data: Optional[Dict[str, Any]] = None
+    error: Optional[str] = None
+    metadata: Optional[Dict[str, Any]] = None
+    processing_time: Optional[float] = None
+
+
+class BaseAgent(ABC):
+    """Base class for all agents in the system."""
+
+    def __init__(self, agent_id: str, config: Dict[str, Any]):
+        self.agent_id = agent_id
+        self.config = config
+        self.logger = self._setup_logger()
+        self.is_running = False
+        self.message_queue = asyncio.Queue()
+
+    def _setup_logger(self) -> logging.Logger:
+        """Set up logger for the agent."""
+        logger = logging.getLogger(f"agent.{self.agent_id}")
+        logger.setLevel(logging.INFO)
+
+        # Create logs directory if it doesn't exist
+        logs_dir = Path(self.config.get("logs_dir", "logs"))
+        logs_dir.mkdir(exist_ok=True)
+
+        # File handler
+        file_handler = logging.FileHandler(logs_dir / f"{self.agent_id}.log")
+        file_handler.setLevel(logging.INFO)
+
+        # Console handler
+        console_handler = logging.StreamHandler()
+        console_handler.setLevel(logging.INFO)
+
+        # Formatter
+        formatter = logging.Formatter(
+            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+        )
+        file_handler.setFormatter(formatter)
+        console_handler.setFormatter(formatter)
+
+        logger.addHandler(file_handler)
+        logger.addHandler(console_handler)
+
+        return logger
+
+    async def start(self):
+        """Start the agent."""
+        self.logger.info(f"Starting agent {self.agent_id}")
+        self.is_running = True
+        await self._initialize()
+
+    async def stop(self):
+        """Stop the agent."""
+        self.logger.info(f"Stopping agent {self.agent_id}")
+        self.is_running = False
+        await self._cleanup()
+
+    async def send_message(self, target_agent: str, message_type: str, content: Dict[str, Any], correlation_id: Optional[str] = None):
+        """Send a message to another agent."""
+        message = AgentMessage(
+            agent_id=self.agent_id,
+            message_type=message_type,
+            content=content,
+            timestamp=datetime.now(),
+            correlation_id=correlation_id
+        )
+
+        self.logger.info(f"Sending message to {target_agent}: {message_type}")
+        # In a real implementation, this would use the A2A protocol
+        # For now, we'll use a simple message passing mechanism
+
+    async def receive_message(self) -> Optional[AgentMessage]:
+        """Receive a message from the queue."""
+        try:
+            message = await asyncio.wait_for(self.message_queue.get(), timeout=1.0)
+            self.logger.info(f"Received message: {message.message_type} from {message.agent_id}")
+            return message
+        except asyncio.TimeoutError:
+            return None
+
+    @abstractmethod
+    async def process(self, input_data: Dict[str, Any]) -> AgentResult:
+        """Process input data and return result."""
+        pass
+
+    @abstractmethod
+    async def _initialize(self):
+        """Initialize agent-specific resources."""
+        pass
+
+    @abstractmethod
+    async def _cleanup(self):
+        """Clean up agent-specific resources."""
+        pass
+
+    def _create_temp_dir(self) -> Path:
+        """Create a temporary directory for this agent."""
+        temp_dir = Path(self.config.get("temp_dir", "temp")) / self.agent_id
+        temp_dir.mkdir(parents=True, exist_ok=True)
+        return temp_dir
+
+    def _validate_input(self, input_data: Dict[str, Any], required_fields: list) -> bool:
+        """Validate input data contains required fields."""
+        for field in required_fields:
+            if field not in input_data:
+                self.logger.error(f"Missing required field: {field}")
+                return False
+        return True
+