Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions multi_agent_document_translator/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Google Cloud Configuration
GOOGLE_CLOUD_PROJECT=your-project-id
GOOGLE_APPLICATION_CREDENTIALS=path/to/your/service-account.json
GEMINI_API_KEY=your-gemini-api-key

# Agent Configuration
MAX_CONCURRENT_AGENTS=3
AGENT_TIMEOUT=300

# Document Processing
MAX_FILE_SIZE_MB=50
IMAGE_DPI=300
IMAGE_FORMAT=PNG

# Translation Settings
DEFAULT_SOURCE_LANGUAGE=auto
MIN_TRANSLATION_CONFIDENCE=0.8
LAYOUT_SIMILARITY_THRESHOLD=0.85

# Paths
TEMP_DIR=temp
OUTPUT_DIR=output
LOGS_DIR=logs

# API Configuration
API_HOST=0.0.0.0
API_PORT=8000

59 changes: 59 additions & 0 deletions multi_agent_document_translator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Multi-Agent Document Translation App

A sophisticated document translation system that preserves layout integrity using Google's Agent Development Kit (ADK) and A2A protocol.

## Overview

This application solves the common problem of layout destruction in document translation by using a multi-agent architecture where each agent performs specialized tasks while maintaining the original document's visual integrity.

## Architecture

### Agent 1: Document-to-Image Converter
- Converts PDF pages to high-quality images
- Maintains original resolution and formatting
- Handles various document formats

### Agent 2: Multimodal Translation Agent
- Uses Google Gemini Vision API for image-based translation
- Preserves layout, fonts, and visual elements
- Translates text while maintaining spatial relationships

### Agent 3: Quality Validation Agent
- Validates translation accuracy
- Checks layout preservation
- Provides quality metrics and feedback

## Features

- **Layout Preservation**: Maintains original document formatting
- **Multi-format Support**: PDF, DOCX, and other document formats
- **Quality Assurance**: Built-in validation and quality checks
- **Scalable Architecture**: Agent-based system for easy extension
- **Google AI Integration**: Leverages Gemini's multimodal capabilities

## Installation

```bash
pip install -r requirements.txt
```

## Usage

```python
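# NOTE: the package directory in this repository is
# `multi_agent_document_translator`, and its __init__.py exports
# `TranslationOrchestrator` / `orchestrator`; confirm the import below
# against the installed package before copying this snippet.
from multi_agent_translator import MultiAgentTranslator

translator = MultiAgentTranslator()
result = translator.translate_document(
    document_path="input.pdf",
    target_language="Spanish",
    output_path="translated_output.pdf",
)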
```

## Requirements

- Python 3.8+
- Google Cloud credentials
- Gemini API access
- Required Python packages (see requirements.txt)

25 changes: 25 additions & 0 deletions multi_agent_document_translator/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Multi-Agent Document Translation System."""

from .orchestrator import orchestrator, TranslationOrchestrator
from .config import settings
from .agents import (
BaseAgent,
DocumentConverterAgent,
TranslationAgent,
ValidationAgent
)

# Package metadata exposed as module-level dunder attributes.
__version__ = "1.0.0"
__author__ = "Multi-Agent Translation Team"
__description__ = (
    "AI-powered document translation with layout preservation "
    "using Google's ADK and A2A protocol"
)

# Public API of the package: the names bound by the imports above that
# `from <package> import *` re-exports.
__all__ = [
    "orchestrator",
    "TranslationOrchestrator",
    "settings",
    "BaseAgent",
    "DocumentConverterAgent",
    "TranslationAgent",
    "ValidationAgent",
]

16 changes: 16 additions & 0 deletions multi_agent_document_translator/agents/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Multi-Agent Document Translation System - Agents Module."""

from .base_agent import BaseAgent, AgentMessage, AgentResult
from .document_converter_agent import DocumentConverterAgent
from .translation_agent import TranslationAgent
from .validation_agent import ValidationAgent

# Public API of the agents subpackage: the message/result models plus the
# abstract base agent and the three concrete agents imported above.
__all__ = [
    "BaseAgent",
    "AgentMessage",
    "AgentResult",
    "DocumentConverterAgent",
    "TranslationAgent",
    "ValidationAgent",
]

135 changes: 135 additions & 0 deletions multi_agent_document_translator/agents/base_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
"""Base agent class for the multi-agent document translation system."""

import asyncio
import logging
from abc import ABC, abstractmethod
from typing import Any, Dict, Optional
from datetime import datetime
from pathlib import Path

from pydantic import BaseModel


class AgentMessage(BaseModel):
    """Envelope exchanged between agents.

    All fields except ``correlation_id`` are required when the model is
    constructed.
    """

    # Identifier of the agent that produced the message.
    agent_id: str
    # Free-form label describing what kind of message this is.
    message_type: str
    # Arbitrary payload carried by the message.
    content: Dict[str, Any]
    # Moment the message was created.
    timestamp: datetime
    # Optional token for tying related messages together; defaults to None.
    correlation_id: Optional[str] = None


class AgentResult(BaseModel):
    """Outcome returned by an agent operation.

    Only ``success`` is required; every other field defaults to ``None``.
    """

    # Whether the operation succeeded.
    success: bool
    # Optional result payload.
    data: Optional[Dict[str, Any]] = None
    # Optional error description.
    error: Optional[str] = None
    # Optional additional information about the operation.
    metadata: Optional[Dict[str, Any]] = None
    # Optional duration of the operation (unit not specified here).
    processing_time: Optional[float] = None


class BaseAgent(ABC):
    """Abstract base class for all agents in the system.

    Provides the plumbing shared by every agent: a per-agent logger, a
    running flag, an in-process message queue, and small helpers for
    temporary directories and input validation. Subclasses implement
    ``process``, ``_initialize`` and ``_cleanup``.
    """

    def __init__(self, agent_id: str, config: Dict[str, Any]):
        """Store the agent identity and configuration.

        Args:
            agent_id: Name used for the logger, the log file and the
                agent's temporary directory.
            config: Configuration mapping; ``logs_dir`` and ``temp_dir``
                are read from it (defaulting to ``"logs"`` / ``"temp"``).
        """
        self.agent_id = agent_id
        self.config = config
        self.logger = self._setup_logger()
        self.is_running = False
        self.message_queue = asyncio.Queue()

    def _setup_logger(self) -> logging.Logger:
        """Return the logger for this agent, configuring it on first use.

        The logger writes INFO-and-above records to both
        ``<logs_dir>/<agent_id>.log`` and the console.
        """
        logger = logging.getLogger(f"agent.{self.agent_id}")
        logger.setLevel(logging.INFO)

        # logging.getLogger() hands back the same object for the same name,
        # so a second agent created with this id would otherwise attach a
        # second pair of handlers and every record would be emitted twice.
        if logger.handlers:
            return logger

        # parents=True so a nested logs_dir (e.g. "var/logs") can be created.
        logs_dir = Path(self.config.get("logs_dir", "logs"))
        logs_dir.mkdir(parents=True, exist_ok=True)

        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )

        # Explicit UTF-8 so non-ASCII text in log records is written the
        # same way regardless of the platform's locale encoding.
        file_handler = logging.FileHandler(
            logs_dir / f"{self.agent_id}.log", encoding="utf-8"
        )
        console_handler = logging.StreamHandler()

        for handler in (file_handler, console_handler):
            handler.setLevel(logging.INFO)
            handler.setFormatter(formatter)
            logger.addHandler(handler)

        return logger

    async def start(self):
        """Mark the agent as running and initialize its resources.

        If ``_initialize`` raises, the running flag is reset before the
        exception propagates so the agent is not left marked as running.
        """
        self.logger.info("Starting agent %s", self.agent_id)
        self.is_running = True
        try:
            await self._initialize()
        except Exception:
            self.is_running = False
            raise

    async def stop(self):
        """Mark the agent as stopped and release its resources."""
        self.logger.info("Stopping agent %s", self.agent_id)
        self.is_running = False
        await self._cleanup()

    async def send_message(
        self,
        target_agent: str,
        message_type: str,
        content: Dict[str, Any],
        correlation_id: Optional[str] = None,
    ) -> AgentMessage:
        """Build a message addressed to another agent and log the send.

        No transport is wired in yet: the message is constructed and
        logged but not delivered to ``target_agent``. The constructed
        message is returned so a caller can route it.

        Args:
            target_agent: Identifier of the intended recipient.
            message_type: Label describing the kind of message.
            content: Payload of the message.
            correlation_id: Optional token linking related messages.

        Returns:
            The ``AgentMessage`` that was built.
        """
        message = AgentMessage(
            agent_id=self.agent_id,
            message_type=message_type,
            content=content,
            timestamp=datetime.now(),
            correlation_id=correlation_id,
        )

        self.logger.info("Sending message to %s: %s", target_agent, message_type)
        # In a real implementation, this would use the A2A protocol
        # For now, we'll use a simple message passing mechanism
        return message

    async def receive_message(self) -> Optional[AgentMessage]:
        """Wait up to one second for a queued message.

        Returns:
            The next message from ``message_queue``, or ``None`` if none
            arrived within the timeout.
        """
        try:
            message = await asyncio.wait_for(self.message_queue.get(), timeout=1.0)
        except asyncio.TimeoutError:
            return None

        self.logger.info(
            "Received message: %s from %s", message.message_type, message.agent_id
        )
        return message

    @abstractmethod
    async def process(self, input_data: Dict[str, Any]) -> AgentResult:
        """Process input data and return result."""

    @abstractmethod
    async def _initialize(self):
        """Initialize agent-specific resources."""

    @abstractmethod
    async def _cleanup(self):
        """Clean up agent-specific resources."""

    def _create_temp_dir(self) -> Path:
        """Create (if needed) and return ``<temp_dir>/<agent_id>``."""
        temp_dir = Path(self.config.get("temp_dir", "temp")) / self.agent_id
        temp_dir.mkdir(parents=True, exist_ok=True)
        return temp_dir

    def _validate_input(self, input_data: Dict[str, Any], required_fields: list) -> bool:
        """Check that every required field is present in ``input_data``.

        Only key presence is checked, not the values. The first missing
        field is logged as an error and validation stops there.

        Returns:
            ``True`` if all fields are present, ``False`` otherwise.
        """
        for field in required_fields:
            if field not in input_data:
                self.logger.error("Missing required field: %s", field)
                return False
        return True

Loading