In [None]:
# 필수 패키지 설치
# pip install python-pptx openai python-dotenv nest_asyncio

import os
import asyncio
import json
import re
from typing import List, Dict, Optional, Any
from pathlib import Path

from pptx import Presentation
from pptx.enum.shapes import MSO_SHAPE_TYPE
from openai import AsyncAzureOpenAI
from dotenv import load_dotenv
import nest_asyncio

# Setup so that asyncio code can be run from within Jupyter
nest_asyncio.apply()

# Load environment variables
load_dotenv()

In [None]:
class AzureOpenAITranslator:
    """Asynchronous batch translator backed by an Azure OpenAI chat deployment.

    Each batch is sent as a single JSON-mode chat completion. The reply is
    expected to be a JSON object whose ``translations`` key holds one
    translated string per input, in the same order as the inputs.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_version: str = "2024-08-01-preview",
        azure_endpoint: Optional[str] = None,
        deployment_name: str = "gpt-4o",
        temperature: float = 0.1,
        max_tokens: int = 4000
    ):
        """Store the request settings and create the async client.

        Args:
            api_key: Azure OpenAI key; falls back to the
                ``AZURE_OPENAI_API_KEY`` environment variable when omitted.
            api_version: API version passed to the client.
            azure_endpoint: Resource endpoint; falls back to the
                ``AZURE_OPENAI_ENDPOINT`` environment variable when omitted.
            deployment_name: Deployment used as the ``model`` of each request.
            temperature: Sampling temperature sent with each request.
            max_tokens: Completion token limit sent with each request.
        """
        self.api_key = api_key or os.getenv("AZURE_OPENAI_API_KEY")
        self.azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        self.deployment_name = deployment_name
        self.temperature = temperature
        self.max_tokens = max_tokens

        # Initialise the asynchronous client.
        self.client = AsyncAzureOpenAI(
            api_key=self.api_key,
            api_version=api_version,
            azure_endpoint=self.azure_endpoint
        )

    async def translate_batch(
        self,
        texts: List[str],
        target_language: str = "ko",
        enable_polishing: bool = True,
        context: str = ""
    ) -> List[str]:
        """Translate a batch of strings, retrying up to three times.

        Args:
            texts: Strings to translate; an empty list returns ``[]`` without
                calling the API.
            target_language: Language code (or free-form language name).
            enable_polishing: When true, the prompt additionally asks for
                professional, natural phrasing.
            context: Optional hint appended to the instruction.

        Returns:
            The translated strings in input order. This is best-effort: if
            every attempt fails or returns an unusable reply, the original
            ``texts`` list is returned instead of raising.
        """
        if not texts:
            return []

        prompt = self._create_translation_prompt(texts, target_language, enable_polishing, context)
        messages = [
            {
                "role": "system",
                "content": "You are a professional translator specializing in PowerPoint presentations. You preserve formatting, technical terms, and tone accurately."
            },
            {
                "role": "user",
                "content": prompt
            }
        ]

        retries = 3
        for attempt in range(retries):
            try:
                response = await self.client.chat.completions.create(
                    model=self.deployment_name,
                    messages=messages,
                    temperature=self.temperature,
                    max_tokens=self.max_tokens,
                    response_format={"type": "json_object"}  # force JSON mode
                )
                translated_texts = self._parse_translations(
                    response.choices[0].message.content
                )
            except Exception as e:
                # Deliberately broad: any API, network or parsing failure is
                # retried, and the caller falls back to the source text.
                print(f"❌ 번역 오류 (시도 {attempt+1}/{retries}): {str(e)}")
                if attempt == retries - 1:
                    return texts  # last resort: hand back the originals
                await asyncio.sleep(2 ** attempt)  # exponential backoff
                continue

            if len(translated_texts) == len(texts):
                return translated_texts
            # A count mismatch is retried immediately (no backoff).
            print(f"⚠️ 번역 개수 불일치 (요청: {len(texts)}, 응답: {len(translated_texts)}). 재시도 중...")

        return texts

    @staticmethod
    def _parse_translations(content: Optional[str]) -> List[str]:
        """Extract the ``translations`` list from a JSON reply.

        A missing ``translations`` key yields an empty list (which the caller
        treats as a count mismatch).

        Raises:
            ValueError: If the content is empty, is not valid JSON, is not a
                JSON object, or ``translations`` is not a list of strings.
        """
        if not content:
            raise ValueError("empty response content")
        result = json.loads(content)  # JSONDecodeError is a ValueError
        if not isinstance(result, dict):
            raise ValueError("response is not a JSON object")
        translations = result.get("translations", [])
        if not isinstance(translations, list) or not all(
            isinstance(item, str) for item in translations
        ):
            # Non-string items (e.g. null) must never be written into a slide.
            raise ValueError("'translations' must be a list of strings")
        return translations

    def _create_translation_prompt(
        self,
        texts: List[str],
        target_language: str,
        enable_polishing: bool,
        context: str
    ) -> str:
        """Build the JSON-encoded user prompt for one batch.

        Known language codes are expanded to English language names; any
        other value is passed through unchanged. The phrasing requirement is
        only included when ``enable_polishing`` is true.
        """
        language_map = {
            "ko": "Korean", "en": "English", "ja": "Japanese",
            "zh": "Chinese (Simplified)", "es": "Spanish"
        }
        target_lang = language_map.get(target_language, target_language)

        instruction = f"Translate the following texts to {target_lang}."
        if context:
            instruction += f"\nContext: {context}"

        requirements = [
            "Return a JSON object with a key 'translations' containing the list of translated strings.",
            "Maintain the exact order.",
            "Preserve all formatting codes and special characters.",
        ]
        if enable_polishing:
            requirements.append("Use professional and natural phrasing.")

        return json.dumps({
            "instruction": instruction,
            "requirements": requirements,
            "inputs": texts
        }, ensure_ascii=False)

In [None]:
class PowerPointTranslator:
    """Translate a PowerPoint file run by run (grouped shapes and tables included).

    Text is collected per run so that run-level formatting is kept; each
    slide is processed as its own coroutine, and the number of slides being
    processed at once is bounded by a semaphore.
    """

    # Namespace-qualified DrawingML tags for the Latin / East Asian font
    # children of a run-properties (<a:rPr>) element.
    _LATIN_TAG = "{http://schemas.openxmlformats.org/drawingml/2006/main}latin"
    _EA_TAG = "{http://schemas.openxmlformats.org/drawingml/2006/main}ea"

    def __init__(self, translator: "AzureOpenAITranslator"):
        """Keep the translation engine and the per-language font table.

        Args:
            translator: Object providing an async ``translate_batch`` method.
        """
        self.translator = translator
        # Typeface applied to translated runs, keyed by target language code.
        self.font_map = {
            "ko": "맑은 고딕", "ja": "Yu Gothic UI", "en": "Arial",
            "zh": "Microsoft YaHei"
        }

    def _get_all_shapes(self, shapes):
        """Return every non-group shape in ``shapes``, descending into groups."""
        all_shapes = []
        for shape in shapes:
            try:
                is_group = shape.shape_type == MSO_SHAPE_TYPE.GROUP
            except NotImplementedError:
                # NOTE(review): python-pptx is understood to raise this from
                # ``shape_type`` for shape kinds it cannot classify; such a
                # shape is treated as a plain (non-group) shape.
                is_group = False
            if is_group:
                all_shapes.extend(self._get_all_shapes(shape.shapes))
            else:
                all_shapes.append(shape)
        return all_shapes

    @staticmethod
    def _collect_runs(text_frame, kind: str) -> List[Dict]:
        """List the non-blank runs of ``text_frame`` as extraction records."""
        return [
            {"run": run, "text": run.text, "type": kind}
            for paragraph in text_frame.paragraphs
            for run in paragraph.runs
            if run.text.strip()
        ]

    def extract_texts_from_slide(self, slide) -> List[Dict]:
        """Collect every non-blank text run on a slide.

        Returns:
            A list of dicts with keys ``run`` (the run object), ``text`` (its
            current text) and ``type`` (``"text"`` for shape text frames,
            ``"table"`` for table cells), in document order.
        """
        texts_data = []

        for shape in self._get_all_shapes(slide.shapes):
            # 1. Ordinary text frames.
            if getattr(shape, "has_text_frame", False):
                texts_data.extend(self._collect_runs(shape.text_frame, "text"))

            # 2. Tables.
            if getattr(shape, "has_table", False):
                for row in shape.table.rows:
                    for cell in row.cells:
                        text_frame = cell.text_frame
                        if text_frame is not None:
                            texts_data.extend(self._collect_runs(text_frame, "table"))
        return texts_data

    def _apply_font(self, run, typeface: str) -> None:
        """Set both the Latin and the East Asian typeface of ``run``.

        ``font.name`` only sets the Latin typeface, so the ``<a:ea>`` element
        is updated (or created right after ``<a:latin>``) as well; otherwise
        Korean/Japanese/Chinese glyphs keep using the previous or theme font.
        """
        font = run.font
        font.name = typeface

        element = getattr(font, "_element", None)
        if element is None:
            return
        # NOTE(review): in python-pptx ``Font._element`` appears to be the
        # <a:rPr> element itself, so reading ``.rPr`` from it would raise
        # AttributeError; fall back to the element when ``.rPr`` is absent.
        rPr = getattr(element, "rPr", element)
        if rPr is None:
            return

        ea = rPr.find(self._EA_TAG)
        if ea is not None:
            ea.set("typeface", typeface)
            return

        latin = rPr.find(self._LATIN_TAG)
        if latin is None:
            return  # nothing to anchor the new element to; leave as is
        # <a:ea> directly follows <a:latin> in the run-properties sequence.
        latin.addnext(rPr.makeelement(self._EA_TAG, {"typeface": typeface}))

    async def process_slide(
        self,
        slide,
        slide_idx: int,
        target_language: str,
        batch_size: int,
        semaphore: asyncio.Semaphore
    ):
        """Translate one slide in place, holding ``semaphore`` while doing so.

        Args:
            slide: Slide whose runs are rewritten.
            slide_idx: Slide number used in progress output and as context.
            target_language: Language code passed to the translator and used
                to pick the font (``"Arial"`` when the code is unknown).
            batch_size: Maximum number of runs sent per translation request.
            semaphore: Limits how many slides are processed concurrently.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        async with semaphore:
            print(f"Processing Slide {slide_idx}...")
            texts_data = self.extract_texts_from_slide(slide)

            if not texts_data:
                return

            target_font = self.font_map.get(target_language, "Arial")

            # Translate in batches and write each result back to its run.
            for start in range(0, len(texts_data), batch_size):
                batch_items = texts_data[start:start + batch_size]
                translated_batch = await self.translator.translate_batch(
                    [item["text"] for item in batch_items],
                    target_language=target_language,
                    context=f"Slide {slide_idx} content"
                )

                for item, translated_text in zip(batch_items, translated_batch):
                    item["run"].text = translated_text
                    self._apply_font(item["run"], target_font)

    async def translate_presentation(
        self,
        input_path: str,
        output_path: str,
        target_language: str = "ko",
        concurrency: int = 5,
        batch_size: int = 20
    ):
        """Translate every slide of ``input_path`` and save to ``output_path``.

        Args:
            input_path: Presentation to read.
            output_path: Where the translated presentation is written.
            target_language: Target language code.
            concurrency: Maximum number of slides processed at the same time.
            batch_size: Maximum number of runs per translation request.

        Raises:
            ValueError: If ``concurrency`` or ``batch_size`` is smaller than 1
                (a zero-valued semaphore would never let any slide start).
        """
        if concurrency < 1:
            raise ValueError("concurrency must be at least 1")
        if batch_size < 1:
            raise ValueError("batch_size must be at least 1")

        prs = Presentation(input_path)
        total_slides = len(prs.slides)
        print(f"총 슬라이드: {total_slides}, 동시 처리: {concurrency}")

        semaphore = asyncio.Semaphore(concurrency)
        tasks = [
            self.process_slide(slide, idx, target_language, batch_size, semaphore)
            for idx, slide in enumerate(prs.slides, 1)
        ]

        # An exception from any slide propagates here and nothing is saved.
        await asyncio.gather(*tasks)

        prs.save(output_path)
        print(f"\n✅ 번역 완료: {output_path}")

In [None]:
# ===========================
# 실행 (비동기)
# ===========================

async def main():
    """Translate ``ai_idea.pptx`` into Korean with credentials taken from the environment.

    Prints a message instead of translating when the input file is missing.
    """
    input_file = "ai_idea.pptx"
    output_file = "ai_idea_ko_optimized.pptx"

    # Build the engine first (as before), then wrap it in the slide handler.
    engine = AzureOpenAITranslator(
        api_key=os.getenv("AZURE_OPENAI_API_KEY"),
        azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
        deployment_name="gpt-4o",  # a high-capability model is recommended
    )
    deck_translator = PowerPointTranslator(engine)

    if not os.path.exists(input_file):
        print(f"파일을 찾을 수 없습니다: {input_file}")
        return

    await deck_translator.translate_presentation(
        input_path=input_file,
        output_path=output_file,
        target_language="ko",
        concurrency=5,  # process five slides at a time
    )

# Run the translation.
# A module-level ``await`` is only accepted by Jupyter/IPython and is a
# SyntaxError in a plain script. ``asyncio.run`` works as a script, and also
# inside Jupyter because nest_asyncio.apply() is called near the top of this
# file.
if __name__ == "__main__":
    asyncio.run(main())