# In [None]:  (Jupyter notebook cell marker left over from export)
import os
import time
from dotenv import load_dotenv
import azure.cognitiveservices.speech as speechsdk
from azure.ai.translation.text import TextTranslationClient
from azure.core.credentials import AzureKeyCredential
from pydub import AudioSegment
from datetime import datetime

# Load environment variables (python-dotenv)
load_dotenv()

# Azure settings, read from the environment.
# os.getenv returns None for any variable that is not set; nothing here validates that.
speech_api_key = os.getenv("SPEECH_API_KEY")
region = os.getenv("SPEECH_REGION")
translator_api_key = os.getenv("TRANSLATOR_API_KEY")
translator_region = os.getenv("TRANSLATOR_REGION")
translator_endpoint = os.getenv("TRANSLATOR_ENDPOINT")

# FFmpeg path settings used by pydub
# NOTE(review): these are raw strings, so every "\\" stays as two literal
# backslashes in the resulting path. Windows appears to tolerate doubled
# separators — confirm, or drop either the r-prefix or the doubling.
AudioSegment.converter = r"C:\\ffmpeg\\bin\\ffmpeg.exe"
AudioSegment.ffprobe = r"C:\\ffmpeg\\bin\\ffprobe.exe"

# ==================== Language settings ====================
# Set the input and output languages here; both must be keys of LANGUAGE_MAPPING
INPUT_LANGUAGE = "한국어"      # STT input language
OUTPUT_LANGUAGE = "영어"       # translation output + TTS output language

# Language mapping table
LANGUAGE_MAPPING = {
    # language name: (STT code, translation code, TTS voice)
    "한국어": ("ko-KR", "ko", "ko-KR-SunHiNeural"),
    "영어": ("en-US", "en", "en-US-AvaMultilingualNeural"),
    "중국어(간체)": ("zh-CN", "zh-Hans", "zh-CN-XiaoxiaoNeural"),
    "일본어": ("ja-JP", "ja", "ja-JP-NanamiNeural"),
    "독일어": ("de-DE", "de", "de-DE-KatjaNeural"),
    "프랑스어": ("fr-FR", "fr", "fr-FR-DeniseNeural"),
    "스페인어": ("es-ES", "es", "es-ES-ElviraNeural"),
    "이탈리아어": ("it-IT", "it", "it-IT-IsabellaNeural"),
    "러시아어": ("ru-RU", "ru", "ru-RU-DariyaNeural"),
    "포르투갈어": ("pt-BR", "pt", "pt-BR-FranciscaNeural"),
    # NOTE(review): zh-HK / HiuGaai looks like a Cantonese locale and voice paired
    # with the generic Traditional Chinese translation code — confirm this is intended.
    "중국어(번체)": ("zh-HK", "zh-Hant", "zh-HK-HiuGaaiNeural"),
    "네덜란드어": ("nl-NL", "nl", "nl-NL-FennaNeural"),
    "폴란드어": ("pl-PL", "pl", "pl-PL-AgnieszkaNeural"),
    "스웨덴어": ("sv-SE", "sv", "sv-SE-SofieNeural"),
    "덴마크어": ("da-DK", "da", "da-DK-ChristelNeural"),
    "핀란드어": ("fi-FI", "fi", "fi-FI-SelmaNeural"),
    "노르웨이어": ("nb-NO", "nb", "nb-NO-PernilleNeural"),
    "체코어": ("cs-CZ", "cs", "cs-CZ-VlastaNeural"),
    "헝가리어": ("hu-HU", "hu", "hu-HU-NoemiNeural"),
    "아랍어": ("ar-SA", "ar", "ar-SA-ZariyahNeural"),
    "힌디어": ("hi-IN", "hi", "hi-IN-SwaraNeural"),
    "터키어": ("tr-TR", "tr", "tr-TR-EmelNeural"),
    "우크라이나어": ("uk-UA", "uk", "uk-UA-PolinaNeural"),
}

# Extract the codes for the configured languages.
# A language name missing from LANGUAGE_MAPPING raises KeyError here, at import time.
# In the code visible in this file only INPUT_STT_CODE, OUTPUT_TRANSLATE_CODE and
# OUTPUT_TTS_VOICE are referenced afterwards; the other three names are unused here.
INPUT_STT_CODE, INPUT_TRANSLATE_CODE, INPUT_TTS_VOICE = LANGUAGE_MAPPING[INPUT_LANGUAGE]
OUTPUT_STT_CODE, OUTPUT_TRANSLATE_CODE, OUTPUT_TTS_VOICE = LANGUAGE_MAPPING[OUTPUT_LANGUAGE]

print(f"🎤 입력 언어: {INPUT_LANGUAGE} ({INPUT_STT_CODE})")
print(f"🔄 출력 언어: {OUTPUT_LANGUAGE} ({OUTPUT_TRANSLATE_CODE})")
print("=" * 50)

# ==================== STT 클래스 ====================
class AzureSTT:
    """Continuous microphone speech-to-text built on the Azure Speech SDK.

    Final recognition results are appended to ``self.text_updater``; interim
    hypotheses are stored there as the current partial text.
    """

    def __init__(self):
        """Build the speech configuration for ``INPUT_STT_CODE``; no audio is opened yet."""
        self.speech_config = speechsdk.SpeechConfig(subscription=speech_api_key, region=region)
        self.speech_config.speech_recognition_language = INPUT_STT_CODE
        self.recognizer = None  # created by start, dropped by stop
        self.is_listening = False
        self.text_updater = TextUpdater()

    def start_continuous_recognition(self):
        """Start recognizing from the default microphone.

        Does nothing when recognition is already running. Any previously
        accumulated text is cleared before the new session starts.
        """
        if self.is_listening:
            return

        self.text_updater.clear()

        audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
        self.recognizer = speechsdk.SpeechRecognizer(self.speech_config, audio_config)

        def recognized_handler(evt):
            # Final result for one utterance: append it to the transcript.
            if evt.result.reason != speechsdk.ResultReason.RecognizedSpeech:
                return
            text = evt.result.text.strip()
            if not text:
                return
            self.text_updater.update_text(text, is_partial=False)
            print(f"✅ STT 인식: {text}")

        def recognizing_handler(evt):
            # Interim hypothesis: replaces the previous partial text.
            if evt.result.reason != speechsdk.ResultReason.RecognizingSpeech:
                return
            text = evt.result.text.strip()
            if text:
                self.text_updater.update_text(f"인식중... {text}", is_partial=True)

        def canceled_handler(evt):
            # Without this handler a bad key/region or a network failure would
            # end recognition without any visible message.
            details = evt.cancellation_details
            print(f"❌ STT 취소됨: {details.reason}")
            if details.reason == speechsdk.CancellationReason.Error:
                print(f"   오류 상세: {details.error_details}")

        self.recognizer.recognized.connect(recognized_handler)
        self.recognizer.recognizing.connect(recognizing_handler)
        self.recognizer.canceled.connect(canceled_handler)

        self.recognizer.start_continuous_recognition()
        self.is_listening = True
        print(f"🎤 {INPUT_LANGUAGE} 실시간 음성 인식 시작")

    def stop_continuous_recognition(self):
        """Stop recognition and release the recognizer.

        Does nothing when recognition is not running. The listening state is
        reset even if the SDK stop call raises (the exception still
        propagates), so a later start is not blocked by a stale flag.
        """
        if not self.is_listening or self.recognizer is None:
            return

        try:
            self.recognizer.stop_continuous_recognition()
        finally:
            self.is_listening = False
            self.recognizer = None
        print("🛑 음성 인식 중지")

    def get_current_text(self):
        """Return the accumulated transcript, including any pending partial text."""
        return self.text_updater.get_full_text()

    def clear_text(self):
        """Discard the accumulated transcript and any partial text."""
        self.text_updater.clear()

class TextUpdater:
    """Transcript buffer: finalized text plus one replaceable partial segment."""

    def __init__(self):
        """Start with an empty transcript and no partial segment."""
        self.text = ""
        self.partial = ""

    def update_text(self, new_text, is_partial=False):
        """Record ``new_text``.

        A partial update overwrites the current partial segment. A final
        update appends ``new_text`` followed by a single space to the
        transcript and drops the partial segment.
        """
        if not is_partial:
            self.text = "".join((self.text, new_text, " "))
            self.partial = ""
            return
        self.partial = new_text

    def get_full_text(self):
        """Return the transcript, with the partial segment appended after a blank line and a 🎤 marker when one exists."""
        return f"{self.text}\n\n🎤 {self.partial}" if self.partial else self.text

    def clear(self):
        """Reset both the transcript and the partial segment to empty strings."""
        self.text = self.partial = ""

# ==================== 번역 클래스 ====================
class AzureTranslator:
    """Wrapper around the Azure ``TextTranslationClient`` that returns plain dicts."""

    def __init__(self):
        """Create the client from the module-level translator key, endpoint and region."""
        self.credential = AzureKeyCredential(translator_api_key)
        self.translator = TextTranslationClient(
            credential=self.credential,
            endpoint=translator_endpoint,
            region=translator_region
        )

    def translate_text(self, text, target_language=OUTPUT_TRANSLATE_CODE, source_language=None):
        """Translate ``text`` into ``target_language``.

        Args:
            text: The text to translate.
            target_language: Translator language code to translate into.
            source_language: Translator language code of ``text``. When None,
                no source language is sent with the request.

        Returns:
            A dict with the keys ``original_text``, ``translated_text``,
            ``target_language``, ``detected_language`` and ``confidence``.
            On failure ``translated_text`` is None and an additional
            ``error`` key holds the exception message, so callers can use the
            same keys on both paths. Exceptions are reported by printing and
            are not raised to the caller.
        """
        # Build the request once; the source language is only sent when given.
        request_kwargs = {
            "body": [{"text": text}],
            "to_language": [target_language],
        }
        if source_language is not None:
            request_kwargs["from_language"] = source_language

        try:
            response = self.translator.translate(**request_kwargs)

            translation_result = response[0]
            translated_text = translation_result.translations[0].text

            detected_language = None
            confidence = None
            # Only read detection info when the response item carries it.
            detection = getattr(translation_result, "detected_language", None)
            if detection:
                detected_language = detection.language
                confidence = detection.score

            return {
                "original_text": text,
                "translated_text": translated_text,
                "target_language": target_language,
                "detected_language": detected_language,
                "confidence": confidence
            }
        except Exception as e:
            # Deliberate best-effort boundary: callers test "translated_text"
            # instead of handling exceptions.
            print(f"❌ 번역 오류: {e}")
            return {
                "original_text": text,
                "translated_text": None,
                "target_language": target_language,
                "detected_language": None,
                "confidence": None,
                "error": str(e),
            }

# ==================== TTS 클래스 ====================
class AzureTTS:
    """Text-to-speech via the Azure Speech SDK, to the default speaker or to a WAV file."""

    def __init__(self):
        """Build the speech configuration using the ``OUTPUT_TTS_VOICE`` voice."""
        self.speech_config = speechsdk.SpeechConfig(subscription=speech_api_key, region=region)
        self.speech_config.speech_synthesis_voice_name = OUTPUT_TTS_VOICE
        self.synthesizer = None  # most recently created synthesizer

    def speak_text(self, text, output_method="speaker"):
        """Synthesize ``text`` using the selected output method.

        Args:
            text: Text to speak. None, empty and whitespace-only values are rejected.
            output_method: ``"speaker"`` or ``"file"``.

        Returns:
            For ``"speaker"``: True on success, False on synthesis failure.
            For ``"file"``: the written file name, or None on failure.
            None when the text is empty, the method is unknown, or an
            exception occurred (exceptions are printed, not raised).
        """
        # `not text` also covers None, which would otherwise fail on .strip().
        if not text or not text.strip():
            print("❌ TTS할 텍스트가 없습니다.")
            return None

        try:
            if output_method == "speaker":
                return self._speak_to_speaker(text)
            if output_method == "file":
                return self._speak_to_file(text)
        except Exception as e:
            print(f"❌ TTS 오류: {e}")
            return None

        # Report a mistyped method instead of returning None silently.
        print(f"❌ 지원하지 않는 TTS 출력 방식: {output_method}")
        return None

    def _synthesize(self, text, audio_config):
        """Create a synthesizer for ``audio_config``, speak ``text`` and return the SDK result."""
        self.synthesizer = speechsdk.SpeechSynthesizer(self.speech_config, audio_config)
        return self.synthesizer.speak_text_async(text).get()

    @staticmethod
    def _describe_failure(result):
        """Return a printable reason for a synthesis result that did not complete."""
        details = result.cancellation_details
        if details is None:
            # No cancellation info available; fall back to the result reason.
            return str(result.reason)
        if details.error_details:
            return f"{details.reason} ({details.error_details})"
        return str(details.reason)

    def _speak_to_speaker(self, text):
        """Play ``text`` on the default speaker; return True on success, False otherwise."""
        print(f"🔊 [{OUTPUT_LANGUAGE}] TTS 출력: {text}")

        audio_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
        result = self._synthesize(text, audio_config)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print("✅ TTS 출력 완료")
            return True

        print(f"❌ TTS 실패: {self._describe_failure(result)}")
        return False

    def _speak_to_file(self, text):
        """Write ``text`` as speech to a timestamped WAV file; return its name or None on failure."""
        now = datetime.now().strftime("%y%m%d_%H%M%S")
        filename = f"tts_output_{now}.wav"

        print(f"💾 [{OUTPUT_LANGUAGE}] TTS 파일 저장: {filename}")

        audio_config = speechsdk.audio.AudioOutputConfig(filename=filename)
        result = self._synthesize(text, audio_config)

        if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted:
            print(f"✅ 파일 저장 완료: {filename}")
            return filename

        print(f"❌ 파일 저장 실패: {self._describe_failure(result)}")
        return None

# ==================== 통합 파이프라인 클래스 ====================
class STTTranslateTTSPipeline:
    """Chains speech recognition, translation and speech synthesis."""

    def __init__(self):
        """Create the STT, translator and TTS components."""
        self.stt = AzureSTT()
        self.translator = AzureTranslator()
        self.tts = AzureTTS()

    def start_realtime_pipeline(self, auto_translate=True, auto_tts=True, tts_method="speaker"):
        """Run the interactive microphone STT -> translation -> TTS loop.

        Recognition runs continuously. Each time the user presses Enter, the
        text recognized so far is processed and then cleared. Entering ``q``
        or pressing Ctrl+C ends the loop; recognition is always stopped on
        exit.

        Args:
            auto_translate: Translate the recognized text when True.
            auto_tts: Speak the translation when True (needs a translation).
            tts_method: Output method passed to the TTS component.
        """
        print("🚀 실시간 파이프라인 시작")
        print(f"   입력: {INPUT_LANGUAGE} → 출력: {OUTPUT_LANGUAGE}")
        print("   'q' + Enter로 종료\n")

        self.stt.start_continuous_recognition()

        try:
            while True:
                user_input = input()
                if user_input.lower() == 'q':
                    break

                # Fetch the text recognized so far; skip when there is none or
                # when an utterance is still in progress (partial marker present).
                current_text = self.stt.get_current_text()
                if not current_text or "🎤" in current_text:
                    continue

                print(f"\n📝 원본 텍스트: {current_text}")

                # Automatic translation
                if auto_translate and current_text.strip():
                    result = self.translator.translate_text(current_text)
                    if result.get("translated_text"):
                        translated = result["translated_text"]
                        # The key is present even when its value is None, so
                        # a .get() default would never apply; use `or`.
                        detected = result.get("detected_language") or "unknown"
                        print(f"🔄 번역 결과: {translated} (감지언어: {detected})")

                        # Automatic TTS
                        if auto_tts:
                            self.tts.speak_text(translated, tts_method)

                # Clears everything recognized up to now, including anything
                # recognized while translating or speaking.
                self.stt.clear_text()
                print("-" * 50)

        except KeyboardInterrupt:
            print("\n사용자가 중단했습니다.")
        finally:
            self.stt.stop_continuous_recognition()

    def process_file(self, file_path, tts_method="speaker"):
        """Run STT -> translation -> TTS for one audio file.

        Args:
            file_path: Path of the audio file to recognize.
            tts_method: Output method passed to the TTS component.

        Returns:
            None. Progress and failures are reported by printing.
        """
        print(f"📁 파일 처리: {file_path}")

        # Speech recognition on the file
        recognized_text = self._recognize_file(file_path)
        if not recognized_text:
            print("❌ 파일 인식 실패")
            return

        print(f"📝 원본 텍스트: {recognized_text}")

        # Translation
        result = self.translator.translate_text(recognized_text)
        if result.get("translated_text"):
            translated = result["translated_text"]
            print(f"🔄 번역 결과: {translated}")

            # TTS output
            self.tts.speak_text(translated, tts_method)
        else:
            print("❌ 번역 실패")

    def _recognize_file(self, file_path):
        """Recognize speech from an audio file.

        Files whose extension is not .wav/.pcm/.wave/.flac are first converted
        with pydub to a mono 16 kHz WAV in a temporary file, which is removed
        again afterwards.

        Returns:
            The recognized text, or None when the file does not exist or the
            result is not recognized speech.
        """
        import tempfile

        if not os.path.exists(file_path):
            return None

        file_ext = os.path.splitext(file_path)[1].lower()
        temp_path = None
        try:
            # Convert the file format when needed.
            # NOTE(review): .pcm and .flac are passed to the SDK unconverted —
            # confirm the Speech SDK accepts them as file input.
            if file_ext not in (".wav", ".pcm", ".wave", ".flac"):
                # Unique temp file instead of a fixed name in the working
                # directory, so runs cannot collide and nothing is left behind.
                fd, temp_path = tempfile.mkstemp(prefix="stt_converted_", suffix=".wav")
                os.close(fd)
                sound = AudioSegment.from_file(file_path)
                sound = sound.set_channels(1).set_frame_rate(16000)
                sound.export(temp_path, format="wav")
                audio_path = temp_path
            else:
                audio_path = file_path

            # Speech recognition.
            # NOTE(review): recognize_once() returns a single result — confirm
            # that is sufficient for longer recordings.
            speech_config = speechsdk.SpeechConfig(subscription=speech_api_key, region=region)
            speech_config.speech_recognition_language = INPUT_STT_CODE
            audio_config = speechsdk.audio.AudioConfig(filename=audio_path)
            recognizer = speechsdk.SpeechRecognizer(speech_config, audio_config)

            result = recognizer.recognize_once()
            # Drop the SDK objects before cleanup so they do not keep the
            # temporary file open when it is removed.
            del recognizer, audio_config

            if result.reason == speechsdk.ResultReason.RecognizedSpeech:
                return result.text
            return None
        finally:
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    # Best-effort cleanup; a leftover temp file must not mask
                    # the recognition outcome.
                    pass

# ==================== 메인 실행 ====================
if __name__ == "__main__":
    # Build the pipeline once and reuse it for every menu selection.
    pipeline = STTTranslateTTSPipeline()

    # The menu is printed a single time, before the prompt loop.
    print("\n".join([
        "🎯 Azure STT → 번역 → TTS 통합 시스템",
        "=" * 50,
        "1. 실시간 처리 (마이크)",
        "2. 파일 처리",
        "0. 종료",
    ]))

    while True:
        choice = input("\n선택하세요 > ").strip()

        if choice == "0":
            print("프로그램을 종료합니다.")
            break

        if choice == "1":
            # Real-time microphone pipeline; tts_method may also be "file".
            pipeline.start_realtime_pipeline(auto_translate=True, auto_tts=True, tts_method="speaker")
            continue

        if choice == "2":
            # File-based pipeline
            file_path = input("오디오 파일 경로 입력 > ").strip()
            pipeline.process_file(file_path, tts_method="speaker")
            continue

        print("❌ 잘못된 선택입니다.")
