<a href="https://colab.research.google.com/github/mehdihoore/STTforPersian/blob/main/sttbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install telethon google-generativeai python-dotenv nest_asyncio Pillow

In [None]:
!pip install pydub

In [None]:
import os
import asyncio
import nest_asyncio
import datetime
from pathlib import Path
import logging
import re
import math
from telethon import TelegramClient, events, Button
import google.generativeai as genai
from google.colab import userdata
from pydub import AudioSegment  # New import for audio processing

# --- Configuration ---
# Read every credential from Colab's secret store. Any exception raised while
# fetching or converting a secret is reported and execution is stopped.
try:
    from google.colab import userdata
    # The Telegram API id is converted to int before being handed to TelegramClient below.
    API_ID = int(userdata.get('TELEGRAM_API_ID'))
    API_HASH = userdata.get('TELEGRAM_API_HASH')
    BOT_TOKEN = userdata.get('TELEGRAM_BOT_TTS')
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')  # Main key
    GOOGLE_SUMMARY_API_KEY = userdata.get('GOOGLE_SUMMARY_API_KEY')  # Specific key for summaries

    # Treat empty/None values as missing so the bot never starts half-configured.
    if not all([API_ID, API_HASH, BOT_TOKEN, GOOGLE_API_KEY, GOOGLE_SUMMARY_API_KEY]):
        raise ValueError("One or more secrets are missing.")
    # Informational only: summarize_audio_google skips key switching when both keys match.
    if GOOGLE_API_KEY == GOOGLE_SUMMARY_API_KEY:
        print("Main Google API Key and Summary API Key are the same. No key switching needed.")

except Exception as e:
    print(f"Error loading secrets: {e}")
    exit()

# Apply nest_asyncio
# NOTE(review): presumably needed because the notebook already runs an event
# loop and the entry point calls run_until_complete() on it — confirm.
nest_asyncio.apply()

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Initial Google AI SDK Configuration (with the MAIN key) ---
# Only the last four characters of the key are written to the log.
try:
    logger.info(f"Configuring Google AI SDK with MAIN API key ending with ...{GOOGLE_API_KEY[-4:]}")
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    logger.error(f"Error configuring Google AI SDK with main key: {e}")
    exit()

# Model Configuration
# Looked up by key in get_gemini_model_instance: "*_model_name" entries select
# the model, "*generation_config" entries select the sampling settings, and
# "safety_settings" is passed to every model instance.
MODEL_CONFIG = {
    "text_model_name": "gemini-1.5-flash-latest",
    "multimodal_model_name": "gemini-1.5-flash-latest",
    "generation_config": {"temperature": 0.5},
    "summarization_generation_config": {"temperature": 0.6},
    "safety_settings": [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_MEDIUM_AND_ABOVE"},
    ]
}

# Telethon Client Initialization
# The session name embeds the part of the bot token that precedes the first ':'.
session_name = f"bot_session_{BOT_TOKEN.split(':')[0]}"  # Ensure unique session name
client = TelegramClient(session_name, API_ID, API_HASH)
# Scratch directory for downloads, audio chunks, transcripts and SRT files;
# main() empties it at startup.
TEMP_DIR = Path("./temp_audio_telethon_bot")
TEMP_DIR.mkdir(exist_ok=True)

# --- Audio Processing Constants ---
MAX_DURATION_MINUTES = 30  # Maximum duration in minutes before splitting
MAX_DURATION_MS = MAX_DURATION_MINUTES * 60 * 1000  # Convert to milliseconds

# --- Helper Functions ---

def get_gemini_model_instance(model_name_key, custom_generation_config_key=None):
    """
    Creates and returns a Gemini model instance.
    ASSUMES genai IS ALREADY CONFIGURED with the correct API key FOR THIS CALL.

    Args:
        model_name_key: Key in MODEL_CONFIG whose value is the model name.
        custom_generation_config_key: Optional key in MODEL_CONFIG whose value
            is the generation config to use; when omitted (or falsy) the
            default "generation_config" entry is used.

    Returns:
        A ``genai.GenerativeModel`` built with the shared safety settings.

    Raises:
        Exception: Whatever the config lookup or model construction raised
            (e.g. ``KeyError`` for an unknown key); it is logged and re-raised.
    """
    # Bound before the try block so the error log below can always be
    # formatted, even when the very first lookup is what failed.
    model_name_actual = None
    try:
        model_name_actual = MODEL_CONFIG[model_name_key]
        generation_config_actual = MODEL_CONFIG[custom_generation_config_key or "generation_config"]

        model = genai.GenerativeModel(
            model_name=model_name_actual,
            generation_config=generation_config_actual,
            safety_settings=MODEL_CONFIG["safety_settings"]
        )
        logger.info(f"Created model instance for {model_name_actual} (current global API key is in use)")
        return model
    except Exception as e:
        # Fall back to the requested key when the model name was never resolved.
        logger.error(f"Error creating Gemini model {model_name_actual or model_name_key}: {e}")
        raise

async def cleanup_files(*files):
    """Delete the given temporary files, logging each outcome.

    Falsy entries and paths that do not exist are skipped. An ``OSError``
    raised while deleting one file is logged and the remaining files are
    still processed.
    """
    for candidate in files:
        # Guard clauses: nothing to do for empty entries or missing files.
        if not candidate:
            continue
        if not Path(candidate).exists():
            continue
        try:
            Path(candidate).unlink()
            logger.info(f"Deleted temporary file: {candidate}")
        except OSError as e:
            logger.error(f"Error deleting file {candidate}: {e}")

def generate_srt_with_timecodes(segmented_text, segment_duration_seconds=5):
    """Build SRT subtitle text from newline-separated segments.

    The timecodes are synthetic: every non-blank line becomes one cue and the
    cues are laid back to back, each lasting ``segment_duration_seconds``.

    Args:
        segmented_text: Text with one subtitle segment per line. Blank lines
            are ignored and surrounding whitespace (including a trailing
            ``\\r``) is removed from each segment.
        segment_duration_seconds: Length of every cue in seconds (int or
            float). Defaults to 5, the value that used to be hard-coded.

    Returns:
        The SRT document as a string. When there are no segments, a single
        placeholder cue is returned.

    Raises:
        ValueError: If ``segment_duration_seconds`` is not positive.
    """
    # Work in whole milliseconds so fractional durations never suffer from
    # float truncation when the timestamp is formatted.
    segment_ms = round(segment_duration_seconds * 1000)
    if segment_ms <= 0:
        raise ValueError("segment_duration_seconds must be positive.")

    def format_time(total_ms):
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    lines = [line.strip() for line in segmented_text.splitlines() if line.strip()]
    if not lines:
        return f"1\n{format_time(0)} --> {format_time(segment_ms)}\n(محتوایی برای زمان‌بندی وجود ندارد)\n"

    srt_content = []
    for index, line in enumerate(lines):
        start_ms = index * segment_ms
        srt_content.append(str(index + 1))
        srt_content.append(f"{format_time(start_ms)} --> {format_time(start_ms + segment_ms)}")
        srt_content.append(line)
        srt_content.append("")  # blank separator line between cues
    return "\n".join(srt_content)

# --- New Audio Splitting Functions ---

async def get_audio_duration(file_path):
    """Get the duration of an audio file in milliseconds.

    Args:
        file_path: Path of the audio file to inspect.

    Returns:
        The duration in milliseconds, taken from ``len()`` of the loaded
        ``AudioSegment``.

    Raises:
        Exception: Any error raised while loading the file; it is logged with
            a traceback and re-raised.
    """
    try:
        # AudioSegment.from_file is a synchronous call; run it in a worker
        # thread so other Telegram events keep being served while it loads.
        audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
        duration_ms = len(audio)
        logger.info(f"Audio duration: {duration_ms/1000:.2f} seconds ({duration_ms/60000:.2f} minutes)")
        return duration_ms
    except Exception as e:
        logger.error(f"Error getting audio duration: {e}", exc_info=True)
        raise

async def split_audio_file(file_path, base_name, max_duration_ms=MAX_DURATION_MS):
    """
    Split an audio file into chunks of max_duration_ms.
    Returns a list of paths to the split audio files.

    Args:
        file_path: Path of the audio file to split.
        base_name: Prefix for the chunk file names
            (``<base_name>_part<N>.ogg`` inside ``TEMP_DIR``).
        max_duration_ms: Maximum length of one chunk in milliseconds.

    Returns:
        ``[file_path]`` unchanged when the audio already fits in one chunk,
        otherwise the chunk paths (as strings) in playback order.

    Raises:
        ValueError: If ``max_duration_ms`` is not positive.
        Exception: Any decode/export error; it is logged and re-raised after
            the chunks written so far have been removed.
    """
    if max_duration_ms <= 0:
        raise ValueError("max_duration_ms must be positive.")

    # Report the limit actually in effect, not the module default.
    max_minutes = max_duration_ms / 60000
    chunk_paths = []
    try:
        # Decoding and exporting are synchronous; keep them off the event loop.
        audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
        total_duration_ms = len(audio)

        if total_duration_ms <= max_duration_ms:
            logger.info(f"Audio is shorter than {max_minutes:g} minutes, no need to split")
            return [file_path]

        # Calculate number of chunks needed
        num_chunks = math.ceil(total_duration_ms / max_duration_ms)
        logger.info(f"Splitting audio into {num_chunks} chunks of {max_minutes:g} minutes each")

        for i in range(num_chunks):
            start_ms = i * max_duration_ms
            end_ms = min((i + 1) * max_duration_ms, total_duration_ms)

            chunk = audio[start_ms:end_ms]
            chunk_path = TEMP_DIR / f"{base_name}_part{i+1}.ogg"

            logger.info(f"Exporting chunk {i+1}/{num_chunks} to {chunk_path}")
            # Recorded before exporting so a failed export is cleaned up too.
            chunk_paths.append(str(chunk_path))
            await asyncio.to_thread(chunk.export, str(chunk_path), format="ogg")

        logger.info(f"Successfully split audio into {len(chunk_paths)} chunks")
        return chunk_paths
    except Exception as e:
        logger.error(f"Error splitting audio file: {e}", exc_info=True)
        # The caller never learns about partially written chunks, so remove
        # them here instead of leaving them in TEMP_DIR.
        for partial_path in chunk_paths:
            try:
                Path(partial_path).unlink(missing_ok=True)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial chunk {partial_path}: {cleanup_error}")
        raise

# --- Google AI API Call Functions ---

async def transcribe_audio_google(file_path):
    """Upload an audio file to Google AI and return its transcription.

    Assumes genai is currently configured with the main API key.

    Args:
        file_path: Path of the audio file to upload and transcribe.

    Returns:
        A ``(transcription, uploaded_file)`` tuple: the stripped transcription
        text and the object returned by ``genai.upload_file`` (reused later
        for summarization).

    Raises:
        ValueError: If the model returned no text.
        Exception: Any upload/generation error; it is logged and re-raised.
            If the upload had already succeeded, a best-effort delete of the
            uploaded file is attempted first, because the caller never
            receives the reference on this path.
    """
    logger.info(f"Transcribing audio file: {file_path}")
    google_audio_file_obj = None
    try:
        model = get_gemini_model_instance("multimodal_model_name")  # Assumes main key is active
        logger.info("Uploading audio file for transcription...")
        # upload_file is synchronous, so run in a thread
        google_audio_file_obj = await asyncio.to_thread(genai.upload_file, path=file_path)
        logger.info(f"Audio file uploaded: {google_audio_file_obj.name}")

        prompt = "Please transcribe the audio provided accurately. Return ONLY the plain text transcription."

        # Run the synchronous generate_content in a thread
        response = await asyncio.to_thread(
            model.generate_content,
            [prompt, google_audio_file_obj]  # Pass contents directly
        )

        transcription = response.text.strip()

        if not transcription:
            logger.warning("Transcription response was empty.")
            raise ValueError("Transcription failed: No text returned.")
        logger.info("Transcription successful.")
        return transcription, google_audio_file_obj
    except Exception as e:
        logger.error(f"Error during transcription: {e}", exc_info=True)
        if google_audio_file_obj is not None:
            # Best effort only: a failed delete must not hide the original error.
            try:
                await asyncio.to_thread(genai.delete_file, google_audio_file_obj.name)
                logger.info(f"Deleted orphaned upload: {google_audio_file_obj.name}")
            except Exception as cleanup_error:
                logger.warning(f"Could not delete orphaned upload {google_audio_file_obj.name}: {cleanup_error}")
        raise

# Prompt shared by both API-key paths. It is a plain (non-f) string so the
# transcript is inserted exactly once via str.format(); braces inside the
# transcript are therefore never interpreted as format fields.
_SUMMARY_PROMPT_TEMPLATE = """
شما یک دستیار متخصص در تحلیل و خلاصه‌سازی محتوای صوتی به زبان فارسی هستید.
فایل صوتی ارائه شده است. متن پیاده‌سازی شده اولیه آن نیز برای کمک به زمینه و کلمات کلیدی در زیر آمده است.
لطفاً این فایل صوتی را با دقت تحلیل کرده و یک خلاصه جامع و دقیق به زبان فارسی روان تهیه کنید که شامل موارد زیر باشد:

متن پیاده‌سازی شده اولیه (برای کمک به زمینه):
\"\"\"
{transcription_context}
\"\"\"

دستورالعمل‌های خلاصه‌سازی:
1.  **خلاصه کلی (۲-۳ پاراگراف):** موضوع اصلی و هدف، زمینه بحث، نتیجه‌گیری اصلی.
2.  **نکات کلیدی و برجسته:** مهم‌ترین نقاط، آمار/ارقام مهم، تاریخ‌ها/رویدادهای کلیدی (حداقل ۵ مورد).
3.  **جزئیات و استدلال‌های مهم:** استدلال‌های اصلی، مثال‌ها/موارد خاص، نقل قول‌های مهم (حداکثر ۲-۳).
4.  **تحلیل محتوا (در صورت امکان):** ارتباط مفاهیم، نقاط قوت/ضعف، پیشنهادات/راهکارها.
5.  **دسته‌بندی موضوعی (اختیاری):** موضوعات فرعی و ارتباطشان با موضوع اصلی.

**خروجی مورد انتظار:**
*   خلاصه کاملاً به زبان فارسی سلیس و روان.
*   ساختاریافته با تیترهای مشخص فارسی (مانند "خلاصه کلی", "نکات کلیدی و برجسته").
*   استفاده از نشانه‌گذاری مناسب (لیست‌ها).
*   طول متناسب با محتوای صوتی.
*   فقط و فقط خلاصه نهایی مطابق ساختار درخواستی، بدون عبارت مقدماتی یا توضیحات اضافی.
"""


async def _request_summary(audio_file_ref, transcription_context):
    """Send the summary prompt plus audio reference to the model and return the stripped text."""
    model = get_gemini_model_instance("multimodal_model_name", "summarization_generation_config")
    prompt = _SUMMARY_PROMPT_TEMPLATE.format(transcription_context=transcription_context)
    # generate_content is synchronous, so run it in a worker thread.
    response = await asyncio.to_thread(model.generate_content, [prompt, audio_file_ref])
    summary = response.text.strip()

    if not summary:
        logger.warning("Summarization response was empty.")
        raise ValueError("Summarization failed: No text returned.")
    logger.info("Summarization successful.")
    return summary


async def summarize_audio_google(audio_file_ref, transcription_context):
    """Produce a structured Persian summary of an uploaded audio file.

    When a distinct ``GOOGLE_SUMMARY_API_KEY`` is available, genai is
    temporarily reconfigured with it for the request and switched back to
    ``GOOGLE_API_KEY`` afterwards, whether or not the request succeeded.
    Otherwise the already-configured main key is used.

    NOTE(review): genai.configure() changes process-wide state, so requests
    running concurrently in other handlers may see the summary key while it is
    active. Also confirm that a file uploaded under the main key is readable
    with the summary key.

    Args:
        audio_file_ref: Uploaded-file object returned by ``genai.upload_file``.
        transcription_context: Transcript inserted into the prompt as context.

    Returns:
        The stripped summary text.

    Raises:
        ValueError: If the model returned no text.
        Exception: Any configuration/generation error; logged and re-raised.
    """
    logger.info("Summarizing audio content...")
    use_summary_key = bool(GOOGLE_SUMMARY_API_KEY) and GOOGLE_API_KEY != GOOGLE_SUMMARY_API_KEY
    switched_to_summary_key = False
    try:
        if use_summary_key:
            logger.info(f"Temporarily configuring genai for GOOGLE_SUMMARY_API_KEY (ends ...{GOOGLE_SUMMARY_API_KEY[-4:]}) for summarization")
            genai.configure(api_key=GOOGLE_SUMMARY_API_KEY)
            switched_to_summary_key = True
        else:
            logger.info("Using main GOOGLE_API_KEY for summarization as keys are same or summary key not distinct.")
        return await _request_summary(audio_file_ref, transcription_context)
    except Exception as e:
        if use_summary_key:
            logger.error(f"Error during summarization: {e}", exc_info=True)
        else:
            logger.error(f"Error during summarization with main key: {e}", exc_info=True)
        raise
    finally:
        # --- Switch back to MAIN API key ---
        if switched_to_summary_key:
            logger.info(f"Switching genai config back to main GOOGLE_API_KEY (ends ...{GOOGLE_API_KEY[-4:]})")
            genai.configure(api_key=GOOGLE_API_KEY)

async def translate_to_persian_google(text):
    """Translate ``text`` to Persian with the text model.

    Relies on genai already being configured with GOOGLE_API_KEY.

    Returns:
        ``""`` when ``text`` is empty or whitespace-only, otherwise the
        stripped translation.

    Raises:
        ValueError: If the model returned no text.
        Exception: Any model error; it is logged and re-raised.
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return ""

    logger.info("Translating text to Persian...")
    try:
        translator = get_gemini_model_instance("text_model_name")
        request = f'Translate the following text to Persian:\n\n"{text}"\n\nReturn ONLY the Persian translation.'
        result = await asyncio.to_thread(translator.generate_content, request)
        translated = result.text.strip()
        if translated:
            logger.info("Translation successful.")
            return translated
        logger.warning("Translation response was empty.")
        raise ValueError("Translation failed: No text returned.")
    except Exception as e:
        logger.error(f"Error during translation: {e}", exc_info=True)
        raise

async def segment_persian_text_google(persian_text):
    """Break Persian text into subtitle-sized lines, one segment per line.

    Relies on genai already being configured with GOOGLE_API_KEY. The model is
    asked first; if it returns nothing, or if any error occurs, the text is
    split on sentence punctuation and newlines instead.

    Raises:
        ValueError: If neither the model nor the punctuation split produced
            any text.
    """
    def split_on_punctuation():
        # Fallback splitter: cut on sentence-ending marks and newlines, drop blanks.
        pieces = re.split(r'[।\.؟!\n]+', persian_text)
        return "\n".join(piece.strip() for piece in pieces if piece.strip())

    logger.info("Segmenting Persian text for SRT...")
    try:
        segmenter = get_gemini_model_instance("text_model_name")
        segmentation_prompt = f"""Take the following Persian text and break it into suitable subtitle segments. Each segment should be on a new line. Aim for natural breaks and readable lengths for subtitles.
Return ONLY the segmented text, with each segment on a new line.
Persian text:
---
{persian_text}
---"""
        reply = await asyncio.to_thread(segmenter.generate_content, segmentation_prompt)
        result = reply.text.strip()

        if not result:  # Fallback
            logger.warning("LLM Segmentation response was empty. Using regex fallback.")
            result = split_on_punctuation()
            if not result:
                raise ValueError("Segmentation failed: No text from LLM or fallback.")
        logger.info("Segmentation successful.")
        return result
    except Exception as e:
        logger.error(f"Error during LLM segmentation: {e}. Using regex fallback.", exc_info=True)
        result = split_on_punctuation()  # Fallback on any error
        if not result:
            raise ValueError(f"Segmentation failed: Error '{e}' and fallback also yielded no text.")
        return result

async def generate_persian_srt_google(transcription):
    """Turn a transcription into Persian SRT text.

    Pipeline: translate to Persian, segment into subtitle lines, then attach
    timecodes with ``generate_srt_with_timecodes``.

    Raises:
        ValueError: If translation or segmentation produced no text.
        Exception: Any error from the underlying steps; logged and re-raised.
    """
    logger.info("Generating Persian SRT...")
    try:
        translated = await translate_to_persian_google(transcription)
        if not translated:
            raise ValueError("Translation step failed for SRT.")

        segmented = await segment_persian_text_google(translated)
        if not segmented:
            raise ValueError("Segmentation step failed for SRT.")

        subtitles = generate_srt_with_timecodes(segmented)
        logger.info("SRT generation successful.")
        return subtitles
    except Exception as e:
        logger.error(f"Error generating SRT: {e}", exc_info=True)
        raise

async def get_bot_response_google(message_text):
    """Generate a short Persian chat reply to a user's text message.

    Relies on genai already being configured with GOOGLE_API_KEY. This
    function never raises: an empty model reply or any error yields a fixed
    Persian apology string instead.
    """
    logger.info(f"Getting bot response for: {message_text[:50]}...")
    try:
        chat_model = get_gemini_model_instance("text_model_name")
        prompt = f"""You are LinguaScribe_Bot, a helpful Telegram assistant. The user's language is Persian.
User says: "{message_text}"
Provide a concise and helpful response in Persian. If the user sends audio, you will have received the transcription as 'messageText'.
If they ask about services, mention audio transcription to text (پیاده‌سازی صوت), Persian translation (ترجمه به فارسی), SRT generation (تولید فایل زیرنویس SRT), and audio summarization (خلاصه‌سازی صوت).
Keep responses brief. If the input is non-sensical or just a greeting, respond politely and briefly in Persian.
Return ONLY the bot's reply."""
        result = await asyncio.to_thread(chat_model.generate_content, prompt)
        answer = result.text.strip()
        if answer:
            logger.info("Bot response generated.")
            return answer
        logger.warning("Bot response generation was empty.")
        return "متاسفانه در حال حاضر قادر به پاسخگویی نیستم."
    except Exception as e:
        logger.error(f"Error getting bot response: {e}", exc_info=True)
        return "متاسفانه در پردازش درخواست شما مشکلی پیش آمد."

# --- New Function for Processing Long Audio ---
async def process_long_audio(event, download_path, original_name_base, chat_id, processing_msg):
    """Process a long audio file by splitting it into chunks and processing each chunk

    Audio no longer than ``MAX_DURATION_MS`` is delegated to
    ``process_single_audio``. Longer audio is split, every chunk is
    transcribed, and the combined transcript is sent as a file, turned into an
    SRT file, and summarised.

    Args:
        event: The originating Telegram event (currently unused here).
        download_path: Path of the downloaded audio file.
        original_name_base: Prefix for the generated file names.
        chat_id: Chat that receives the result files and messages.
        processing_msg: Status message that is edited to show progress.

    Returns:
        ``(uploaded_refs, files_to_cleanup)``: the uploaded-file objects
        returned by the transcription step and every local file the caller
        should delete. Both lists are also returned on failure, containing
        whatever had been created up to that point.
    """
    # Tracked outside the try block so a failure half-way through still hands
    # every created file/reference back to the caller for cleanup.
    uploaded_refs = []
    files_to_cleanup = [download_path]
    try:
        # Check audio duration
        audio_duration_ms = await get_audio_duration(download_path)

        if audio_duration_ms <= MAX_DURATION_MS:
            # Audio is shorter than threshold, process normally
            logger.info(f"Audio duration ({audio_duration_ms/60000:.2f} min) is under threshold, processing normally")
            return await process_single_audio(str(download_path), original_name_base, chat_id, processing_msg)

        # The status text is accumulated locally: edit_message() returns the
        # edited message rather than updating processing_msg, so reading
        # processing_msg.text would always yield the stale initial text.
        status_text = f"⚠️ فایل صوتی شما طولانی است ({audio_duration_ms/60000:.1f} دقیقه). در حال تقسیم به قطعات {MAX_DURATION_MINUTES} دقیقه‌ای و پردازش..."

        async def append_status(suffix):
            nonlocal status_text
            status_text += suffix
            await client.edit_message(processing_msg, status_text)

        # Audio is longer than threshold, need to split
        await client.edit_message(processing_msg, status_text)

        # Split the audio file
        chunk_paths = await split_audio_file(download_path, original_name_base)
        files_to_cleanup.extend(chunk_paths)

        # Process each chunk and collect transcriptions
        all_transcriptions = []
        total_chunks = len(chunk_paths)
        for chunk_number, chunk_path in enumerate(chunk_paths, start=1):
            await append_status(f"\n\n⏳ در حال پیاده‌سازی متن قطعه {chunk_number} از {total_chunks}...")

            chunk_transcription, chunk_ref = await transcribe_audio_google(chunk_path)
            uploaded_refs.append(chunk_ref)
            all_transcriptions.append(chunk_transcription)

            await append_status(f"\n✅ پیاده‌سازی قطعه {chunk_number} انجام شد.")

        # Combine all transcriptions
        full_transcription = "\n\n".join(all_transcriptions)

        # Save combined transcription
        transcription_path = TEMP_DIR / f"{original_name_base}_full_transcription.txt"
        files_to_cleanup.append(transcription_path)
        with open(transcription_path, "w", encoding="utf-8") as f:
            f.write(full_transcription)

        # Send the combined transcription
        await client.send_file(
            chat_id,
            str(transcription_path),
            caption="🎤 متن کامل پیاده‌سازی شده:"
        )

        # Generate SRT from combined transcription
        await append_status("\n\n⏳ در حال تولید زیرنویس (SRT) کامل...")
        srt_content = await generate_persian_srt_google(full_transcription)
        srt_path = TEMP_DIR / f"{original_name_base}_full_subtitles.srt"
        files_to_cleanup.append(srt_path)
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_content)
        await client.send_file(chat_id, str(srt_path), caption="🎬 فایل زیرنویس کامل (SRT):")
        await append_status("\n✅ فایل زیرنویس (SRT) ارسال شد.")

        # Generate summary using the first chunk's audio reference and the full transcription
        # (since we can't combine audio files for the API, we'll use one chunk but provide full transcription)
        await append_status("\n\n⏳ در حال تهیه خلاصه کلی...")
        summary = await summarize_audio_google(uploaded_refs[0], full_transcription)
        await client.send_message(
            chat_id,
            f"📝 *خلاصه محتوای کامل:*\n\n{summary}",
            parse_mode='md'
        )

        # Final status message
        await client.edit_message(
            processing_msg,
            f"✅ پردازش فایل صوتی {audio_duration_ms/60000:.1f} دقیقه‌ای شما با موفقیت تکمیل شد."
        )

        # Return all files for cleanup
        return uploaded_refs, files_to_cleanup

    except Exception as e:
        logger.exception(f"Error in process_long_audio: {e}")
        # Reporting the failure must not prevent the cleanup lists from being
        # returned, so a failing edit is only logged.
        try:
            await client.edit_message(
                processing_msg,
                f"❌ خطا در پردازش فایل صوتی بلند: {str(e)}"
            )
        except Exception as notify_error:
            logger.error(f"Could not report long-audio failure to the user: {notify_error}")
        return uploaded_refs, files_to_cleanup

# --- Function to Process a Single Audio File (for reuse) ---
async def process_single_audio(file_path, original_name_base, chat_id, processing_msg):
    """Process a single audio file and return the uploaded ref and files for cleanup

    Steps: transcribe and send the transcript file, send a summary message,
    then generate and send a Persian SRT file. Progress is shown by editing
    ``processing_msg``.

    Args:
        file_path: Path of the audio file to process.
        original_name_base: Prefix for the generated file names.
        chat_id: Chat that receives the result files and messages.
        processing_msg: Status message that is edited to show progress.

    Returns:
        ``(uploaded_refs, files_to_cleanup)``: the uploaded-file objects and
        the local files the caller should delete. On failure both lists hold
        whatever had been created before the error.
    """
    # Tracked outside the try block so a late failure (e.g. during
    # summarization) still reports the transcript file and uploaded reference.
    uploaded_refs = []
    files_to_cleanup = [file_path]
    # edit_message() returns the edited message rather than updating
    # processing_msg, so the progress text is accumulated locally.
    status_text = processing_msg.text or ""

    async def append_status(suffix):
        nonlocal status_text
        status_text += suffix
        await client.edit_message(processing_msg, status_text)

    try:
        await append_status("\n\n⏳ در حال پیاده‌سازی متن...")
        transcription, google_audio_file_uploaded_ref = await transcribe_audio_google(file_path)
        uploaded_refs.append(google_audio_file_uploaded_ref)

        transcription_path = TEMP_DIR / f"{original_name_base}_transcription.txt"
        files_to_cleanup.append(transcription_path)
        with open(transcription_path, "w", encoding="utf-8") as f:
            f.write(transcription)

        await client.send_file(chat_id, str(transcription_path), caption="🎤 متن پیاده‌سازی شده:")
        await append_status("\n✅ متن پیاده‌سازی و ارسال شد.")

        await append_status("\n\n⏳ در حال تهیه خلاصه...")
        summary = await summarize_audio_google(google_audio_file_uploaded_ref, transcription)
        await client.send_message(chat_id, f"📝 *خلاصه محتوا:*\n\n{summary}", parse_mode='md')
        await append_status("\n✅ خلاصه ارسال شد.")

        await append_status("\n\n⏳ در حال تولید زیرنویس (SRT)...")
        srt_content = await generate_persian_srt_google(transcription)
        srt_path = TEMP_DIR / f"{original_name_base}_subtitles.srt"
        files_to_cleanup.append(srt_path)
        with open(srt_path, "w", encoding="utf-8") as f:
            f.write(srt_content)
        await client.send_file(chat_id, str(srt_path), caption="🎬 فایل زیرنویس (SRT):")
        await append_status("\n✅ فایل زیرنویس (SRT) ارسال شد.")
        await client.edit_message(processing_msg, "✅ پردازش فایل صوتی با موفقیت تکمیل شد!")

        # Return references and paths for cleanup
        return uploaded_refs, files_to_cleanup
    except Exception as e:
        logger.exception(f"Error in process_single_audio: {e}")
        # A failing status edit is only logged so the cleanup lists are still returned.
        try:
            await client.edit_message(processing_msg, f"❌ خطا در پردازش فایل صوتی: {str(e)}")
        except Exception as notify_error:
            logger.error(f"Could not report audio failure to the user: {notify_error}")
        return uploaded_refs, files_to_cleanup

# --- Main Bot Event Handlers ---

@client.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Reply to /start with a Persian welcome message listing the bot's features."""
    chat_id = event.chat_id
    sender = await event.get_sender()
    logger.info(f"New /start command from User {sender.id} in Chat {chat_id}")

    welcome_text = (
        "👋 سلام! به ربات *LinguaScribe* خوش آمدید.\n\n"
        "این ربات می‌تواند:\n"
        "🎤 **پیاده‌سازی متن**: فایل‌های صوتی را به متن تبدیل کند\n"
        "📝 **خلاصه‌سازی**: محتوای صوتی را خلاصه کند\n"
        "🎬 **زیرنویس**: فایل SRT فارسی تولید کند\n\n"
        "برای شروع، یک فایل صوتی برای من ارسال کنید."
    )
    await event.reply(welcome_text, parse_mode='md')

@client.on(events.NewMessage(pattern='/help'))
async def help_command(event):
    """Reply to /help with Persian usage instructions and the command list."""
    help_text = (
        "🔍 **راهنمای استفاده از LinguaScribe Bot**\n\n"
        "کاربرد:\n"
        "1️⃣ یک فایل صوتی (voice message, audio file) ارسال کنید\n"
        "2️⃣ ربات به صورت خودکار:\n"
        "   - متن پیاده‌سازی شده را ارسال می‌کند\n"
        "   - خلاصه‌ای از محتوا تهیه می‌کند\n"
        "   - فایل زیرنویس SRT تولید می‌کند\n\n"
        "📋 **نکات مهم**:\n"
        "• فایل‌های صوتی تا ۳۰ دقیقه پشتیبانی می‌شوند\n"
        "• برای فایل‌های طولانی‌تر، ربات آنها را به بخش‌های کوچکتر تقسیم می‌کند\n"
        "• زبان اصلی مورد پشتیبانی فارسی است\n\n"
        "📌 **دستورات:**\n"
        "/start - شروع کار با ربات\n"
        "/help - نمایش این راهنما"
    )
    await event.reply(help_text, parse_mode='md')

# Captioned audio/voice/document messages also carry text; they are excluded
# here because handle_audio_message already processes them, and answering the
# caption as chat would handle the same message twice.
@client.on(events.NewMessage(
    func=lambda e: e.text and not e.text.startswith('/') and not (e.audio or e.voice or e.document)
))
async def handle_text_message(event):
    """Answer a plain (non-command) text message with a model-generated Persian reply.

    A placeholder reply is sent first and then edited to contain the answer.
    If anything fails, the placeholder is edited to a generic error message.
    """
    chat_id = event.chat_id
    message_text = event.text
    logger.info(f"Received text message in chat {chat_id}: {message_text[:50]}...")

    # Let the user know we're processing
    processing_msg = await event.reply("⏳ در حال پردازش پیام شما...")

    try:
        bot_response = await get_bot_response_google(message_text)
        await client.edit_message(processing_msg, bot_response)
    except Exception as e:
        logger.error(f"Error handling text message: {e}", exc_info=True)
        await client.edit_message(processing_msg, "❌ متأسفانه در پردازش پیام شما مشکلی پیش آمد.")

@client.on(events.NewMessage(func=lambda e: e.audio or e.voice or e.document))
async def handle_audio_message(event):
    """Download an incoming audio/voice/audio-document message and run the full pipeline on it.

    Documents whose MIME type does not start with ``audio/`` are rejected
    with a reply. Temporary files are removed whether or not processing
    succeeded. Unexpected errors are logged and reported to the user with a
    generic message.
    """
    try:
        chat_id = event.chat_id
        # sender_id is read from the event itself, so no entity lookup is
        # needed just for the log line.
        logger.info(f"Received audio from User {event.sender_id} in Chat {chat_id}")

        # Check if the message contains audio, voice, or a document
        if event.audio:
            media, file_type = event.audio, "audio"
        elif event.voice:
            media, file_type = event.voice, "voice"
        elif event.document and (getattr(event.document, 'mime_type', None) or '').startswith('audio/'):
            media, file_type = event.document, "document"
        else:
            await event.reply("❌ لطفاً یک فایل صوتی معتبر ارسال کنید.")
            return

        # Initial processing message
        processing_msg = await event.reply("⏳ در حال دریافت فایل صوتی...")

        # Generate a unique filename based on timestamp and user
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        fallback_name = f"{file_type}_{timestamp}"
        # The file name may sit on any attribute, not just the first one, and
        # the attribute list may be empty.
        attributes = getattr(media, 'attributes', None) or []
        original_name = next(
            (attr.file_name for attr in attributes if getattr(attr, 'file_name', None)),
            fallback_name,
        )
        # The name comes from the sender: keep only word characters, '-' and
        # '.', and cap the length so the derived paths stay valid.
        safe_stem = re.sub(r'[^\w\-.]+', '_', Path(original_name).stem).strip('._')[:50]
        original_name_base = safe_stem or fallback_name
        download_path = TEMP_DIR / f"{original_name_base}_{timestamp}.ogg"

        uploaded_refs = []
        files_to_cleanup = [download_path]
        try:
            # Download the file
            try:
                await client.download_media(message=event.message, file=str(download_path))
                logger.info(f"File downloaded to {download_path}")
                await client.edit_message(processing_msg, "✅ فایل صوتی دریافت شد. در حال پردازش...")
            except Exception as download_error:
                logger.error(f"Error downloading file: {download_error}", exc_info=True)
                await client.edit_message(processing_msg, "❌ خطا در دریافت فایل صوتی.")
                return

            # Process the audio (handles both short and long audio files)
            uploaded_refs, files_to_cleanup = await process_long_audio(
                event, download_path, original_name_base, chat_id, processing_msg)
        finally:
            # Clean up all temporary files, including a partial download and
            # the case where processing raised.
            await cleanup_files(*files_to_cleanup)

        for ref in uploaded_refs:
            # Only log the Google API uploaded file references that exist
            if ref and hasattr(ref, 'name'):
                logger.info(f"Cleaning up Google API file reference: {ref.name}")
                # No cleanup needed for now as these are handled by Google's API

    except Exception as e:
        logger.exception(f"Unhandled error in handle_audio_message: {e}")
        try:
            await event.reply("❌ متأسفانه در پردازش فایل صوتی شما مشکلی پیش آمد. لطفاً دوباره تلاش کنید.")
        except Exception as reply_error:
            # Best effort: the user could not be notified; record why.
            logger.error(f"Could not send error reply: {reply_error}")

# --- Main Entry Point ---

async def main():
    """Start the bot and keep it running until it disconnects.

    Empties ``TEMP_DIR`` first, then starts the Telegram client with the bot
    token, logs the bot's username, and blocks until disconnection. The
    client is disconnected on the way out.
    """
    logger.info("Starting the bot...")

    # Remove leftovers from earlier runs; a failure on one entry is logged
    # and the remaining entries are still attempted.
    for leftover in TEMP_DIR.glob("*"):
        try:
            leftover.unlink()
            logger.info(f"Cleaned up old file: {leftover}")
        except Exception as e:
            logger.error(f"Error cleaning up file {leftover}: {e}")

    # Log in as a bot
    await client.start(bot_token=BOT_TOKEN)
    logger.info("Bot started successfully")

    # Report which bot account is in use
    bot_account = await client.get_me()
    logger.info(f"Bot Username: @{bot_account.username}")

    # Serve events until the connection ends
    try:
        logger.info("Bot is now running. Press Ctrl+C to stop.")
        await client.run_until_disconnected()
    except KeyboardInterrupt:
        logger.info("Bot stopped by user")
    finally:
        await client.disconnect()
        logger.info("Bot disconnected")

if __name__ == "__main__":
    # Run the bot
    # main() is driven on the loop returned by get_event_loop().
    # NOTE(review): presumably this relies on the earlier nest_asyncio.apply()
    # when a loop is already running (e.g. inside a notebook) — confirm.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash-latest:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 35385.19ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash-latest:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 16001.20ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash-latest:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 13293.33ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-1.5-flash-latest:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 23082.18ms
