<a href="https://colab.research.google.com/github/mehdihoore/STTforPersian/blob/main/sttbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install telethon google-generativeai python-dotenv nest_asyncio Pillow pydub

In [None]:
import os
import asyncio
import nest_asyncio
import datetime
from pathlib import Path
import logging
import re
import math
import zipfile
import shutil # For robust directory cleanup
from io import BytesIO

from telethon import TelegramClient, events, Button
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold, GenerationConfig
from google.api_core import exceptions as google_exceptions # For specific error handling
from pydub import AudioSegment

# --- Configuration ---
# Apply nest_asyncio before any event loop is used. Presumably needed because
# the notebook kernel already runs an asyncio loop that the bot must re-enter.
nest_asyncio.apply()

# Configure root logging once; `logger` is the module-wide logger used by every
# function below.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Load credentials from Colab secrets. The three Telegram secrets are required;
# the Google keys are optional individually, but at least one must be present.
try:
    from google.colab import userdata

    # Required secrets: a missing/inaccessible one raises and is reported by the
    # outer handler below.
    API_ID = int(userdata.get('TELEGRAM_API_ID'))
    API_HASH = userdata.get('TELEGRAM_API_HASH')
    BOT_TOKEN = userdata.get('TELEGRAM_BOT_TTS')

    # Load multiple Google API Keys
    # Name your secrets like GOOGLE_API_KEY_1, GOOGLE_API_KEY_2, etc. in Colab
    GOOGLE_API_KEYS_LIST = []
    for i in range(1, 6):  # Try to load up to 5 keys, adjust as needed
        secret_name = f'GOOGLE_API_KEY_{i}'
        try:
            key = userdata.get(secret_name)
        except (userdata.SecretNotFoundError, userdata.NotebookAccessError) as key_error:
            # An absent optional key must not abort startup; just skip the slot.
            logger.info(f"Optional secret {secret_name} is not available ({type(key_error).__name__}); skipping.")
            continue
        # Pasted secrets often carry stray whitespace/newlines that would make
        # the key invalid, so store the trimmed value.
        if key and key.strip():
            GOOGLE_API_KEYS_LIST.append(key.strip())

    if not all([API_ID, API_HASH, BOT_TOKEN]):
        raise ValueError("Telegram API_ID, API_HASH, or BOT_TOKEN is missing.")
    if not GOOGLE_API_KEYS_LIST:
        raise ValueError("At least one GOOGLE_API_KEY_n must be configured in Colab secrets.")

except Exception as e:
    # Top-level boundary: nothing below can work without credentials.
    logger.critical(f"Error loading secrets: {e}")
    exit()

# --- Model and API Configuration ---
# Maps a task key to an ordered list of Gemini model names.
# gemini_request_with_retry tries the models in list order for each API key and
# falls back to the first "bot_response" model when a task key is unknown.
MODEL_PREFERENCES = {
    # NOTE(review): the same Flash model is listed twice, so the "fallback" is
    # just a second attempt on the same model; no Pro model is configured.
    "transcription": ["gemini-1.5-flash-latest", "gemini-1.5-flash-latest"],
    # Preview Flash models (thinking variant first, plain variant as fallback).
    "summarization_detailed": ["gemini-2.5-flash-preview-04-17-thinking", "gemini-2.5-flash-preview-04-17"],
    "translation_segmentation": ["gemini-1.5-flash-latest"],
    "bot_response": ["gemini-1.5-flash-latest"],
}

# Sampling settings per task. gemini_request_with_retry looks entries up by its
# `generation_config_key` argument and uses "default" for unknown keys.
GENERATION_CONFIGS = {
    "default": GenerationConfig(temperature=0.5),
    "summarization_detailed": GenerationConfig(temperature=0.4, top_p=0.95), # Lower temp for factual detail
    "translation_segmentation": GenerationConfig(temperature=0.2), # More deterministic
    "bot_response": GenerationConfig(temperature=0.7),
}

# Every listed harm category gets the same BLOCK_ONLY_HIGH threshold.
SAFETY_SETTINGS = [
    {"category": harm_category, "threshold": HarmBlockThreshold.BLOCK_ONLY_HIGH}
    for harm_category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
]

# --- Global State for API Key Management ---
# Index into GOOGLE_API_KEYS_LIST of the key to try next, and the key the SDK
# is currently configured with (None when unconfigured or after a failure).
_current_key_index = 0
_active_google_api_key = None

def configure_gemini_client(api_key_to_set):
    """Point the Google AI SDK at ``api_key_to_set``.

    Returns True when the SDK is (or already was) configured with that key,
    and False when configuration raised; in the failure case the remembered
    active key is cleared.
    """
    global _active_google_api_key

    # Guard clause: nothing to do when this key is already the active one.
    if api_key_to_set == _active_google_api_key:
        return True # Already configured with this key

    key_suffix = api_key_to_set[-4:]
    logger.info(f"Configuring Google AI SDK with API key ending with ...{key_suffix}")
    try:
        genai.configure(api_key=api_key_to_set)
    except Exception as e:
        logger.error(f"Failed to configure Google AI SDK with key ...{key_suffix}: {e}")
        _active_google_api_key = None
        return False

    _active_google_api_key = api_key_to_set
    return True

# Initialize with the first key at import time so an SDK/key problem is
# reported immediately; gemini_request_with_retry may switch keys later.
if not configure_gemini_client(GOOGLE_API_KEYS_LIST[0]):
    logger.critical("Failed to configure Gemini with the initial API key. Exiting.")
    exit()


# Telethon Client Initialization
# The session name embeds the part of the bot token before ':' so that
# different bot tokens get different session names.
session_name = f"bot_session_{BOT_TOKEN.split(':')[0]}"
client = TelegramClient(session_name, API_ID, API_HASH)
# Working directory for audio chunks and generated transcript/SRT/summary files.
TEMP_DIR = Path("./temp_files_linguascribe_bot")
TEMP_DIR.mkdir(parents=True, exist_ok=True)
# Sub-directory presumably used for unpacked archive contents (usage is not
# visible in this part of the file).
TEMP_EXTRACTION_DIR = TEMP_DIR / "extracted_files"
TEMP_EXTRACTION_DIR.mkdir(parents=True, exist_ok=True)


# --- Audio/Video Processing Constants ---
# Max duration for single audio processing before splitting. Author's note:
# Gemini Flash can handle up to 1h and Pro even more, but smaller chunks are
# safer for retries.
MAX_DURATION_MINUTES = 25
# Same limit in milliseconds, the unit pydub uses for lengths and slicing.
MAX_DURATION_MS = MAX_DURATION_MINUTES * 60 * 1000
# MIME types treated as video input.
VIDEO_MIME_TYPES = ['video/mp4', 'video/mpeg', 'video/quicktime', 'video/x-msvideo', 'video/x-flv', 'video/webm', 'video/x-matroska', 'video/avi']
# Export settings used when writing audio chunks.
AUDIO_OUTPUT_FORMAT = "ogg" # opus in ogg
AUDIO_OUTPUT_CODEC = "libopus"
AUDIO_OUTPUT_BITRATE = "48k" # Reduced bitrate for smaller size
AUDIO_SAMPLE_RATE = 16000 # Good for speech

# --- Helper Functions ---

async def cleanup_files_and_dirs(*paths):
    """Best-effort removal of temporary files and directories.

    Args:
        *paths: Strings or path-like objects. ``None`` and empty-string
            entries are ignored, as are paths that do not exist.

    Files are unlinked and directories are removed recursively. An
    ``OSError`` during deletion is logged and does not stop the remaining
    paths from being processed.
    """
    for path_obj in paths:
        # Optional paths may never have been created (None). An empty string
        # must be skipped too: Path("") is the current directory, and removing
        # it recursively would wipe the working tree.
        if path_obj is None or path_obj == "":
            continue
        path = Path(path_obj)
        if not path.exists():
            continue
        try:
            if path.is_file():
                path.unlink()
                logger.info(f"Deleted temporary file: {path}")
            elif path.is_dir():
                shutil.rmtree(path)
                logger.info(f"Deleted temporary directory: {path}")
        except OSError as e:
            logger.error(f"Error deleting {path}: {e}")

def generate_srt_with_timecodes(segmented_text, segment_duration_seconds=5, gap_seconds=0.5):
    """Build SRT subtitle text with evenly spaced, synthetic timecodes.

    Each non-blank line of ``segmented_text`` becomes one numbered cue. Cues
    are laid out back to back: every cue lasts ``segment_duration_seconds``
    and is followed by a pause of ``gap_seconds``. The timing is not derived
    from the audio.

    Args:
        segmented_text: Subtitle segments, one per line. ``None`` or text with
            no non-blank lines yields a single placeholder cue.
        segment_duration_seconds: Display time of each cue (must be > 0).
        gap_seconds: Pause between consecutive cues (must be >= 0).

    Returns:
        The SRT document as a string; every cue is followed by a blank line.

    Raises:
        ValueError: If the duration is not positive or the gap is negative.
    """
    if segment_duration_seconds <= 0:
        raise ValueError("segment_duration_seconds must be positive.")
    if gap_seconds < 0:
        raise ValueError("gap_seconds must not be negative.")

    def format_time(total_ms):
        # Integer arithmetic avoids float truncation in the millisecond field.
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        seconds, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    # Strip each line so stray carriage returns or padding never end up in a cue.
    lines = [line.strip() for line in (segmented_text or "").splitlines() if line.strip()]
    if not lines:
        return "1\n00:00:00,000 --> 00:00:05,000\n(محتوایی برای زمان‌بندی وجود ندارد)\n"

    duration_ms = round(segment_duration_seconds * 1000)
    gap_ms = round(gap_seconds * 1000)

    srt_content = []
    start_ms = 0
    for cue_number, line in enumerate(lines, start=1):
        end_ms = start_ms + duration_ms
        srt_content.extend((
            str(cue_number),
            f"{format_time(start_ms)} --> {format_time(end_ms)}",
            line,
            "",  # blank line terminates the cue
        ))
        start_ms = end_ms + gap_ms  # Add a small gap
    return "\n".join(srt_content)

async def get_audio_duration(file_path):
    """Return the duration of the audio at ``file_path`` in milliseconds.

    Args:
        file_path: Path of a media file that pydub can open.

    Returns:
        The length of the decoded audio in milliseconds.

    Raises:
        Exception: Any error raised while opening/decoding the file is logged
            with its traceback and re-raised unchanged.
    """
    try:
        # AudioSegment.from_file is synchronous and decodes the whole file, so
        # run it in a worker thread (as done for the Gemini SDK calls) instead
        # of stalling the event loop.
        audio = await asyncio.to_thread(AudioSegment.from_file, file_path)
        duration_ms = len(audio)
        logger.info(f"Audio duration for {file_path}: {duration_ms/1000:.2f}s ({duration_ms/60000:.2f}min)")
        return duration_ms
    except Exception as e:
        logger.error(f"Error getting audio duration for {file_path}: {e}", exc_info=True)
        raise

def _split_audio_file_blocking(file_path, base_name, max_duration_ms):
    """Synchronous worker: decode ``file_path`` and export it in slices of ``max_duration_ms``."""
    audio = AudioSegment.from_file(file_path)
    total_duration_ms = len(audio)

    if total_duration_ms <= max_duration_ms:
        logger.info(f"Audio {base_name} is short enough, no split needed.")
        return [str(file_path)]

    num_chunks = math.ceil(total_duration_ms / max_duration_ms)
    logger.info(f"Splitting audio {base_name} into {num_chunks} chunks.")
    # The codec is only forced for the ogg container; for any other format
    # None is passed and the choice is left to pydub.
    export_codec = AUDIO_OUTPUT_CODEC if AUDIO_OUTPUT_FORMAT == "ogg" else None

    chunk_paths = []
    for i in range(num_chunks):
        start_ms = i * max_duration_ms
        end_ms = min(start_ms + max_duration_ms, total_duration_ms)
        # Ensure correct extension based on AUDIO_OUTPUT_FORMAT
        chunk_path = TEMP_DIR / f"{base_name}_part{i+1}.{AUDIO_OUTPUT_FORMAT}"
        logger.info(f"Exporting chunk {i+1}/{num_chunks} to {chunk_path}")
        audio[start_ms:end_ms].export(
            str(chunk_path),
            format=AUDIO_OUTPUT_FORMAT,
            codec=export_codec,
            bitrate=AUDIO_OUTPUT_BITRATE,
        )
        chunk_paths.append(str(chunk_path))
    return chunk_paths


async def split_audio_file(file_path, base_name, max_duration_ms=MAX_DURATION_MS):
    """Split an audio file into chunks no longer than ``max_duration_ms``.

    Args:
        file_path: Path of the audio file to split.
        base_name: Prefix for chunk file names (``<base_name>_partN.<ext>``),
            written into ``TEMP_DIR``.
        max_duration_ms: Maximum chunk length in milliseconds (must be > 0).

    Returns:
        A list of path strings. When the audio already fits in one chunk the
        list contains only ``str(file_path)`` and no new file is written.

    Raises:
        ValueError: If ``max_duration_ms`` is not positive.
        Exception: Any decode/export error is logged and re-raised unchanged.
    """
    if max_duration_ms <= 0:
        raise ValueError("max_duration_ms must be a positive number of milliseconds.")
    try:
        # Decoding and exporting are synchronous; do the whole job in a worker
        # thread so the bot's event loop stays responsive.
        return await asyncio.to_thread(
            _split_audio_file_blocking, file_path, base_name, max_duration_ms
        )
    except Exception as e:
        logger.error(f"Error splitting audio file {base_name}: {e}", exc_info=True)
        raise

# --- Core Gemini API Request Function with Retry ---
async def gemini_request_with_retry(
    task_name: str,
    model_preference_key: str,
    prompt_parts: list,
    generation_config_key: str,
    file_path_to_upload: str = None
):
    """Send one request to Gemini, rotating through API keys and models on failure.

    The function makes at most one pass over ``GOOGLE_API_KEYS_LIST``, starting
    at the module-level ``_current_key_index``. For each key it configures the
    SDK, optionally uploads ``file_path_to_upload``, and then tries every model
    listed for ``model_preference_key`` in order.

    Args:
        task_name: Label used only in log messages and the final error text.
        model_preference_key: Key into ``MODEL_PREFERENCES``; an unknown key
            falls back to the first "bot_response" model.
        prompt_parts: Content parts sent to the model. When a file is uploaded
            its reference is placed in front of these parts.
        generation_config_key: Key into ``GENERATION_CONFIGS``; an unknown key
            falls back to "default".
        file_path_to_upload: Optional local path uploaded via ``genai.upload_file``.

    Returns:
        A ``(text, uploaded_file_ref)`` tuple: the stripped response text and
        the upload reference (``None`` when no file was uploaded).

    Raises:
        genai.types.BlockedPromptException: Re-raised immediately.
        Exception: When every key/model combination has failed.

    Note:
        ``_current_key_index`` is only advanced after a failure, so after a
        success the next call starts with the same key.
    """
    global _current_key_index
    uploaded_file_ref = None
    max_key_cycles = len(GOOGLE_API_KEYS_LIST)

    for key_cycle in range(max_key_cycles):
        current_api_key = GOOGLE_API_KEYS_LIST[_current_key_index]
        logger.info(f"[Task: {task_name}] Attempting with API key ending ...{current_api_key[-4:]} (Cycle {key_cycle+1}/{max_key_cycles})")

        if not configure_gemini_client(current_api_key):
            _current_key_index = (_current_key_index + 1) % len(GOOGLE_API_KEYS_LIST)
            continue # Try next key if this one fails to configure

        models_to_try = MODEL_PREFERENCES.get(model_preference_key, [MODEL_PREFERENCES["bot_response"][0]])
        gen_config = GENERATION_CONFIGS.get(generation_config_key, GENERATION_CONFIGS["default"])

        # File Upload (if needed, once per key attempt)
        if file_path_to_upload and not uploaded_file_ref: # Only upload once per successful key config
            try:
                logger.info(f"[Task: {task_name}] Uploading file: {file_path_to_upload} with key ...{current_api_key[-4:]}")
                # genai.upload_file is synchronous
                uploaded_file_ref = await asyncio.to_thread(
                    genai.upload_file, path=file_path_to_upload,
                    # mime_type can be specified if Path(file_path_to_upload).suffix is not reliable
                )
                logger.info(f"[Task: {task_name}] File uploaded successfully: {uploaded_file_ref.name}")
                # Prepend file to prompt_parts if upload successful
                current_prompt_parts = [uploaded_file_ref] + prompt_parts
            except Exception as e:
                logger.error(f"[Task: {task_name}] File upload failed with key ...{current_api_key[-4:]}: {e}")
                # If upload fails, this key might be problematic for uploads or file is bad.
                # Cycle to next key by incrementing index and continuing outer loop.
                _current_key_index = (_current_key_index + 1) % len(GOOGLE_API_KEYS_LIST)
                uploaded_file_ref = None # Reset for next key
                continue
        # NOTE(review): this branch looks unreachable. uploaded_file_ref is None
        # at the start of every key cycle (initial value, or reset before each
        # move to the next key), so the first branch always handles uploads.
        elif file_path_to_upload and uploaded_file_ref: # File already uploaded with this key
             current_prompt_parts = [uploaded_file_ref] + prompt_parts
        else: # No file to upload
            current_prompt_parts = prompt_parts


        for model_name in models_to_try:
            logger.info(f"[Task: {task_name}] Trying model: {model_name} with key ...{current_api_key[-4:]}")
            try:
                model = genai.GenerativeModel(
                    model_name=model_name,
                    generation_config=gen_config,
                    safety_settings=SAFETY_SETTINGS
                )
                # model.generate_content is synchronous
                response = await asyncio.to_thread(
                    model.generate_content, contents=current_prompt_parts
                )
                # Check for empty or problematic response.
                # NOTE(review): accessing response.text may itself raise when the
                # response has no usable parts; that would land in the generic
                # handler below — confirm against the SDK.
                if not response.candidates or not response.text:
                    if response.prompt_feedback and response.prompt_feedback.block_reason:
                        logger.warning(f"[Task: {task_name}] Request blocked for model {model_name}. Reason: {response.prompt_feedback.block_reason_message or response.prompt_feedback.block_reason}")
                        # This is often not retriable with same input. Break from model loop
                        # (the next key is still tried).
                        break
                    else:
                        logger.warning(f"[Task: {task_name}] Empty response from model {model_name}. Candidates: {response.candidates}")
                        # Treat as a failure for this model, try next model or key
                        continue # Try next model

                logger.info(f"[Task: {task_name}] Successful response from model {model_name} with key ...{current_api_key[-4:]}")
                return response.text.strip(), uploaded_file_ref # Return text and file_ref

            except (google_exceptions.ResourceExhausted,
                    google_exceptions.InternalServerError,
                    google_exceptions.DeadlineExceeded,
                    google_exceptions.ServiceUnavailable,
                    google_exceptions.Aborted) as e:
                logger.warning(f"[Task: {task_name}] Retryable API error with model {model_name} / key ...{current_api_key[-4:]}: {type(e).__name__} - {e}. Retrying with next model/key.")
                await asyncio.sleep(1) # Simple backoff
                continue # Try next model or, if end of models, will go to next key
            except (google_exceptions.InvalidArgument, google_exceptions.PermissionDenied) as e:
                logger.error(f"[Task: {task_name}] Non-retryable (for this model/key) API error with model {model_name} / key ...{current_api_key[-4:]}: {type(e).__name__} - {e}. Skipping model/key.")
                break # Break from model loop, try next key
            except genai.types.BlockedPromptException as e:
                logger.error(f"[Task: {task_name}] BlockedPromptException with model {model_name}: {e}. This input is problematic.")
                raise # Re-raise, as this is usually an input issue not fixable by retry
            except Exception as e:
                logger.error(f"[Task: {task_name}] Unexpected error with model {model_name} / key ...{current_api_key[-4:]}: {type(e).__name__} - {e}", exc_info=True)
                # For unexpected errors, break from model loop and try next key
                break

        # If all models for the current key failed or were skipped
        _current_key_index = (_current_key_index + 1) % len(GOOGLE_API_KEYS_LIST)
        # The file is uploaded again under the next key; the previous upload is
        # not deleted here.
        uploaded_file_ref = None # Reset uploaded file ref if we are changing key

    # If all keys and models failed
    logger.error(f"[Task: {task_name}] All API keys and models failed after {max_key_cycles} cycles.")
    raise Exception(f"Failed to get response for {task_name} after multiple retries with all available keys/models.")


# --- Specific Google AI API Call Functions (using the retry wrapper) ---

async def transcribe_audio_google(file_path):
    """Upload the audio at ``file_path`` and ask Gemini for a plain-text transcription.

    Returns:
        ``(transcription, uploaded_file_ref)`` as produced by
        ``gemini_request_with_retry``.

    Raises:
        ValueError: If the returned transcription is empty.
    """
    logger.info(f"Transcribing audio file: {file_path}")
    instruction = "Please transcribe the audio provided accurately. Return ONLY the plain text transcription."
    result = await gemini_request_with_retry(
        task_name="AudioTranscription",
        model_preference_key="transcription",
        prompt_parts=[instruction],
        generation_config_key="default",
        file_path_to_upload=file_path,
    )
    transcription, uploaded_file_ref = result
    if transcription:
        return transcription, uploaded_file_ref
    raise ValueError("Transcription failed: No text returned after retries.")

# Persian prompt template for the detailed exam-preparation summary.
# It is filled in with str.format(); `{transcription_context}` is the only
# placeholder, so any other literal brace added to this text must be doubled
# ({{ or }}). The \"\"\" sequences are escaped quotes that render as a
# triple-quote fence around the transcription.
DETAILED_SUMMARY_PROMPT_FOR_COLLEGE_PREP_FA = """
شما یک دستیار آموزشی خبره هستید که وظیفه تهیه مطالب مطالعه برای آمادگی آزمون‌های ورودی دانشگاه (کنکور) را بر عهده دارید.
فایل صوتی (که محتوای آن در متن پیاده‌سازی شده آمده) و متن پیاده‌سازی شده اولیه آن ارائه شده است.
لطفاً این محتوا را با دقت بسیار بالا تحلیل کرده و یک خلاصه بسیار جامع، دقیق و با جزئیات فراوان به زبان فارسی روان تهیه کنید که برای دانشجویی که نیاز به یادآوری و درک کامل این اطلاعات برای یک آزمون مهم دارد، مناسب باشد.

متن پیاده‌سازی شده اولیه (برای کمک به زمینه و کلمات کلیدی):
\"\"\"
{transcription_context}
\"\"\"

دستورالعمل‌های خلاصه‌سازی جامع برای آمادگی آزمون:

1.  **مقدمه و هدف کلی (۱-۲ پاراگراف):**
    *   موضوع اصلی و هدف کلی از ارائه این محتوا چیست؟
    *   زمینه و بستر اصلی بحث چیست؟

2.  **مفاهیم و تعاریف کلیدی (لیست شماره‌گذاری شده):**
    *   تمامی اصطلاحات، مفاهیم و واژگان تخصصی مهم مطرح شده را شناسایی کنید.
    *   هر کدام را به طور واضح و دقیق در چارچوب بحث تعریف کنید.

3.  **نکات اصلی و استدلال‌ها (ساختار درختی یا با عنوان‌بندی مناسب با استفاده از Markdown):**
    *   تمامی نکات، ایده‌ها و استدلال‌های اصلی مطرح شده را به تفصیل بیان کنید.
    *   برای هر نکته یا استدلال، شواهد، مثال‌ها، آمار، ارقام، تاریخ‌ها و اسامی مهم ذکر شده را به طور کامل بیاورید.
    *   اگر زنجیره منطقی یا مراحل خاصی در استدلال‌ها وجود دارد، آن‌ها را گام به گام توضیح دهید.

4.  **جزئیات تکمیلی و مثال‌های مهم (حداقل ۵-۷ مورد یا بیشتر در صورت لزوم):**
    *   مثال‌های کلیدی، موارد خاص، مطالعات موردی یا نمونه‌هایی که برای روشن شدن مفاهیم ارائه شده‌اند را با جزئیات شرح دهید.
    *   نقل قول‌های مهم و تاثیرگذار را (در صورت وجود) با ذکر دقیق آورده و اهمیت آن‌ها را توضیح دهید.

5.  **تحلیل عمیق محتوا (در صورت امکان و مرتبط بودن):**
    *   ارتباط بین مفاهیم مختلف چگونه است؟
    *   نقاط قوت و ضعف استدلال‌های ارائه شده (در صورت تحلیل در خود محتوا) چیست؟
    *   پیشنهادات، راهکارها یا نتایج عملی که از بحث حاصل می‌شود، کدامند؟
    *   هرگونه پیش‌فرض، فرضیه زمینه‌ای یا پیامدهای پنهان را شناسایی کنید.

6.  **نتیجه‌گیری اصلی و پیام نهایی (۱-۲ پاراگراف):**
    *   جمع‌بندی نهایی بحث و مهمترین نتایجی که می‌توان گرفت چیست؟
    *   پیام اصلی یا درسی که مخاطب باید از این محتوا بگیرد چیست؟

7.  **ساختار و زبان:**
    *   خلاصه باید کاملاً به زبان فارسی رسمی، علمی و روان باشد.
    *   از ساختار منطقی با عنوان‌بندی و شماره‌گذاری مناسب (مانند لیست‌ها، تیترهای فرعی با استفاده از Markdown مانند #, ##, ###, *, -) برای سازماندهی اطلاعات استفاده کنید تا خوانایی و قابلیت مرور آن برای مطالعه افزایش یابد.
    *   در ارائه جزئیات کوتاهی نکنید؛ هدف، پوشش کامل و عمیق مطالب برای آمادگی آزمون است.
    *   فقط و فقط خلاصه نهایی مطابق ساختار درخواستی، بدون عبارت مقدماتی یا توضیحات اضافی درباره فرآیند خلاصه‌سازی ارائه شود.
"""

async def summarize_audio_google(audio_file_ref_for_context, transcription_context):
    """Generate the detailed Persian study summary from a transcription.

    Args:
        audio_file_ref_for_context: Reference to the already-uploaded audio.
            Currently unused: the summary is produced from the transcription
            text only. The parameter is kept so existing callers keep working
            and a multimodal variant can be added later.
        transcription_context: Transcription text substituted into
            ``DETAILED_SUMMARY_PROMPT_FOR_COLLEGE_PREP_FA``.

    Returns:
        The summary text prefixed with U+200F (RIGHT-TO-LEFT MARK).

    Raises:
        ValueError: If the model returned no text.
        Exception: Errors from ``gemini_request_with_retry`` propagate unchanged.
    """
    logger.info("Generating detailed summary for college prep...")
    summary_prompt_formatted = DETAILED_SUMMARY_PROMPT_FOR_COLLEGE_PREP_FA.format(
        transcription_context=transcription_context
    )

    # Text-only request: no file is uploaded, so the returned file reference
    # is always None and is discarded.
    summary_text, _ = await gemini_request_with_retry(
        task_name="DetailedSummarization",
        model_preference_key="summarization_detailed",
        prompt_parts=[summary_prompt_formatted],
        generation_config_key="summarization_detailed",
        file_path_to_upload=None,
    )

    if not summary_text:
        raise ValueError("Summarization failed: No text returned after retries.")
    return "\u200F" + summary_text


async def translate_to_persian_google(text):
    """Translate ``text`` to Persian with Gemini.

    Returns an empty string, without calling the model, when ``text`` is
    empty, ``None`` or whitespace only.

    Raises:
        ValueError: If the model returned no translation.
    """
    if not (text and text.strip()):
        return ""

    logger.info("Translating text to Persian...")
    request_prompt = f'Translate the following text to Persian:\n\n"{text}"\n\nReturn ONLY the Persian translation, with no introductory phrases.'
    response = await gemini_request_with_retry(
        task_name="TranslationToPersian",
        model_preference_key="translation_segmentation",
        prompt_parts=[request_prompt],
        generation_config_key="translation_segmentation",
    )
    translation = response[0]
    if translation:
        return translation
    raise ValueError("Translation failed: No text returned.")

async def segment_persian_text_google(persian_text):
    """Break Persian text into subtitle-sized segments, one per line.

    The model does the segmentation. If it returns nothing, the text is split
    on sentence punctuation and newlines instead.

    Raises:
        ValueError: If neither the model nor the fallback produced any text.
    """
    logger.info("Segmenting Persian text for SRT...")
    segmentation_prompt = f"""Take the following Persian text and break it into suitable subtitle segments. Each segment should be on a new line. Aim for natural breaks and readable lengths for subtitles (typically 1-2 short sentences or phrases).
Return ONLY the segmented text, with each segment on a new line. Do not add numbering.
Persian text:
---
{persian_text}
---"""
    llm_result = await gemini_request_with_retry(
        task_name="TextSegmentation",
        model_preference_key="translation_segmentation",
        prompt_parts=[segmentation_prompt],
        generation_config_key="translation_segmentation",
    )
    segmented_text = llm_result[0]
    if segmented_text:
        return segmented_text

    # Fallback: split on sentence-ending punctuation and line breaks.
    logger.warning("LLM Segmentation response was empty. Using regex fallback.")
    trimmed_pieces = (piece.strip() for piece in re.split(r'[।\.؟!\n]+', persian_text))
    segmented_text = "\n".join(piece for piece in trimmed_pieces if piece)
    if not segmented_text:
        raise ValueError("Segmentation failed: No text from LLM or fallback.")
    return segmented_text

async def generate_persian_srt_google(transcription):
    """Produce Persian SRT text from a transcription.

    Pipeline: translate to Persian, segment into subtitle lines, then attach
    synthetic timecodes. Any failure is logged with its traceback and
    re-raised.
    """
    logger.info("Generating Persian SRT...")
    try:
        # The shared translation helper is used as-is for subtitles.
        translated = await translate_to_persian_google(transcription)
        if not translated:
            raise ValueError("Translation step failed for SRT.")

        segmented = await segment_persian_text_google(translated)
        if not segmented:
            raise ValueError("Segmentation step failed for SRT.")

        srt_document = generate_srt_with_timecodes(segmented)
        logger.info("SRT generation successful.")
        return srt_document
    except Exception as e:
        logger.error(f"Error generating SRT: {e}", exc_info=True)
        raise

async def get_bot_response_google(message_text):
    """Ask Gemini for a short Persian chat reply to ``message_text``.

    Returns the model's reply, or a fixed Persian apology when the reply is
    empty. Errors from ``gemini_request_with_retry`` propagate to the caller.
    """
    logger.info(f"Getting bot response for: {message_text[:50]}...")
    prompt = f"""You are LinguaScribe_Bot, a helpful Telegram assistant. The user's language is Persian.
User says: "{message_text}"
Provide a concise and helpful response in Persian.
If they ask about your capabilities, mention:
- پیاده‌سازی صوت به متن (Audio transcription)
- خلاصه‌سازی جامع و تخصصی محتوای صوتی (Detailed audio summarization for study/prep)
- تولید فایل زیرنویس SRT به فارسی (Persian SRT subtitle generation)
- پردازش فایل‌های ZIP حاوی صوت یا متن (Processing ZIP files with audio/text)
- تبدیل ویدیو به صوت برای پردازش (Video to audio conversion for processing)

Keep responses brief. If the input is non-sensical or just a greeting, respond politely and briefly in Persian.
Return ONLY the bot's reply in Persian.
"""
    model_output = await gemini_request_with_retry(
        task_name="BotResponse",
        model_preference_key="bot_response",
        prompt_parts=[prompt],
        generation_config_key="bot_response",
    )
    reply = model_output[0]
    # An empty reply is replaced by the fixed fallback message.
    return reply or "متاسفانه در حال حاضر قادر به پاسخگویی نیستم."

# --- Core Media Processing Logic ---

async def process_single_audio_file_operations(
    audio_file_path: str,
    original_name_base: str,
    chat_id: int,
    processing_msg_event,
    is_part_of_long_audio=False
):
    """
    Processes a single audio file (or a chunk of a longer one).

    Steps: transcribe the audio and send the transcript file to the chat; then,
    unless ``is_part_of_long_audio`` is true, also generate and send the Persian
    SRT file and the detailed summary. Progress is reported by editing
    ``processing_msg_event``.

    Args:
        audio_file_path: Local path of the audio to process.
        original_name_base: Prefix for the generated file names in ``TEMP_DIR``.
        chat_id: Chat that receives the generated files.
        processing_msg_event: Status message that is edited as work progresses.
        is_part_of_long_audio: When true, only transcription is done here; SRT
            and summary are left to the caller.

    Returns:
        A 5-tuple ``(transcription, transcription_path, srt_path,
        google_audio_file_ref, files_to_cleanup_later)``. On any error the
        first three items are ``None``; the last two hold whatever was
        obtained/created before the failure. ``srt_path`` is also ``None``
        when ``is_part_of_long_audio`` is true.

    Errors are not raised to the caller: they are logged and reported by
    editing the status message.
    """
    files_to_cleanup_later = []
    google_audio_file_ref = None
    transcription = ""
    transcription_path_str = None
    srt_path_str = None

    try:
        # 1. Transcription
        # NOTE(review): the result of edit_message is discarded, so unless
        # processing_msg_event.text is refreshed elsewhere each status line
        # below replaces the previous one instead of accumulating — confirm intent.
        await client.edit_message(processing_msg_event, processing_msg_event.text + "\n\n⏳ در حال پیاده‌سازی متن...")
        transcription, google_audio_file_ref = await transcribe_audio_google(audio_file_path)
        # Registered only after a successful transcription; on failure the
        # audio path is not in the returned cleanup list.
        files_to_cleanup_later.append(audio_file_path) # Original/converted audio chunk

        transcription_filename = f"{original_name_base}_transcription.txt"
        transcription_path = TEMP_DIR / transcription_filename
        with open(transcription_path, "w", encoding="utf-8") as f:
            f.write(transcription)
        transcription_path_str = str(transcription_path)
        files_to_cleanup_later.append(transcription_path_str)

        await client.send_file(chat_id, transcription_path_str, caption="🎤 متن پیاده‌سازی شده:")
        await client.edit_message(processing_msg_event, processing_msg_event.text + "\n✅ متن پیاده‌سازی و ارسال شد.")

        # 2. SRT Generation
        # Skipped for chunks of a long audio; per the original note, the SRT is
        # then generated later from the full combined text.
        if not is_part_of_long_audio:
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n\n⏳ در حال تولید زیرنویس (SRT)...")
            srt_content = await generate_persian_srt_google(transcription)
            srt_filename = f"{original_name_base}_subtitles.srt"
            srt_path = TEMP_DIR / srt_filename
            with open(srt_path, "w", encoding="utf-8") as f:
                f.write(srt_content)
            srt_path_str = str(srt_path)
            files_to_cleanup_later.append(srt_path_str)
            await client.send_file(chat_id, srt_path_str, caption="🎬 فایل زیرنویس (SRT):")
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n✅ فایل زیرنویس (SRT) ارسال شد.")

        # 3. Summary
        # Summary is done after all chunks are processed for long audio.
        # For single short audio, summary is done here.
        if not is_part_of_long_audio:
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n\n⏳ در حال تهیه خلاصه جامع...")
            # The uploaded-audio reference is passed along with the transcription;
            # whether it is used is up to summarize_audio_google.
            summary = await summarize_audio_google(google_audio_file_ref, transcription)
            summary_filename = f"{original_name_base}_summary.md" # Save as markdown
            summary_path = TEMP_DIR / summary_filename
            with open(summary_path, "w", encoding="utf-8") as f: f.write(summary)
            files_to_cleanup_later.append(str(summary_path))

            await client.send_file(chat_id, str(summary_path), caption="📝 *خلاصه جامع محتوا:*", parse_mode='md')
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n✅ خلاصه جامع ارسال شد.")
            # Final status replaces the whole progress message.
            await client.edit_message(processing_msg_event, "✅ پردازش فایل صوتی با موفقیت تکمیل شد!")

        return transcription, transcription_path_str, srt_path_str, google_audio_file_ref, files_to_cleanup_later

    except genai.types.BlockedPromptException as bpe:
        logger.error(f"Processing stopped due to blocked prompt: {bpe}", exc_info=True)
        await client.edit_message(processing_msg_event, f"❌ پردازش متوقف شد. محتوای ارسالی با سیاست‌های ایمنی سازگار نیست.")
        # The uploaded file reference is not deleted here.
        return None, None, None, google_audio_file_ref, files_to_cleanup_later # google_audio_file_ref might be None if upload failed
    except Exception as e:
        logger.exception(f"Error in process_single_audio_file_operations for {original_name_base}: {e}")
        # Only the first 100 characters of the error are shown to the user.
        await client.edit_message(processing_msg_event, f"❌ خطا در پردازش فایل صوتی ({original_name_base}): {str(e)[:100]}")
        return None, None, None, google_audio_file_ref, files_to_cleanup_later


async def process_audio_file(event, audio_file_path: str, original_name_base: str, chat_id: int, processing_msg_event):
    """Transcribe an audio file, splitting it into chunks first when it is too long.

    The file is split when its duration exceeds MAX_DURATION_MS. Every chunk is
    passed to process_single_audio_file_operations; the transcriptions of the
    chunks that succeed are joined and written to a ``*_FULL_transcription.txt``
    file. When the audio was split, the combined transcription, a combined SRT
    file and a combined summary are sent to the chat as well. Progress and
    errors are reported by editing ``processing_msg_event``.

    Args:
        event: Triggering Telegram event (not used in this function; kept so
            existing callers keep working).
        audio_file_path: Local path of the audio file. It is handed to
            cleanup_files_and_dirs when the function exits.
        original_name_base: Base name used for all generated file names.
        chat_id: Chat that receives the generated files.
        processing_msg_event: Status message that is edited to show progress.

    Returns:
        None. Failures are logged and reported through the status message.
    """
    # Every local path created on behalf of this file; removed in ``finally``.
    # Files uploaded to Google are not deleted here.
    all_local_files_to_cleanup = [audio_file_path]

    try:
        audio_duration_ms = await get_audio_duration(audio_file_path)
        needs_splitting = audio_duration_ms > MAX_DURATION_MS

        if needs_splitting:
            await client.edit_message(
                processing_msg_event,
                f"⚠️ فایل صوتی شما طولانی است ({audio_duration_ms/60000:.1f} دقیقه). در حال تقسیم به قطعات ~{MAX_DURATION_MINUTES} دقیقه‌ای و پردازش..."
            )
            chunk_paths = await split_audio_file(audio_file_path, original_name_base, MAX_DURATION_MS)
            # The original path is already tracked; duplicates are removed before cleanup.
            all_local_files_to_cleanup.extend(chunk_paths)
        else:
            logger.info(f"Audio duration ({audio_duration_ms/60000:.2f} min) is under threshold, processing as single unit.")
            chunk_paths = [audio_file_path]  # Process the original file as a single chunk

        full_transcription_parts = []
        # Uploaded-file reference of the first chunk; passed to the summarizer
        # together with the combined transcription.
        first_chunk_google_ref = None

        for i, chunk_path in enumerate(chunk_paths):
            chunk_name_base = f"{original_name_base}_part{i+1}" if needs_splitting else original_name_base
            await client.edit_message(
                processing_msg_event,
                processing_msg_event.text + f"\n\nProcessing chunk {i+1}/{len(chunk_paths)}: {Path(chunk_path).name}"
            )

            transcription, _, _, google_ref, chunk_cleanup_files = await process_single_audio_file_operations(
                audio_file_path=chunk_path,
                original_name_base=chunk_name_base,
                chat_id=chat_id,
                processing_msg_event=processing_msg_event,
                is_part_of_long_audio=needs_splitting  # True if there are multiple chunks
            )
            all_local_files_to_cleanup.extend(chunk_cleanup_files)

            if transcription:
                full_transcription_parts.append(transcription)
                if i == 0 and google_ref:
                    first_chunk_google_ref = google_ref
            else:
                # A failed chunk is skipped; the remaining chunks are still processed.
                logger.warning(f"Chunk {i+1} ({Path(chunk_path).name}) failed transcription. Skipping for combined output.")
                await client.edit_message(processing_msg_event, processing_msg_event.text + f"\n⚠️ خطایی در پردازش قطعه {i+1} رخ داد.")

        if not full_transcription_parts:
            await client.edit_message(processing_msg_event, "❌ پردازش هیچ بخشی از فایل صوتی موفقیت آمیز نبود.")
            return  # Nothing to combine; ``finally`` still cleans up.

        full_transcription = "\n\n".join(full_transcription_parts)
        full_transcription_filename = f"{original_name_base}_FULL_transcription.txt"
        full_transcription_path = TEMP_DIR / full_transcription_filename
        with open(full_transcription_path, "w", encoding="utf-8") as f:
            f.write(full_transcription)
        all_local_files_to_cleanup.append(str(full_transcription_path))

        if needs_splitting:
            # Combined outputs are only produced here for split audio.
            await client.send_file(chat_id, str(full_transcription_path), caption="🎤 متن کامل پیاده‌سازی شده (از تمامی قطعات):")

            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n\n⏳ در حال تولید زیرنویس (SRT) کامل...")
            combined_srt_content = await generate_persian_srt_google(full_transcription)
            combined_srt_filename = f"{original_name_base}_FULL_subtitles.srt"
            combined_srt_path = TEMP_DIR / combined_srt_filename
            with open(combined_srt_path, "w", encoding="utf-8") as f:
                f.write(combined_srt_content)
            all_local_files_to_cleanup.append(str(combined_srt_path))
            await client.send_file(chat_id, str(combined_srt_path), caption="🎬 فایل زیرنویس کامل (SRT):")
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n✅ فایل زیرنویس کامل (SRT) ارسال شد.")

            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n\n⏳ در حال تهیه خلاصه جامع کامل...")
            combined_summary = await summarize_audio_google(first_chunk_google_ref, full_transcription)

            summary_filename = f"{original_name_base}_FULL_summary.md"
            summary_path = TEMP_DIR / summary_filename
            with open(summary_path, "w", encoding="utf-8") as f:
                f.write(combined_summary)
            all_local_files_to_cleanup.append(str(summary_path))

            await client.send_file(chat_id, str(summary_path), caption="📝 *خلاصه جامع محتوای کامل:*", parse_mode='md')
            await client.edit_message(processing_msg_event, processing_msg_event.text + "\n✅ خلاصه جامع کامل ارسال شد.")

        final_message = "✅ پردازش فایل"
        if needs_splitting:
            final_message += f" صوتی طولانی ({audio_duration_ms/60000:.1f} دقیقه)"
        final_message += " با موفقیت تکمیل شد!"
        await client.edit_message(processing_msg_event, final_message)

    except Exception as e:
        logger.exception(f"Critical error in process_audio_file for {original_name_base}: {e}")
        try:
            await client.edit_message(processing_msg_event, f"❌ خطای جدی در پردازش فایل صوتی: {str(e)[:100]}")
        except Exception:
            # Reporting is best effort: a failed edit (e.g. the status message
            # was deleted) must not escape and abort the caller's remaining work.
            logger.warning(f"Could not report failure for {original_name_base} to the chat.", exc_info=True)
    finally:
        # Remove every temporary file gathered above, each path only once.
        unique_cleanup_paths = list(set(all_local_files_to_cleanup))
        await cleanup_files_and_dirs(*unique_cleanup_paths)


# --- Main Bot Event Handlers ---

@client.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Reply to /start with the welcome message describing the bot's features.

    The sender is logged through ``event.sender_id`` rather than by fetching
    the sender entity: the fetched entity can be None (which made ``sender.id``
    raise) and fetching it may cost an extra API call.
    """
    logger.info(f"/start from User {event.sender_id} in Chat {event.chat_id}")
    await event.reply(
        "👋 سلام! به ربات *LinguaScribe* خوش آمدید.\n\n"
        "این ربات می‌تواند فایل‌های صوتی، ویدیویی یا ZIP را پردازش کند:\n"
        "🎤 **پیاده‌سازی متن دقیق**\n"
        "📝 **خلاصه‌سازی جامع و تخصصی** (مناسب آمادگی آزمون)\n"
        "🎬 **تولید زیرنویس SRT فارسی**\n"
        "📹 **تبدیل ویدیو به صوت** برای تحلیل\n"
        "🗜️ **پردازش فایل‌های ZIP** حاوی صوت یا متن\n\n"
        "یک فایل صوتی (voice, audio), ویدیویی, یا فایل ZIP برای من ارسال کنید.",
        parse_mode='md'
    )

@client.on(events.NewMessage(pattern='/help'))
async def help_command(event):
    """Reply to /help with the usage guide, including the current splitting threshold."""
    guide_text = (
        "🔍 **راهنمای LinguaScribe Bot**\n\n"
        "1️⃣ یک فایل صوتی، ویدیویی، یا ZIP ارسال کنید.\n"
        "2️⃣ ربات به صورت خودکار آن را پردازش می‌کند.\n"
        "   - ویدیو به صوت تبدیل می‌شود.\n"
        "   - فایل‌های ZIP استخراج و محتوای پشتیبانی شده (صوت/متن) پردازش می‌شود.\n"
        "   - برای صوت: متن پیاده‌سازی، خلاصه جامع، و زیرنویس SRT ارائه می‌شود.\n"
        "   - برای متن (از ZIP): محتوا ترکیب و ارسال می‌شود.\n\n"
        "📋 **نکات**:\n"
        f"• فایل‌های صوتی تا {MAX_DURATION_MINUTES} دقیقه به صورت یکجا، طولانی‌تر به صورت بخش‌بندی شده پردازش می‌شوند.\n"
        "• زبان اصلی فارسی است.\n\n"
        "📌 **دستورات:** /start, /help"
    )
    await event.reply(guide_text, parse_mode='md')

# Messages that carry audio, voice or a document are left to the media handler;
# without that exclusion a captioned file would be answered by both handlers.
@client.on(events.NewMessage(
    func=lambda e: e.text and not e.text.startswith('/') and not (e.audio or e.voice or e.document)
))
async def handle_text_message(event):
    """Answer a plain (non-command) text message with the chat model.

    A placeholder reply is sent first and then edited to hold either the
    response from get_bot_response_google or a generic error notice.
    """
    chat_id = event.chat_id
    message_text = event.text
    logger.info(f"Text message in chat {chat_id}: {message_text[:50]}...")
    processing_msg = await event.reply("⏳ در حال پردازش پیام شما...")
    try:
        bot_response = await get_bot_response_google(message_text)
        await client.edit_message(processing_msg, bot_response)
    except Exception as e:
        logger.error(f"Error handling text message: {e}", exc_info=True)
        await client.edit_message(processing_msg, "❌ متأسفانه در پردازش پیام شما مشکلی پیش آمد.")

def _attribute_file_name(media_item):
    """Return the first non-empty ``file_name`` among the media's attributes, or None."""
    for attribute in getattr(media_item, 'attributes', None) or []:
        candidate = getattr(attribute, 'file_name', None)
        if candidate:
            return candidate
    return None


@client.on(events.NewMessage(func=lambda e: e.audio or e.voice or e.document))
async def handle_media_message(event):
    """Download an incoming audio/voice/video/ZIP message and process it.

    Flow:
      * Classify the media; unsupported document types are rejected.
      * Download the file into TEMP_DIR.
      * ZIP: extract, run process_audio_file on every audio file found and
        send the concatenated contents of the text files.
      * Video: convert to audio with pydub.
      * Audio (including converted video): re-encode to the configured
        output format when possible, then call process_audio_file.

    Progress and errors are reported by editing a status reply. Paths created
    by this handler are removed in ``finally``.
    """
    chat_id = event.chat_id
    message_id = event.message.id
    # sender_id avoids dereferencing a sender entity that may be None.
    logger.info(f"Media from User {event.sender_id} (msg_id:{message_id}) in Chat {chat_id}")

    media_item = None
    file_name_attr = None
    mime_type_attr = None
    media_type_for_log = "unknown"

    if event.audio:
        media_item = event.audio
        media_type_for_log = "audio"
        mime_type_attr = getattr(media_item, 'mime_type', 'audio/ogg')  # Default if not present
        # The file name is not necessarily on the first attribute, so all of them are searched.
        file_name_attr = _attribute_file_name(media_item) or f"audio_{message_id}.{mime_type_attr.split('/')[-1]}"
    elif event.voice:
        media_item = event.voice
        media_type_for_log = "voice"
        mime_type_attr = getattr(media_item, 'mime_type', 'audio/ogg')
        file_name_attr = f"voice_{message_id}.ogg"
    elif event.document:
        media_item = event.document
        mime_type_attr = getattr(media_item, 'mime_type', '')
        file_name_attr = _attribute_file_name(media_item) or f"document_{message_id}"

        if mime_type_attr.startswith('audio/'):
            media_type_for_log = "document_audio"
        elif mime_type_attr.startswith('video/'):
            media_type_for_log = "document_video"
        elif mime_type_attr in ['application/zip', 'application/x-zip-compressed'] or file_name_attr.lower().endswith('.zip'):
            media_type_for_log = "document_zip"
        else:
            await event.reply("⚠️ این نوع فایل توسط ربات پشتیبانی نمی‌شود. لطفاً فایل صوتی، ویدیویی یا ZIP ارسال کنید.")
            return
    else:  # Should not happen given the func filter, but as a safeguard
        return

    processing_msg = await event.reply("⏳ در حال دریافت و بررسی فایل...")

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    original_name_base = Path(file_name_attr).stem
    # Keep the original suffix when there is one; the real format is detected later.
    download_path_initial = TEMP_DIR / f"{original_name_base}_{timestamp}{Path(file_name_attr).suffix or '.dat'}"

    local_files_to_cleanup_main_handler = [str(download_path_initial)]

    try:
        await client.download_media(message=event.message, file=str(download_path_initial))
        logger.info(f"File {file_name_attr} ({media_type_for_log}) downloaded to {download_path_initial}")
        await client.edit_message(processing_msg, "✅ فایل دریافت شد. در حال پردازش اولیه...")

        # --- ZIP File Handling ---
        if media_type_for_log == "document_zip":
            await client.edit_message(processing_msg, processing_msg.text + "\n🗜️ فایل ZIP شناسایی شد، در حال استخراج...")
            extraction_path = TEMP_EXTRACTION_DIR / f"{original_name_base}_{timestamp}_extracted"
            extraction_path.mkdir(parents=True, exist_ok=True)
            local_files_to_cleanup_main_handler.append(str(extraction_path))  # ensure extracted dir is cleaned

            try:
                with zipfile.ZipFile(download_path_initial, 'r') as zip_ref:
                    zip_ref.extractall(extraction_path)
                logger.info(f"ZIP extracted to {extraction_path}")

                combined_text_content = []
                audio_files_in_zip = []

                for item in extraction_path.rglob('*'):  # Recurse through subdirectories
                    if item.is_file():
                        item_suffix_lower = item.suffix.lower()

                        if item_suffix_lower in ['.mp3', '.wav', '.ogg', '.m4a', '.opus', '.flac', '.aac']:
                            audio_files_in_zip.append(item)
                        elif item_suffix_lower in ['.txt', '.md', '.rtf']:
                            try:
                                combined_text_content.append(item.read_text(encoding='utf-8'))
                                logger.info(f"Read text from {item.name}")
                            except Exception as e:
                                logger.warning(f"Could not read text file {item.name}: {e}")

                if audio_files_in_zip:
                    await client.edit_message(processing_msg, processing_msg.text + f"\n🔊 {len(audio_files_in_zip)} فایل صوتی در ZIP یافت شد. پردازش آن‌ها...")
                    for i, audio_item_path in enumerate(audio_files_in_zip):
                        zip_audio_name_base = f"{original_name_base}_zip_audio{i+1}_{audio_item_path.stem}"
                        # One status message per audio file keeps each message short.
                        audio_proc_msg = await event.reply(f"⏳ پردازش فایل صوتی {i+1}/{len(audio_files_in_zip)} از ZIP: {audio_item_path.name}")
                        await process_audio_file(event, str(audio_item_path), zip_audio_name_base, chat_id, audio_proc_msg)
                        # process_audio_file cleans the path it is given; whatever
                        # is left goes away with extraction_path.

                if combined_text_content:
                    full_extracted_text = "\n\n---\n\n".join(combined_text_content)
                    text_output_filename = f"{original_name_base}_extracted_texts.txt"
                    text_output_path = TEMP_DIR / text_output_filename
                    with open(text_output_path, "w", encoding="utf-8") as f:
                        f.write(full_extracted_text)
                    local_files_to_cleanup_main_handler.append(str(text_output_path))
                    await client.send_file(chat_id, str(text_output_path), caption="📜 متن‌های استخراج شده از فایل ZIP:")
                    await client.edit_message(processing_msg, processing_msg.text + "\n📜 محتوای متنی از ZIP ارسال شد.")

                if not audio_files_in_zip and not combined_text_content:
                    await client.edit_message(processing_msg, processing_msg.text + "\n⚠️ هیچ فایل صوتی یا متنی قابل پردازشی در ZIP یافت نشد.")
                else:
                    await client.edit_message(processing_msg, processing_msg.text + "\n✅ پردازش محتوای ZIP تکمیل شد.")

            except zipfile.BadZipFile:
                logger.error(f"Bad ZIP file: {download_path_initial}")
                await client.edit_message(processing_msg, "❌ فایل ZIP نامعتبر است.")
            except Exception as e:
                logger.error(f"Error processing ZIP file: {e}", exc_info=True)
                await client.edit_message(processing_msg, f"❌ خطا در پردازش فایل ZIP: {e}")
            return  # End of ZIP processing

        # --- Video to Audio Conversion ---
        actual_audio_file_to_process = download_path_initial
        if media_type_for_log.startswith("document_video") or mime_type_attr.startswith("video/"):
            await client.edit_message(processing_msg, processing_msg.text + "\n📹 فایل ویدیویی شناسایی شد، در حال تبدیل به صوت فشرده...")
            converted_audio_path = TEMP_DIR / f"{original_name_base}_{timestamp}_converted.{AUDIO_OUTPUT_FORMAT}"
            try:
                video_segment = AudioSegment.from_file(str(download_path_initial))
                audio_only = video_segment.set_channels(1).set_frame_rate(AUDIO_SAMPLE_RATE)
                audio_only.export(
                    str(converted_audio_path),
                    format=AUDIO_OUTPUT_FORMAT,
                    codec=AUDIO_OUTPUT_CODEC if AUDIO_OUTPUT_FORMAT == "ogg" else None,
                    bitrate=AUDIO_OUTPUT_BITRATE
                )
                actual_audio_file_to_process = converted_audio_path
                local_files_to_cleanup_main_handler.append(str(converted_audio_path))
                logger.info(f"Video converted to audio: {converted_audio_path}")
                await client.edit_message(processing_msg, processing_msg.text + "\n✅ ویدیو به صوت تبدیل شد.")
            except Exception as e:
                logger.error(f"Error converting video to audio: {e}", exc_info=True)
                await client.edit_message(processing_msg, f"❌ خطا در تبدیل ویدیو به صوت: {e}. تلاش برای پردازش فایل اصلی (اگر صوت باشد)...")
                # Only continue with the downloaded file when it is itself audio.
                if not mime_type_attr.startswith('audio/'):
                    await client.edit_message(processing_msg, processing_msg.text + "\n❌ فایل اصلی ویدیو قابل پردازش مستقیم به عنوان صوت نیست.")
                    return

        # --- Audio Processing (Original Audio, Document Audio, or Converted Video) ---
        if media_type_for_log.startswith("audio") or media_type_for_log.startswith("voice") or \
           media_type_for_log.startswith("document_audio") or actual_audio_file_to_process != download_path_initial:  # i.e. video was converted

            # Re-encode to mono at the configured sample rate/format before
            # the duration check and any splitting.
            standardized_audio_path = TEMP_DIR / f"{Path(actual_audio_file_to_process).stem}_standardized.{AUDIO_OUTPUT_FORMAT}"
            try:
                audio_seg = AudioSegment.from_file(str(actual_audio_file_to_process))
                audio_seg = audio_seg.set_channels(1).set_frame_rate(AUDIO_SAMPLE_RATE)
                audio_seg.export(
                    str(standardized_audio_path),
                    format=AUDIO_OUTPUT_FORMAT,
                    codec=AUDIO_OUTPUT_CODEC if AUDIO_OUTPUT_FORMAT == "ogg" else None,
                    bitrate=AUDIO_OUTPUT_BITRATE
                )
                logger.info(f"Audio standardized to: {standardized_audio_path}")
                if str(actual_audio_file_to_process) != str(download_path_initial):  # if it was converted video
                    local_files_to_cleanup_main_handler.append(str(actual_audio_file_to_process))
                actual_audio_file_to_process = standardized_audio_path
                local_files_to_cleanup_main_handler.append(str(standardized_audio_path))

            except Exception as e:
                # Best effort: fall back to the original/converted file.
                logger.warning(f"Could not standardize audio {actual_audio_file_to_process}: {e}. Proceeding with original/converted.")

            await process_audio_file(event, str(actual_audio_file_to_process), original_name_base, chat_id, processing_msg)

    except Exception as e:
        logger.exception(f"Unhandled error in handle_media_message for {file_name_attr}: {e}")
        try:
            await client.edit_message(processing_msg, "❌ متأسفانه یک خطای ناشناخته در پردازش فایل شما رخ داد.")
        except Exception:
            # Editing can fail too (e.g. the message was deleted). Catching
            # Exception instead of a bare except keeps task cancellation intact.
            logger.warning("Could not update the status message after a failure.", exc_info=True)
    finally:
        # Remove only what this request created (download, converted or
        # standardized audio, its own extraction directory). The shared
        # TEMP_EXTRACTION_DIR is left alone: wiping it here would delete the
        # extracted files of another handler that is still running.
        unique_cleanup_paths_main = list(set(local_files_to_cleanup_main_handler))
        await cleanup_files_and_dirs(*unique_cleanup_paths_main)


# --- Main Entry Point ---
async def main():
    """Reset the temp directories, start the bot and run until it disconnects."""
    logger.info("Starting LinguaScribe Bot...")

    # Start from an empty temp directory; a failed wipe is logged, not fatal.
    if TEMP_DIR.exists():
        try:
            shutil.rmtree(TEMP_DIR)
        except Exception as e:
            logger.error(f"Error cleaning up temp directory {TEMP_DIR} at startup: {e}")
        else:
            logger.info(f"Cleaned up old temp directory: {TEMP_DIR}")
    for working_dir in (TEMP_DIR, TEMP_EXTRACTION_DIR):
        working_dir.mkdir(parents=True, exist_ok=True)

    await client.start(bot_token=BOT_TOKEN)
    bot_account = await client.get_me()
    logger.info(f"Bot @{bot_account.username} started successfully!")
    logger.info(f"Using {len(GOOGLE_API_KEYS_LIST)} Google API Key(s).")
    # Only the last four characters of the active key are logged.
    logger.info(f"Initial Google API Key: ...{_active_google_api_key[-4:] if _active_google_api_key else 'N/A'}")

    try:
        await client.run_until_disconnected()
    except KeyboardInterrupt:
        logger.info("Bot stopped by user (Ctrl+C)")
    finally:
        await client.disconnect()
        logger.info("Bot disconnected.")

if __name__ == "__main__":
    # Drive main() to completion on the current event loop.
    # NOTE(review): presumably this relies on nest_asyncio.apply() having been
    # called earlier so it also works when a loop is already running — confirm.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())

# The old single-job code:
## It transcribes the audio and produces the text, SRT, and summary

In [None]:
import os
import asyncio
import nest_asyncio
import datetime
from pathlib import Path
import logging
import re
import math
from telethon import TelegramClient, events, Button
import google.generativeai as genai
from google.colab import userdata
from pydub import AudioSegment  # New import for audio processing

# --- Configuration ---
# Read all credentials from Colab secrets; any failure prints the reason and stops the run.
try:
    from google.colab import userdata

    API_ID = int(userdata.get('TELEGRAM_API_ID'))
    API_HASH = userdata.get('TELEGRAM_API_HASH')
    BOT_TOKEN = userdata.get('TELEGRAM_BOT_TTS')
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY1')  # Main key
    GOOGLE_SUMMARY_API_KEY = userdata.get('GOOGLE_SUMMARY_API_KEY')  # Specific key for summaries

    # Every secret has to be present and non-empty.
    if any(not secret for secret in (API_ID, API_HASH, BOT_TOKEN, GOOGLE_API_KEY, GOOGLE_SUMMARY_API_KEY)):
        raise ValueError("One or more secrets are missing.")
    if GOOGLE_SUMMARY_API_KEY == GOOGLE_API_KEY:
        print("Main Google API Key and Summary API Key are the same. No key switching needed.")
except Exception as e:
    print(f"Error loading secrets: {e}")
    exit()

# Apply nest_asyncio
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop — confirm.
nest_asyncio.apply()

# Configure logging: INFO level with timestamp, logger name and level in each record.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Initial Google AI SDK Configuration (with the MAIN key) ---
# Only the last four characters of the key are logged. A configuration failure
# is logged and ends the run.
try:
    logger.info(f"Configuring Google AI SDK with MAIN API key ending with ...{GOOGLE_API_KEY[-4:]}")
    genai.configure(api_key=GOOGLE_API_KEY)
except Exception as e:
    logger.error(f"Error configuring Google AI SDK with main key: {e}")
    exit()

# Model Configuration
MODEL_CONFIG = {
    "text_model_name": "gemini-2.5-flash-preview-04-17-thinking",
    "multimodal_model_name": "gemini-1.5-flash-latest",
    "generation_config": {"temperature": 0.5},
    "summarization_generation_config": {"temperature": 0.6},
    # Looked up as MODEL_CONFIG["safety_settings"] when a model is created.
    # The enums are reached through genai.types because this cell does not
    # import HarmCategory / HarmBlockThreshold directly.
    "safety_settings": [
        {"category": genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT, "threshold": genai.types.HarmBlockThreshold.BLOCK_ONLY_HIGH},
        {"category": genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH, "threshold": genai.types.HarmBlockThreshold.BLOCK_ONLY_HIGH},
        {"category": genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT, "threshold": genai.types.HarmBlockThreshold.BLOCK_ONLY_HIGH},
        {"category": genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT, "threshold": genai.types.HarmBlockThreshold.BLOCK_ONLY_HIGH},
    ],
}

# Telethon Client Initialization
# The session name is derived from the part of the bot token before the first
# ':' so that each bot gets its own session.
session_name = f"bot_session_{BOT_TOKEN.split(':')[0]}"  # Ensure unique session name
client = TelegramClient(session_name, API_ID, API_HASH)
# Working directory for temporary audio files, created relative to the current directory.
TEMP_DIR = Path("./temp_audio_telethon_bot")
TEMP_DIR.mkdir(exist_ok=True)

# --- Audio Processing Constants ---
MAX_DURATION_MINUTES = 30  # Maximum duration in minutes before splitting
MAX_DURATION_MS = MAX_DURATION_MINUTES * 60 * 1000  # Same limit expressed in milliseconds

# --- Helper Functions ---

def get_gemini_model_instance(model_name_key, custom_generation_config_key=None):
    """Create a Gemini model instance from MODEL_CONFIG.

    The model uses whatever API key ``genai`` is currently configured with, so
    the caller must configure the intended key before calling.

    Args:
        model_name_key: MODEL_CONFIG key holding the model name.
        custom_generation_config_key: Optional MODEL_CONFIG key holding a
            generation config; MODEL_CONFIG["generation_config"] is used when
            it is omitted.

    Returns:
        The created ``genai.GenerativeModel``.

    Raises:
        Exception: Any error from the config lookup or model construction is
            logged and re-raised unchanged.
    """
    # Bound before the try block so the error log below can always name the
    # model, even when the MODEL_CONFIG lookup itself is what failed.
    model_name_actual = model_name_key
    try:
        model_name_actual = MODEL_CONFIG[model_name_key]
        generation_config_actual = MODEL_CONFIG[custom_generation_config_key] if custom_generation_config_key else MODEL_CONFIG["generation_config"]

        model = genai.GenerativeModel(
            model_name=model_name_actual,
            generation_config=generation_config_actual,
            safety_settings=MODEL_CONFIG["safety_settings"]
        )
        logger.info(f"Created model instance for {model_name_actual} (current global API key is in use)")
        return model
    except Exception as e:
        logger.error(f"Error creating Gemini model {model_name_actual}: {e}")
        raise

async def cleanup_files(*files):
    """Delete each given file that exists; falsy and missing paths are skipped.

    Deletion errors (OSError) are logged and do not stop the remaining files
    from being processed.
    """
    for candidate in files:
        if not candidate:
            continue
        if not Path(candidate).exists():
            continue
        try:
            Path(candidate).unlink()
            logger.info(f"Deleted temporary file: {candidate}")
        except OSError as e:
            logger.error(f"Error deleting file {candidate}: {e}")

def generate_srt_with_timecodes(segmented_text):
    """Build SRT text that gives every non-blank input line a 5-second cue.

    Cues are numbered from 1 and laid out back to back starting at 00:00:00.
    When the input has no non-blank line, a single placeholder cue is returned.
    """
    captions = [row for row in segmented_text.split("\n") if row.strip()]
    if not captions:
        return "1\n00:00:00,000 --> 00:00:05,000\n(محتوایی برای زمان‌بندی وجود ندارد)\n"

    cue_length = 5  # seconds per cue

    def _timestamp(total_seconds):
        # Render seconds as HH:MM:SS,mmm.
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        millis = int((total_seconds % 1) * 1000)
        return f"{int(hours):02}:{int(minutes):02}:{int(seconds):02},{millis:03}"

    pieces = []
    for number, caption in enumerate(captions, start=1):
        begins = (number - 1) * cue_length
        ends = begins + cue_length
        # The empty string yields the blank line that separates cues.
        pieces.extend((str(number), f"{_timestamp(begins)} --> {_timestamp(ends)}", caption, ""))
    return "\n".join(pieces)

# --- New Audio Splitting Functions ---

async def get_audio_duration(file_path):
    """Return the length of the audio file at ``file_path`` in milliseconds.

    Any error while loading the file is logged with a traceback and re-raised.
    """
    try:
        duration_ms = len(AudioSegment.from_file(file_path))
    except Exception as e:
        logger.error(f"Error getting audio duration: {e}", exc_info=True)
        raise
    logger.info(f"Audio duration: {duration_ms/1000:.2f} seconds ({duration_ms/60000:.2f} minutes)")
    return duration_ms

async def split_audio_file(file_path, base_name, max_duration_ms=MAX_DURATION_MS):
    """Cut an audio file into consecutive pieces of at most ``max_duration_ms``.

    Returns a list of path strings. Audio that already fits within the limit
    is not re-exported: the list then holds only ``file_path``. Otherwise each
    piece is written to TEMP_DIR as ``{base_name}_part{N}.ogg``. Errors are
    logged with a traceback and re-raised.
    """
    try:
        source_audio = AudioSegment.from_file(file_path)
        total_ms = len(source_audio)

        if total_ms <= max_duration_ms:
            logger.info(f"Audio is shorter than {MAX_DURATION_MINUTES} minutes, no need to split")
            return [file_path]

        part_count = math.ceil(total_ms / max_duration_ms)
        logger.info(f"Splitting audio into {part_count} chunks of {MAX_DURATION_MINUTES} minutes each")

        exported_paths = []
        for part_no in range(1, part_count + 1):
            window_start = (part_no - 1) * max_duration_ms
            # The last piece ends at the end of the audio.
            window_end = min(part_no * max_duration_ms, total_ms)
            target_path = TEMP_DIR / f"{base_name}_part{part_no}.ogg"

            logger.info(f"Exporting chunk {part_no}/{part_count} to {target_path}")
            source_audio[window_start:window_end].export(str(target_path), format="ogg")
            exported_paths.append(str(target_path))

        logger.info(f"Successfully split audio into {len(exported_paths)} chunks")
        return exported_paths
    except Exception as e:
        logger.error(f"Error splitting audio file: {e}", exc_info=True)
        raise

# --- Google AI API Call Functions ---

async def transcribe_audio_google(file_path):
    """Upload an audio file and ask the multimodal model for its transcription.

    Returns:
        A ``(transcription, uploaded_file)`` tuple: the stripped response text
        and the object returned by ``genai.upload_file``.

    Raises:
        ValueError: The model returned no text (logged and re-raised like any
            other error).
        Exception: Any other failure, logged with a traceback and re-raised.
    """
    logger.info(f"Transcribing audio file: {file_path}")
    try:
        model = get_gemini_model_instance("multimodal_model_name")  # Assumes main key is active
        logger.info("Uploading audio file for transcription...")
        # Both SDK calls below are synchronous, so they run in worker threads.
        uploaded_audio = await asyncio.to_thread(genai.upload_file, path=file_path)
        logger.info(f"Audio file uploaded: {uploaded_audio.name}")

        request_parts = [
            "Please transcribe the audio provided accurately. Return ONLY the plain text transcription.",
            uploaded_audio,
        ]
        response = await asyncio.to_thread(model.generate_content, request_parts)

        transcription = response.text.strip()
        if transcription:
            logger.info("Transcription successful.")
            return transcription, uploaded_audio

        logger.warning("Transcription response was empty.")
        raise ValueError("Transcription failed: No text returned.")
    except Exception as e:
        logger.error(f"Error during transcription: {e}", exc_info=True)
        raise

async def summarize_audio_google(audio_file_ref, transcription_context):
    """Produce a structured Persian summary of an uploaded audio file.

    If a distinct ``GOOGLE_SUMMARY_API_KEY`` is configured, ``genai`` is
    temporarily reconfigured to use it and switched back to ``GOOGLE_API_KEY``
    afterwards (even on failure); otherwise the already-configured main key is
    used.

    Args:
        audio_file_ref: Uploaded-file object to pass to the model alongside
            the prompt.
        transcription_context: Transcription text embedded in the prompt as
            context. It is inserted verbatim, so it may safely contain ``{``
            and ``}`` characters.

    Returns:
        The stripped summary text returned by the model.

    Raises:
        ValueError: If the model returns no text.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    logger.info("Summarizing audio content...")

    # Plain (non f-string) template: formatted exactly once below. Formatting an
    # already-interpolated f-string a second time would choke on any brace that
    # happens to appear inside the transcription.
    summary_prompt_template = """
شما یک دستیار متخصص در تحلیل و خلاصه‌سازی محتوای صوتی به زبان فارسی هستید.
فایل صوتی ارائه شده است. متن پیاده‌سازی شده اولیه آن نیز برای کمک به زمینه و کلمات کلیدی در زیر آمده است.
لطفاً این فایل صوتی را با دقت تحلیل کرده و یک خلاصه جامع و دقیق به زبان فارسی روان تهیه کنید که شامل موارد زیر باشد:

متن پیاده‌سازی شده اولیه (برای کمک به زمینه):
\"\"\"
{transcription_context}
\"\"\"

دستورالعمل‌های خلاصه‌سازی:
1.  **خلاصه کلی (۲-۳ پاراگراف):** موضوع اصلی و هدف، زمینه بحث، نتیجه‌گیری اصلی.
2.  **نکات کلیدی و برجسته:** مهم‌ترین نقاط، آمار/ارقام مهم، تاریخ‌ها/رویدادهای کلیدی (حداقل ۵ مورد).
3.  **جزئیات و استدلال‌های مهم:** استدلال‌های اصلی، مثال‌ها/موارد خاص، نقل قول‌های مهم (حداکثر ۲-۳).
4.  **تحلیل محتوا (در صورت امکان):** ارتباط مفاهیم، نقاط قوت/ضعف، پیشنهادات/راهکارها.
5.  **دسته‌بندی موضوعی (اختیاری):** موضوعات فرعی و ارتباطشان با موضوع اصلی.

**خروجی مورد انتظار:**
*   خلاصه کاملاً به زبان فارسی سلیس و روان.
*   ساختاریافته با تیترهای مشخص فارسی (مانند "خلاصه کلی", "نکات کلیدی و برجسته").
*   استفاده از نشانه‌گذاری مناسب (لیست‌ها).
*   طول متناسب با محتوای صوتی.
*   فقط و فقط خلاصه نهایی مطابق ساختار درخواستی، بدون عبارت مقدماتی یا توضیحات اضافی.
"""

    use_summary_key = bool(GOOGLE_SUMMARY_API_KEY) and GOOGLE_API_KEY != GOOGLE_SUMMARY_API_KEY
    switched_to_summary_key = False

    try:
        if use_summary_key:
            # NOTE(review): genai.configure() changes process-wide state, so other
            # coroutines running while this request is in flight will presumably
            # use the summary key too. Also confirm that a file uploaded under
            # the main key is readable with the summary key.
            logger.info(f"Temporarily configuring genai for GOOGLE_SUMMARY_API_KEY (ends ...{GOOGLE_SUMMARY_API_KEY[-4:]}) for summarization")
            genai.configure(api_key=GOOGLE_SUMMARY_API_KEY)
            switched_to_summary_key = True
        else:
            logger.info("Using main GOOGLE_API_KEY for summarization as keys are same or summary key not distinct.")

        model = get_gemini_model_instance("multimodal_model_name", "summarization_generation_config")
        prompt = summary_prompt_template.format(transcription_context=transcription_context)

        # generate_content is synchronous, so run it in a worker thread.
        response = await asyncio.to_thread(
            model.generate_content,
            [prompt, audio_file_ref]
        )
        summary = response.text.strip()

        if not summary:
            logger.warning("Summarization response was empty.")
            raise ValueError("Summarization failed: No text returned.")
        logger.info("Summarization successful.")
        return summary
    except Exception as e:
        if use_summary_key:
            logger.error(f"Error during summarization: {e}", exc_info=True)
        else:
            logger.error(f"Error during summarization with main key: {e}", exc_info=True)
        raise
    finally:
        # Restore the main key only if we actually switched away from it.
        if switched_to_summary_key:
            logger.info(f"Switching genai config back to main GOOGLE_API_KEY (ends ...{GOOGLE_API_KEY[-4:]})")
            genai.configure(api_key=GOOGLE_API_KEY)

async def translate_to_persian_google(text):
    """Translate ``text`` to Persian with the configured Gemini text model.

    Assumes GOOGLE_API_KEY is already globally configured on ``genai``.

    Args:
        text: Source text to translate.

    Returns:
        The stripped Persian translation, or ``""`` when ``text`` is empty or
        whitespace-only (no API call is made in that case).

    Raises:
        ValueError: If the model returns no usable text, including the case
            where the response carries no candidates at all.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    if not text or not text.strip():
        return ""
    logger.info("Translating text to Persian...")
    try:
        model = get_gemini_model_instance("text_model_name")
        prompt = f'Translate the following text to Persian:\n\n"{text}"\n\nReturn ONLY the Persian translation.'
        # generate_content is synchronous, so run it in a worker thread.
        response = await asyncio.to_thread(model.generate_content, prompt)

        try:
            translation = response.text.strip()
        except ValueError as accessor_error:
            # The `.text` quick accessor raises ValueError when the response has
            # no candidates. Surface a readable error instead of the SDK's
            # internal accessor message, and keep the feedback in the log.
            logger.warning(
                f"Translation response contained no usable text "
                f"(prompt_feedback={getattr(response, 'prompt_feedback', None)!r})"
            )
            raise ValueError(
                "Translation failed: the model returned no usable text "
                "(the request may have been blocked)."
            ) from accessor_error

        if not translation:
            logger.warning("Translation response was empty.")
            raise ValueError("Translation failed: No text returned.")
        logger.info("Translation successful.")
        return translation
    except Exception as e:
        logger.error(f"Error during translation: {e}", exc_info=True)
        raise

async def segment_persian_text_google(persian_text):
    """Split Persian text into subtitle-sized lines, one segment per line.

    The text model is asked to do the segmentation. If it returns nothing, or
    if anything in that path raises, the text is instead split on sentence
    punctuation and newlines with a regular expression.

    Assumes GOOGLE_API_KEY is already globally configured on ``genai``.

    Args:
        persian_text: The Persian text to segment.

    Returns:
        Newline-separated segments.

    Raises:
        ValueError: If neither the model nor the regex fallback yields text.
    """

    def split_on_punctuation():
        # Regex fallback: break on sentence-ending marks / newlines, drop blanks.
        pieces = (part.strip() for part in re.split(r'[।\.؟!\n]+', persian_text))
        return "\n".join(piece for piece in pieces if piece)

    logger.info("Segmenting Persian text for SRT...")
    try:
        llm = get_gemini_model_instance("text_model_name")
        request = (
            "Take the following Persian text and break it into suitable subtitle segments. Each segment should be on a new line. Aim for natural breaks and readable lengths for subtitles.\n"
            "Return ONLY the segmented text, with each segment on a new line.\n"
            "Persian text:\n"
            "---\n"
            f"{persian_text}\n"
            "---"
        )
        reply = await asyncio.to_thread(llm.generate_content, request)
        result = reply.text.strip()

        if not result:
            logger.warning("LLM Segmentation response was empty. Using regex fallback.")
            result = split_on_punctuation()
            if not result:
                raise ValueError("Segmentation failed: No text from LLM or fallback.")

        logger.info("Segmentation successful.")
        return result
    except Exception as err:
        # Any failure above (including the ValueError just raised) lands here
        # and gets one more attempt with the regex splitter.
        logger.error(f"Error during LLM segmentation: {err}. Using regex fallback.", exc_info=True)
        recovered = split_on_punctuation()
        if recovered:
            return recovered
        raise ValueError(f"Segmentation failed: Error '{err}' and fallback also yielded no text.")

async def generate_persian_srt_google(transcription):
    """Build Persian SRT subtitle content from a transcription.

    Pipeline: translate to Persian, segment the translation into subtitle
    lines, then hand the segments to ``generate_srt_with_timecodes``.

    Args:
        transcription: Transcribed text to turn into subtitles.

    Returns:
        Whatever ``generate_srt_with_timecodes`` returns for the segmented text.

    Raises:
        ValueError: If translation or segmentation yields an empty result.
        Exception: Any failure from the underlying steps, logged and re-raised.
    """
    logger.info("Generating Persian SRT...")
    try:
        translated = await translate_to_persian_google(transcription)
        if not translated:
            raise ValueError("Translation step failed for SRT.")

        subtitle_lines = await segment_persian_text_google(translated)
        if not subtitle_lines:
            raise ValueError("Segmentation step failed for SRT.")

        srt_text = generate_srt_with_timecodes(subtitle_lines)
    except Exception as err:
        logger.error(f"Error generating SRT: {err}", exc_info=True)
        raise
    else:
        logger.info("SRT generation successful.")
        return srt_text

async def get_bot_response_google(message_text):
    """Generate a short Persian chat reply to a user's text message.

    Assumes GOOGLE_API_KEY is already globally configured on ``genai``.
    This function never raises for model/API failures: it logs them and
    returns a fixed Persian apology instead.

    Args:
        message_text: The user's message.

    Returns:
        The model's stripped reply, or a fixed Persian fallback message when
        the reply is empty or an error occurs.
    """
    logger.info(f"Getting bot response for: {message_text[:50]}...")
    try:
        chat_model = get_gemini_model_instance("text_model_name")
        instructions = "\n".join([
            "You are LinguaScribe_Bot, a helpful Telegram assistant. The user's language is Persian.",
            f'User says: "{message_text}"',
            "Provide a concise and helpful response in Persian. If the user sends audio, you will have received the transcription as 'messageText'.",
            "If they ask about services, mention audio transcription to text (پیاده‌سازی صوت), Persian translation (ترجمه به فارسی), SRT generation (تولید فایل زیرنویس SRT), and audio summarization (خلاصه‌سازی صوت).",
            "Keep responses brief. If the input is non-sensical or just a greeting, respond politely and briefly in Persian.",
            "Return ONLY the bot's reply.",
        ])
        # generate_content is synchronous, so run it in a worker thread.
        result = await asyncio.to_thread(chat_model.generate_content, instructions)
        answer = result.text.strip()
        if answer:
            logger.info("Bot response generated.")
            return answer

        logger.warning("Bot response generation was empty.")
        return "متاسفانه در حال حاضر قادر به پاسخگویی نیستم."
    except Exception as err:
        logger.error(f"Error getting bot response: {err}", exc_info=True)
        return "متاسفانه در پردازش درخواست شما مشکلی پیش آمد."

# --- New Function for Processing Long Audio ---
async def process_long_audio(event, download_path, original_name_base, chat_id, processing_msg):
    """Process an audio file, splitting it into chunks first if it is too long.

    Audio no longer than ``MAX_DURATION_MS`` is delegated to
    ``process_single_audio``. Longer audio is split, each chunk is transcribed,
    and the combined transcription is sent to the chat together with an SRT
    file and a summary. Progress is reported by editing ``processing_msg``.

    Args:
        event: The triggering event (currently unused; kept for the callers).
        download_path: Path of the downloaded audio file.
        original_name_base: Base name used for generated file names.
        chat_id: Chat that receives the result files and messages.
        processing_msg: Status message that is edited to show progress.

    Returns:
        A ``(uploaded_refs, files_to_cleanup)`` tuple. On failure the error is
        reported in ``processing_msg`` and the tuple still lists every uploaded
        reference and temp file created before the failure, so the caller can
        clean them up.
    """
    uploaded_refs = []               # Every Google upload made so far
    cleanup_paths = [download_path]  # Every temp file known to exist so far
    status_text = ""

    async def append_status(line):
        # Keep the running status locally: edit_message() does not update the
        # `processing_msg` object we hold, so `processing_msg.text` goes stale
        # and progress lines would overwrite each other instead of accumulating.
        nonlocal status_text
        status_text += line
        await client.edit_message(processing_msg, status_text)

    try:
        # Check audio duration
        audio_duration_ms = await get_audio_duration(download_path)

        if audio_duration_ms <= MAX_DURATION_MS:
            # Audio is shorter than threshold, process normally
            logger.info(f"Audio duration ({audio_duration_ms/60000:.2f} min) is under threshold, processing normally")
            return await process_single_audio(str(download_path), original_name_base, chat_id, processing_msg)

        # Audio is longer than threshold, need to split
        status_text = f"⚠️ فایل صوتی شما طولانی است ({audio_duration_ms/60000:.1f} دقیقه). در حال تقسیم به قطعات {MAX_DURATION_MINUTES} دقیقه‌ای و پردازش..."
        await client.edit_message(processing_msg, status_text)

        chunk_paths = await split_audio_file(download_path, original_name_base)
        cleanup_paths.extend(chunk_paths)
        if not chunk_paths:
            raise ValueError("Audio splitting produced no chunks.")

        # Transcribe each chunk in order and collect the texts
        all_transcriptions = []
        for i, chunk_path in enumerate(chunk_paths):
            await append_status(f"\n\n⏳ در حال پیاده‌سازی متن قطعه {i+1} از {len(chunk_paths)}...")

            chunk_transcription, chunk_ref = await transcribe_audio_google(chunk_path)
            uploaded_refs.append(chunk_ref)
            all_transcriptions.append(chunk_transcription)

            await append_status(f"\n✅ پیاده‌سازی قطعه {i+1} انجام شد.")

        full_transcription = "\n\n".join(all_transcriptions)

        # Save and send the combined transcription
        transcription_path = TEMP_DIR / f"{original_name_base}_full_transcription.txt"
        transcription_path.write_text(full_transcription, encoding="utf-8")
        cleanup_paths.append(transcription_path)
        await client.send_file(
            chat_id,
            str(transcription_path),
            caption="🎤 متن کامل پیاده‌سازی شده:"
        )

        # Generate SRT from combined transcription
        await append_status("\n\n⏳ در حال تولید زیرنویس (SRT) کامل...")
        srt_content = await generate_persian_srt_google(full_transcription)
        srt_path = TEMP_DIR / f"{original_name_base}_full_subtitles.srt"
        srt_path.write_text(srt_content, encoding="utf-8")
        cleanup_paths.append(srt_path)
        await client.send_file(chat_id, str(srt_path), caption="🎬 فایل زیرنویس کامل (SRT):")
        await append_status("\n✅ فایل زیرنویس (SRT) ارسال شد.")

        # The summary request gets only the first chunk's audio reference; the
        # full transcription is supplied as text context to cover the rest.
        await append_status("\n\n⏳ در حال تهیه خلاصه کلی...")
        summary = await summarize_audio_google(uploaded_refs[0], full_transcription)
        await client.send_message(
            chat_id,
            f"📝 *خلاصه محتوای کامل:*\n\n{summary}",
            parse_mode='md'
        )

        # Final status message replaces the accumulated progress log
        await client.edit_message(
            processing_msg,
            f"✅ پردازش فایل صوتی {audio_duration_ms/60000:.1f} دقیقه‌ای شما با موفقیت تکمیل شد."
        )

        return uploaded_refs, cleanup_paths

    except Exception as e:
        logger.exception(f"Error in process_long_audio: {e}")
        try:
            await client.edit_message(
                processing_msg,
                f"❌ خطا در پردازش فایل صوتی بلند: {str(e)}"
            )
        except Exception as report_error:
            # Failing to report the error must not stop the caller's cleanup.
            logger.warning(f"Could not report failure to the user: {report_error}")
        # Hand back everything created so far so nothing is left behind.
        return uploaded_refs, cleanup_paths

# --- Function to Process a Single Audio File (for reuse) ---
async def process_single_audio(file_path, original_name_base, chat_id, processing_msg):
    """Transcribe, summarize and subtitle one audio file, reporting progress.

    The transcription file, the summary message and the SRT file are sent to
    ``chat_id`` in that order, while ``processing_msg`` is edited with an
    accumulating progress log.

    Args:
        file_path: Path of the audio file to process.
        original_name_base: Base name used for generated file names.
        chat_id: Chat that receives the result files and messages.
        processing_msg: Status message that is edited to show progress.

    Returns:
        A ``(uploaded_refs, files_to_cleanup)`` tuple. On failure the error is
        reported in ``processing_msg`` and the tuple still lists every uploaded
        reference and temp file created before the failure, so the caller can
        clean them up.
    """
    uploaded_refs = []           # Google uploads made so far
    cleanup_paths = [file_path]  # Temp files known to exist so far
    # Track the status text locally: edit_message() does not update the
    # `processing_msg` object we hold, so re-reading `processing_msg.text`
    # would make each progress line overwrite the previous one.
    status_text = processing_msg.text or ""

    async def append_status(line):
        nonlocal status_text
        status_text += line
        await client.edit_message(processing_msg, status_text)

    try:
        await append_status("\n\n⏳ در حال پیاده‌سازی متن...")
        transcription, google_audio_file_uploaded_ref = await transcribe_audio_google(file_path)
        uploaded_refs.append(google_audio_file_uploaded_ref)

        transcription_path = TEMP_DIR / f"{original_name_base}_transcription.txt"
        transcription_path.write_text(transcription, encoding="utf-8")
        cleanup_paths.append(transcription_path)

        await client.send_file(chat_id, str(transcription_path), caption="🎤 متن پیاده‌سازی شده:")
        await append_status("\n✅ متن پیاده‌سازی و ارسال شد.")

        await append_status("\n\n⏳ در حال تهیه خلاصه...")
        summary = await summarize_audio_google(google_audio_file_uploaded_ref, transcription)
        await client.send_message(chat_id, f"📝 *خلاصه محتوا:*\n\n{summary}", parse_mode='md')
        await append_status("\n✅ خلاصه ارسال شد.")

        await append_status("\n\n⏳ در حال تولید زیرنویس (SRT)...")
        srt_content = await generate_persian_srt_google(transcription)
        srt_path = TEMP_DIR / f"{original_name_base}_subtitles.srt"
        srt_path.write_text(srt_content, encoding="utf-8")
        cleanup_paths.append(srt_path)
        await client.send_file(chat_id, str(srt_path), caption="🎬 فایل زیرنویس (SRT):")
        await append_status("\n✅ فایل زیرنویس (SRT) ارسال شد.")

        # Final status replaces the accumulated progress log
        await client.edit_message(processing_msg, "✅ پردازش فایل صوتی با موفقیت تکمیل شد!")

        return uploaded_refs, cleanup_paths
    except Exception as e:
        logger.exception(f"Error in process_single_audio: {e}")
        try:
            await client.edit_message(processing_msg, f"❌ خطا در پردازش فایل صوتی: {str(e)}")
        except Exception as report_error:
            # Failing to report the error must not stop the caller's cleanup.
            logger.warning(f"Could not report failure to the user: {report_error}")
        # Hand back everything created so far so nothing is left behind.
        return uploaded_refs, cleanup_paths

# --- Main Bot Event Handlers ---

@client.on(events.NewMessage(pattern='/start'))
async def start(event):
    """Reply to the /start command with the bot's Persian welcome message."""
    user = await event.get_sender()
    chat = event.chat_id
    logger.info(f"New /start command from User {user.id} in Chat {chat}")

    # One list entry per displayed line; empty strings are blank lines.
    welcome_text = "\n".join([
        "👋 سلام! به ربات *LinguaScribe* خوش آمدید.",
        "",
        "این ربات می‌تواند:",
        "🎤 **پیاده‌سازی متن**: فایل‌های صوتی را به متن تبدیل کند",
        "📝 **خلاصه‌سازی**: محتوای صوتی را خلاصه کند",
        "🎬 **زیرنویس**: فایل SRT فارسی تولید کند",
        "",
        "برای شروع، یک فایل صوتی برای من ارسال کنید.",
    ])
    await event.reply(welcome_text, parse_mode='md')

@client.on(events.NewMessage(pattern='/help'))
async def help_command(event):
    """Reply to the /help command with the Persian usage guide."""
    # One list entry per displayed line; empty strings are blank lines.
    guide_lines = [
        "🔍 **راهنمای استفاده از LinguaScribe Bot**",
        "",
        "کاربرد:",
        "1️⃣ یک فایل صوتی (voice message, audio file) ارسال کنید",
        "2️⃣ ربات به صورت خودکار:",
        "   - متن پیاده‌سازی شده را ارسال می‌کند",
        "   - خلاصه‌ای از محتوا تهیه می‌کند",
        "   - فایل زیرنویس SRT تولید می‌کند",
        "",
        "📋 **نکات مهم**:",
        "• فایل‌های صوتی تا ۳۰ دقیقه پشتیبانی می‌شوند",
        "• برای فایل‌های طولانی‌تر، ربات آنها را به بخش‌های کوچکتر تقسیم می‌کند",
        "• زبان اصلی مورد پشتیبانی فارسی است",
        "",
        "📌 **دستورات:**",
        "/start - شروع کار با ربات",
        "/help - نمایش این راهنما",
    ]
    await event.reply("\n".join(guide_lines), parse_mode='md')

@client.on(events.NewMessage(func=lambda msg: msg.text and not msg.text.startswith('/')))
async def handle_text_message(event):
    """Answer a non-command text message with a model-generated Persian reply.

    A placeholder reply is sent first and then edited in place with either the
    generated answer or a generic error notice.
    """
    chat = event.chat_id
    user_text = event.text
    logger.info(f"Received text message in chat {chat}: {user_text[:50]}...")

    # Placeholder so the user sees that the message is being handled.
    placeholder = await event.reply("⏳ در حال پردازش پیام شما...")

    try:
        answer = await get_bot_response_google(user_text)
        await client.edit_message(placeholder, answer)
    except Exception as err:
        logger.error(f"Error handling text message: {err}", exc_info=True)
        await client.edit_message(placeholder, "❌ متأسفانه در پردازش پیام شما مشکلی پیش آمد.")

@client.on(events.NewMessage(func=lambda e: e.audio or e.voice or e.document))
async def handle_audio_message(event):
    """Download an incoming audio/voice/audio-document message and process it.

    The media is saved into ``TEMP_DIR``, handed to ``process_long_audio``, and
    every temp file reported back is removed afterwards, even if processing
    raises. Non-audio documents are rejected with a short reply.

    NOTE(review): a media message that carries a caption presumably also
    matches the text-message handler's filter; confirm whether both handlers
    firing for the same message is intended.
    """
    try:
        chat_id = event.chat_id
        sender = await event.get_sender()
        # The sender can be missing; do not fail the whole handler on a log line.
        logger.info(f"Received audio from User {getattr(sender, 'id', None)} in Chat {chat_id}")

        # Check if the message contains audio, voice, or an audio document
        if event.audio:
            media, file_type = event.audio, "audio"
        elif event.voice:
            media, file_type = event.voice, "voice"
        elif event.document and (getattr(event.document, 'mime_type', None) or '').startswith('audio/'):
            media, file_type = event.document, "document"
        else:
            await event.reply("❌ لطفاً یک فایل صوتی معتبر ارسال کنید.")
            return

        # Initial processing message
        processing_msg = await event.reply("⏳ در حال دریافت فایل صوتی...")

        # Build a unique local file name from the original name (if any) and a timestamp.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        default_name = f"{file_type}_{timestamp}"
        # The file name may sit on any attribute, not necessarily the first one,
        # and the attribute list may be empty or missing.
        attributes = getattr(media, 'attributes', None) or []
        original_name = next(
            (attr.file_name for attr in attributes if getattr(attr, 'file_name', None)),
            default_name,
        )
        # The name is user-supplied: keep only its stem (no directory part) and
        # cap the length so the derived temp file names stay valid.
        original_name_base = (Path(original_name).stem or default_name)[:100]
        download_path = TEMP_DIR / f"{original_name_base}_{timestamp}.ogg"

        # Download the file
        try:
            await client.download_media(message=event.message, file=str(download_path))
            logger.info(f"File downloaded to {download_path}")
            await client.edit_message(processing_msg, "✅ فایل صوتی دریافت شد. در حال پردازش...")
        except Exception as download_error:
            logger.error(f"Error downloading file: {download_error}", exc_info=True)
            await client.edit_message(processing_msg, "❌ خطا در دریافت فایل صوتی.")
            return

        # Process the audio (handles both short and long audio files). The
        # finally-clause guarantees the download is removed even if processing
        # raises before it can report its own cleanup list.
        uploaded_refs, files_to_cleanup = [], [download_path]
        try:
            uploaded_refs, files_to_cleanup = await process_long_audio(
                event, download_path, original_name_base, chat_id, processing_msg)
        finally:
            await cleanup_files(*files_to_cleanup)

        for ref in uploaded_refs:
            # Uploaded references are only logged; nothing is deleted here.
            if ref and hasattr(ref, 'name'):
                logger.info(f"Cleaning up Google API file reference: {ref.name}")

    except Exception as e:
        logger.exception(f"Unhandled error in handle_audio_message: {e}")
        try:
            await event.reply("❌ متأسفانه در پردازش فایل صوتی شما مشکلی پیش آمد. لطفاً دوباره تلاش کنید.")
        except Exception as reply_error:
            # Best effort only, but never swallow BaseException (e.g. cancellation).
            logger.warning(f"Could not send error reply: {reply_error}")

# --- Main Entry Point ---

async def main():
    """Start the Telegram bot and keep it running until it disconnects.

    Steps: delete leftover entries in ``TEMP_DIR`` (failures are logged, not
    raised), start the client with ``BOT_TOKEN``, log the bot's username, then
    block until disconnection. The client is always disconnected on the way out.
    """
    logger.info("Starting the bot...")

    # Remove whatever a previous run left in the temp directory.
    for leftover in TEMP_DIR.glob("*"):
        try:
            leftover.unlink()
        except Exception as err:
            logger.error(f"Error cleaning up file {leftover}: {err}")
        else:
            logger.info(f"Cleaned up old file: {leftover}")

    await client.start(bot_token=BOT_TOKEN)
    logger.info("Bot started successfully")

    bot_user = await client.get_me()
    logger.info(f"Bot Username: @{bot_user.username}")

    # Serve until the connection ends or the user interrupts.
    try:
        logger.info("Bot is now running. Press Ctrl+C to stop.")
        await client.run_until_disconnected()
    except KeyboardInterrupt:
        logger.info("Bot stopped by user")
    finally:
        await client.disconnect()
        logger.info("Bot disconnected")

if __name__ == "__main__":
    # Run the bot: drive the main() coroutine to completion on the current
    # event loop.
    # NOTE(review): nest_asyncio.apply() is called near the top of the file,
    # presumably so run_until_complete() also works when a loop is already
    # running (e.g. inside a notebook) — confirm before switching to asyncio.run().
    loop = asyncio.get_event_loop()
    loop.run_until_complete(main())

ERROR:__main__:Error during translation: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.
Traceback (most recent call last):
  File "<ipython-input-2-ec2a3394cdbd>", line 306, in translate_to_persian_google
    translation = response.text.strip()
                  ^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/google/generativeai/types/generation_types.py", line 463, in text
    parts = self.parts
            ^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/google/generativeai/types/generation_types.py", line 446, in parts
    raise ValueError(msg)
ValueError: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.
ERROR:__main__:Error generating SRT: Invalid operation: The `response.parts` quick accessor requires a single candidate, but but `response.candidates` is empty.
Traceback (most recent call last):
  File "<ipyt