In [None]:
!pip install -U -q "google-genai>=1.16.1"
!pip install pysrt

from google.colab import drive, userdata
import io
import json
import re
import wave
import os
import base64
import struct
import shutil
import pysrt, time

from IPython.display import Audio, display, HTML, Markdown
from google import genai
from google.genai import types
from google.genai.types import GenerateContentConfig, Tool

# -------------------------------
# Load API key, mount Google Drive, create the Gemini client
# -------------------------------
# Read the API key stored under the name 'GOOGLE_API_KEY' via google.colab.userdata.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
# Mount Drive at /content/drive; every input/output path used below lives under
# this mount point. force_remount=True is passed so a stale mount is replaced.
drive.mount('/content/drive', force_remount=True)

# Initialize client (shared by every TTS request made later in the notebook)
client = genai.Client(api_key=GOOGLE_API_KEY)


# -------------------------------
# Helper: parse .srt into segments
# -------------------------------
def parse_srt(path):
    """Load an .srt file and return its cues as ``(start, end, text)`` tuples.

    ``start`` and ``end`` are offsets in seconds (floats) built from the
    hour/minute/second/millisecond fields of each cue's timestamps. ``text``
    is the cue text with newlines replaced by single spaces and surrounding
    whitespace stripped. Cues are returned in the order ``pysrt`` yields them.
    """

    def to_seconds(stamp):
        # Whole seconds are summed as integers first; the millisecond
        # fraction is added last.
        return stamp.hours*3600 + stamp.minutes*60 + stamp.seconds + stamp.milliseconds/1000

    return [
        (
            to_seconds(cue.start),
            to_seconds(cue.end),
            cue.text.replace("\n", " ").strip(),
        )
        for cue in pysrt.open(path)
    ]


# -------------------------------
# Helper: write .wav file
# -------------------------------
def wave_file(filename, pcm, channels=1, rate=24000, sample_width=2):
    """Write raw PCM bytes to ``filename`` as an uncompressed WAV file.

    Args:
        filename: Path of the WAV file to create (overwritten if present).
        pcm: Raw PCM sample data as a bytes-like object.
        channels: Number of audio channels written to the header.
        rate: Sample rate in Hz written to the header.
        sample_width: Bytes per sample written to the header.

    The chosen parameters and the payload size are printed before writing.
    """
    print("\nWriting audio file with parameters:")
    for label, value in (
        ("Channels", channels),
        ("Sample rate", rate),
        ("Sample width", sample_width),
    ):
        print(f"{label}: {value}")
    print(f"Data length: {len(pcm)} bytes")

    with wave.open(filename, "wb") as writer:
        # nframes=0 is only a placeholder; the wave module fixes up the frame
        # count in the header from the data actually written.
        writer.setparams((channels, sample_width, rate, 0, "NONE", "not compressed"))
        writer.writeframes(pcm)


# -------------------------------
# NEW Helper: Safe TTS with retry
# -------------------------------
def _extract_audio_bytes(response):
    """Return the first non-empty inline audio payload found in ``response``.

    Every level of the response is read with ``getattr(..., None)`` so that a
    missing or ``None`` candidates list, content, parts list or inline blob
    yields ``None`` instead of raising. Returns ``None`` when no payload exists.
    """
    for candidate in getattr(response, "candidates", None) or []:
        content = getattr(candidate, "content", None)
        if content is None:
            continue
        # Normal shape: audio sits in the inline_data of one of the parts.
        for part in getattr(content, "parts", None) or []:
            payload = getattr(getattr(part, "inline_data", None), "data", None)
            if payload:
                return payload
        # Fallback shape: inline_data hanging directly off the content object.
        payload = getattr(getattr(content, "inline_data", None), "data", None)
        if payload:
            return payload

    # Last-resort fallback: a top-level ``audio`` attribute carrying the bytes.
    return getattr(getattr(response, "audio", None), "data", None) or None


def get_tts_audio(client, prompt, voice, retries=5, delay=5):
    """Call Gemini TTS with retry logic and return the generated audio bytes.

    Args:
        client: Gemini client exposing ``models.generate_content``.
        prompt: Text sent as the request contents.
        voice: Name of the prebuilt voice to request.
        retries: Maximum number of attempts.
        delay: Seconds to wait between attempts. No wait happens after the
            final attempt, since nothing follows it.

    Returns:
        The audio payload from the first attempt that yields one, or ``None``
        when every attempt either raised or returned no audio.
    """
    for attempt in range(1, retries + 1):
        is_last = attempt >= retries
        try:
            response = client.models.generate_content(
                model="gemini-2.5-pro-preview-tts",
                contents=prompt,
                config=types.GenerateContentConfig(
                    response_modalities=["audio"],
                    speech_config=types.SpeechConfig(
                        voice_config=types.VoiceConfig(
                            prebuilt_voice_config=types.PrebuiltVoiceConfig(
                                voice_name=voice
                            )
                        )
                    ),
                ),
            )
            data = _extract_audio_bytes(response)
        except Exception as e:
            # Deliberately broad: any request failure counts as a failed
            # attempt and is retried rather than aborting the whole batch.
            print(f"⚠️ TTS error on attempt {attempt}: {e}")
        else:
            if data:
                return data  # ✅ success
            suffix = "" if is_last else " Retrying..."
            print(f"⚠️ No audio returned on attempt {attempt}.{suffix}")

        if not is_last:
            time.sleep(delay)
    return None  # ❌ all retries failed


# -------------------------------
# Input + setup
# -------------------------------
srt_file_path = '/content/drive/MyDrive/Test1/mt/Eng/1A.srt'  # replace with your path
VOICE = 'Charon'

segments = parse_srt(srt_file_path)
print(f"Found {len(segments)} subtitle segments.")

# Per-segment audio goes to a folder named after the subtitle file.
base_name = os.path.splitext(os.path.basename(srt_file_path))[0]
output_dir = f'/content/drive/MyDrive/Test1/tts/{base_name}_segments_tel'
os.makedirs(output_dir, exist_ok=True)

# Opened in append mode below, so entries from earlier runs are kept.
failed_log = os.path.join(output_dir, "failed_segments.txt")

# -------------------------------
# Main processing loop
# -------------------------------
failed_count = 0
for idx, (start, end, text) in enumerate(segments, 1):

    # Files are numbered by subtitle position (001.wav, 002.wav, ...), so a
    # skipped segment leaves a gap in the numbering rather than shifting it.
    file_name = f"{idx:03d}.wav"
    output_path = os.path.join(output_dir, file_name)

    # Skip already-generated audio so an interrupted run can be resumed.
    if os.path.exists(output_path):
        print(f"⏩ Skipping segment {idx} — audio already exists.")
        continue

    if len(text.strip()) < 5:
        print(f"⚠️ Skipping too-short segment {idx}: '{text}'")
        continue

    PROMPT = f"Speak this text in clear and natural Telugu after translating the given english Text, Preserve the meaning: {text}"
    print(f"\nProcessing segment {idx} ({start:.2f}s → {end:.2f}s): {text[:60]}...")

    data = get_tts_audio(client, PROMPT, VOICE)
    if not data:
        print(f"❌ Skipping segment {idx} — no audio after retries.")
        failed_count += 1
        # Explicit UTF-8: the log stores subtitle text, which may be non-ASCII.
        with open(failed_log, 'a', encoding='utf-8') as log:
            log.write(f"{idx}: {text}\n")
        continue

    rate = 24000
    # Write to a scratch file in the current working directory first, then
    # copy it to Drive: an interrupted write then cannot leave a truncated
    # file at output_path that the "already exists" check would skip later.
    # The same resolved path is used for writing and copying, so this no
    # longer depends on the working directory being /content.
    local_path = os.path.abspath(file_name)
    wave_file(local_path, data, rate=rate)

    shutil.copy(local_path, output_path)
    display(Audio(output_path))


if failed_count:
    print(f"\n⚠️ Finished with {failed_count} failed segment(s); generated audio is in: {output_dir}")
else:
    print(f"\n✅ All segments saved in: {output_dir}")
print(f"📄 Failed segments (if any) logged to: {failed_log}")


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/47.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m262.0/262.0 kB[0m [31m18.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pysrt
  Downloading pysrt-1.1.2.tar.gz (104 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.4/104.4 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pysrt
  Building wheel for pysrt (setup.py) ... [?25l[?25hdone
  Created wheel for pysrt: filename=pysrt-1.1.2-py3-none-any.whl size=13443 sha256=a63f2093a403f520def48555fca6bd270f905a496c2a35960f65a6e8269640bd
  Stored in directory: /root/.cache/pip/wheels/6a/36/54/2aa8dc961885dfa7b0ebd45a57505f25039d79b4ea0fd9f29d
Successfully built pysrt
Installing coll


Processing segment 2 (5.78s → 23.25s): I am a graphic and motion graphic designer and have been wor...
⚠️ No audio returned on attempt 1. Retrying...
⚠️ No audio returned on attempt 2. Retrying...


Audio merge

In [None]:
import subprocess

def merge_segments_ffmpeg_timed(segments, segments_dir, output_path, sample_rate=24000):
    """
    Merge segments into a single time-aligned audio track using FFmpeg filter_complex.
    Each segment is placed at its exact SRT start time.

    Args:
        segments: Iterable of ``(start, end, text)`` tuples; only ``start``
            (seconds) is used. The 1-based position of each tuple selects the
            file ``{position:03d}.wav`` inside ``segments_dir``.
        segments_dir: Directory holding the per-segment WAV files.
        output_path: Path of the merged WAV file to write (overwritten).
        sample_rate: Sample rate in Hz passed to FFmpeg for the output.

    Raises:
        ValueError: If none of the expected segment files exist.
        subprocess.CalledProcessError: If FFmpeg exits with a non-zero status.
    """
    print("\n🎯 Performing precise timeline merge using FFmpeg...")

    inputs = []
    filter_parts = []
    labels = []

    for i, (start, _end, _text) in enumerate(segments, 1):
        seg_path = os.path.join(segments_dir, f"{i:03d}.wav")
        if not os.path.exists(seg_path):
            print(f"⚠️ Skipping missing segment {i:03d}")
            continue

        # FFmpeg numbers inputs by their position on the command line, so the
        # index must count only the files actually passed with -i, not the
        # segment number (the two diverge as soon as one segment is missing).
        input_index = len(labels)
        label = f"[a{input_index}]"

        # round() rather than int(): float artefacts such as 1004.999... must
        # not lose a millisecond.
        delay_ms = round(start * 1000)

        inputs += ["-i", seg_path]
        # Apply delay via adelay filter (value repeated for a second channel).
        filter_parts.append(f"[{input_index}:a]adelay={delay_ms}|{delay_ms}{label}")
        labels.append(label)

    if not labels:
        # Without inputs the filter graph would be invalid; fail with a clear
        # message instead of an opaque FFmpeg error.
        raise ValueError(f"No segment audio files found in {segments_dir!r}; nothing to merge.")

    # Combine all delayed audio tracks; normalize=0 keeps amix from scaling
    # each input down by the number of inputs.
    filter_complex = (
        "; ".join(filter_parts)
        + f"; {''.join(labels)}amix=inputs={len(labels)}:normalize=0[aout]"
    )

    cmd = [
        "ffmpeg", "-y",
        *inputs,
        "-filter_complex", filter_complex,
        "-map", "[aout]",
        "-ar", str(sample_rate),
        "-ac", "1",
        "-c:a", "pcm_s16le",
        output_path
    ]

    print(f"\nRunning FFmpeg command:\n{' '.join(cmd)}\n")
    subprocess.run(cmd, check=True)
    print(f"✅ Final aligned audio saved at: {output_path}")

# Path of the merged, time-aligned track on Drive.
# NOTE(review): the file name ends in `_Eng`, while the TTS prompt asks for
# Telugu speech and the segment folder ends in `_segments_tel` — confirm the
# intended suffix.
final_output = f"/content/drive/MyDrive/Test1/tts/{base_name}_merged_timed_Eng.wav"
# Relies on `segments`, `output_dir` and `base_name` defined by the TTS cell,
# so that cell must have been run in this session first.
merge_segments_ffmpeg_timed(segments, output_dir, final_output)




🎯 Performing precise timeline merge using FFmpeg...

Running FFmpeg command:
ffmpeg -y -i /content/drive/MyDrive/Test1/tts/1A_segments/001.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/002.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/003.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/004.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/005.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/006.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/007.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/008.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/009.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/010.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/011.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/012.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/013.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/014.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/015.wav -i /content/drive/MyDrive/Test1/tts/1A_segments/016.wav -i /content/driv