# test 1a

In [5]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive so the input/output folders below are reachable.
drive.mount('/content/drive', force_remount=True)

# Load the Gemini API key from Colab secrets and fail fast when it is empty.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("❌ No GOOGLE_API_KEY found in Colab secrets! Add it under 'More → Secrets'.")

genai.configure(api_key=api_key)

# Model used by transcribe_audio_file() below.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# input_dir is derived from base_dir so the two cannot drift apart.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = os.path.join(base_dir, "English", "aa")
output_dir = "/content/drive/MyDrive/Test_28_Adnew_mp3/ASRgemini/"
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path: str) -> str:
    """Transcribe one WAV file with Gemini and return the SRT-formatted text.

    The file is decoded with pydub, re-exported to WAV bytes in memory and sent
    inline, together with the SRT prompt, to the module-level ``model``.

    Args:
        file_path: Path to the ``.wav`` file to transcribe.

    Returns:
        The stripped response text, or ``""`` when the API call (or reading
        ``response.text``) raises; the error is printed in that case.

    Note:
        Decoding happens outside the ``try`` block, so a file pydub cannot
        read still raises to the caller.
        NOTE(review): the audio is sent inline; very large files may exceed the
        API's inline request size limit - confirm against the Gemini docs.
    """
    audio = AudioSegment.from_wav(file_path)
    buffer = io.BytesIO()
    audio.export(buffer, format="wav")
    audio_bytes = buffer.getvalue()

    try:
        # No generation_config is passed in this run, so the API's default
        # sampling settings apply.
        response = model.generate_content(
            contents=[
                {
                    "role": "user",
                    "parts": [
                        {"mime_type": "audio/wav", "data": audio_bytes},
                        """
                        Transcribe this audio exactly as spoken in English (strictly: no extra comments, strictly: no filler words)
                        in valid .srt format.

                        Before outputting, you MUST internally ensure:

                        - Each subtitle segment must contain atleast 2 sentences in same line, unless the audio ends and fewer remain.
                        - Maintain natural sentence boundaries.
                        - Combine sentences smoothly while keeping meaning and flow.
                        - Only create a new segment after atleast 2 sentences have been completed (except the final segment).
                        - Timestamp continuity must be correct and must not overlap and YOU MUST GIVE ACCURATE TIMESTAMPS UPTO HALF-SECOND
                        - Format must strictly be:

                          <index>
                          HH:MM:SS,SSS --> HH:MM:SS,SSS
                          text

                        Rules:
                        1. Timestamps must be chronological and continuous.
                        2. Every segment contains atleast 2 sentences (except final).
                        3. Never generate timestamps beyond the audio duration.
                        4. If Gemini outputs incorrect timestamps, fix them BEFORE final output.
                        5. No explanations. Only the final SRT.
                        6. Include speaker labels if detectable.
                        7. Silence > 2 seconds → include:
                          [Silence]
                          with correct timestamps.
                        """
                    ]
                }
            ],
        )

        return response.text.strip()
    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do
        # with an empty transcript.
        print("❌ Error:", e)
        return ""


# ========================
# MAIN PROCESS
# ========================

# Transcribe every WAV file in input_dir and store the result as <name>.txt.
for filename in os.listdir(input_dir):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\n🎧 Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)
    if not text:
        # transcribe_audio_file() returns "" on failure; do not leave an empty
        # transcript behind or report success for it.
        print(f"⚠️ No transcript returned for {filename}; skipping.")
        continue

    # splitext handles any extension casing (".WAV"), unlike str.replace,
    # which is case-sensitive and would also rewrite ".wav" mid-name.
    stem = os.path.splitext(filename)[0]
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"✅ Done: {filename}")
    print(f"📄 TXT saved to: {txt_output}")


Mounted at /content/drive

🎧 Transcribing full audio: Copy of Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
✅ Done: Copy of Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
📄 TXT saved to: /content/drive/MyDrive/Test_28_Adnew_mp3/ASRgemini/Copy of Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.txt


# ASR with temperature 0.6 (this works well)

In [None]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive so the input/output folders below are reachable.
drive.mount('/content/drive', force_remount=True)

# Load the Gemini API key from Colab secrets and fail fast when it is empty.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("❌ No GOOGLE_API_KEY found in Colab secrets! Add it under 'More → Secrets'.")

genai.configure(api_key=api_key)

# Model used by transcribe_audio_file() below.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# Both folders are derived from base_dir so they cannot drift apart.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = os.path.join(base_dir, "English", "aa")
output_dir = os.path.join(base_dir, "testgenasr1", "English", "Srtformatissue")
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path: str) -> str:
    """Transcribe one WAV file with Gemini (temperature 0.6) and return SRT text.

    The file is decoded with pydub, re-exported to WAV bytes in memory and sent
    inline, together with the SRT prompt, to the module-level ``model``.

    Args:
        file_path: Path to the ``.wav`` file to transcribe.

    Returns:
        The stripped response text, or ``""`` when the API call (or reading
        ``response.text``) raises; the error is printed in that case.

    Note:
        Decoding happens outside the ``try`` block, so a file pydub cannot
        read still raises to the caller.
        NOTE(review): the audio is sent inline; very large files may exceed the
        API's inline request size limit - confirm against the Gemini docs.
    """
    audio = AudioSegment.from_wav(file_path)
    buffer = io.BytesIO()
    audio.export(buffer, format="wav")
    audio_bytes = buffer.getvalue()

    try:
        response = model.generate_content(
            contents=[
                {
                    "role": "user",
                    "parts": [
                        {"mime_type": "audio/wav", "data": audio_bytes},
                        """
                        Transcribe this audio exactly as spoken in English (strictly: no extra comments, strictly: no filler words)
                        in valid .srt format.

                        Before outputting, you MUST internally ensure:

                        - Each subtitle segment must contain atleast 2 sentences in same line, unless the audio ends and fewer remain.
                        - Maintain natural sentence boundaries.
                        - Combine sentences smoothly while keeping meaning and flow.
                        - Only create a new segment after atleast 2 sentences have been completed (except the final segment).
                        - Timestamp continuity must be correct and must not overlap and YOU MUST GIVE ACCURATE TIMESTAMPS UPTO HALF-SECOND
                        - Format must strictly be:

                          <index>
                          HH:MM:SS,SSS --> HH:MM:SS,SSS
                          text

                        Rules:
                        1. Timestamps must be chronological and continuous.
                        2. Every segment contains atleast 2 sentences (except final).
                        3. Never generate timestamps beyond the audio duration.
                        4. If Gemini outputs incorrect timestamps, fix them BEFORE final output.
                        5. No explanations. Only the final SRT.
                        6. Include speaker labels if detectable.
                        7. Silence > 2 seconds → include:
                          [Silence]
                          with correct timestamps.
                        """
                    ]
                }
            ],
            # Only the temperature is overridden; top_p / top_k stay at the
            # API defaults.
            generation_config=genai.types.GenerationConfig(
                temperature=0.6,
            )
        )

        return response.text.strip()
    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do
        # with an empty transcript.
        print("❌ Error:", e)
        return ""


# ========================
# MAIN PROCESS
# ========================

# Transcribe every WAV file in input_dir and store the result as <name>.txt.
for filename in os.listdir(input_dir):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\n🎧 Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)
    if not text:
        # transcribe_audio_file() returns "" on failure; do not leave an empty
        # transcript behind or report success for it.
        print(f"⚠️ No transcript returned for {filename}; skipping.")
        continue

    # splitext handles any extension casing (".WAV"), unlike str.replace,
    # which is case-sensitive and would also rewrite ".wav" mid-name.
    stem = os.path.splitext(filename)[0]
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"✅ Done: {filename}")
    print(f"📄 TXT saved to: {txt_output}")


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


Mounted at /content/drive

🎧 Transcribing full audio: Chapter 5A - Assembling and installing ups.wav
✅ Done: Chapter 5A - Assembling and installing ups.wav
📄 TXT saved to: /content/drive/MyDrive/Test_28_Adnew_wav/testgenasr1/English/Srtformatissue/Chapter 5A - Assembling and installing ups.txt


# ASR with temperature 0

In [None]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive so the input/output folders below are reachable.
drive.mount('/content/drive', force_remount=True)

# Load the Gemini API key from Colab secrets and fail fast when it is empty.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    raise ValueError("❌ No GOOGLE_API_KEY found in Colab secrets! Add it under 'More → Secrets'.")

genai.configure(api_key=api_key)

# Model used by transcribe_audio_file() below.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# Both folders are derived from base_dir so they cannot drift apart.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = os.path.join(base_dir, "Hindi", "a")
output_dir = os.path.join(base_dir, "Test_28_Gemini25pro_asr_new", "Hin6", "Srtformatissue")
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path: str) -> str:
    """Transcribe one WAV file with Gemini (temperature 0.0) and return SRT text.

    The file is decoded with pydub, re-exported to WAV bytes in memory and sent
    inline, together with the SRT prompt, to the module-level ``model``. The
    prompt in this cell does not name a target language.

    Args:
        file_path: Path to the ``.wav`` file to transcribe.

    Returns:
        The stripped response text, or ``""`` when the API call (or reading
        ``response.text``) raises; the error is printed in that case.

    Note:
        Decoding happens outside the ``try`` block, so a file pydub cannot
        read still raises to the caller.
        NOTE(review): the audio is sent inline; very large files may exceed the
        API's inline request size limit - confirm against the Gemini docs.
    """
    audio = AudioSegment.from_wav(file_path)
    buffer = io.BytesIO()
    audio.export(buffer, format="wav")
    audio_bytes = buffer.getvalue()

    try:
        response = model.generate_content(
            contents=[
                {
                    "role": "user",
                    "parts": [
                        {"mime_type": "audio/wav", "data": audio_bytes},
                        """
                        Transcribe this audio exactly as spoken (strictly: no extra comments, strictly: no filler words)
                        in valid .srt format.

                        Before outputting, you MUST internally ensure:

                        - Each subtitle segment must contain atleast 2 sentences in same line, unless the audio ends and fewer remain.
                        - Maintain natural sentence boundaries.
                        - Combine sentences smoothly while keeping meaning and flow.
                        - Only create a new segment after atleast 2 sentences have been completed (except the final segment).
                        - Timestamp continuity must be correct and must not overlap and YOU MUST GIVE ACCURATE TIMESTAMPS UPTO HALF-SECOND
                        - Format must strictly be:

                          <index>
                          HH:MM:SS,SSS --> HH:MM:SS,SSS
                          text

                        Rules:
                        1. Timestamps must be chronological and continuous.
                        2. Every segment contains atleast 2 sentences (except final).
                        3. Never generate timestamps beyond the audio duration.
                        4. If Gemini outputs incorrect timestamps, fix them BEFORE final output.
                        5. No explanations. Only the final SRT.
                        6. Include speaker labels if detectable.
                        7. Silence > 2 seconds → include:
                          [Silence]
                          with correct timestamps.
                        """
                    ]
                }
            ],
            # Only the temperature is overridden; top_p / top_k stay at the
            # API defaults.
            generation_config=genai.types.GenerationConfig(
                temperature=0.0,
            )
        )

        return response.text.strip()
    except Exception as e:
        # Best-effort: report the failure and let the caller decide what to do
        # with an empty transcript.
        print("❌ Error:", e)
        return ""


# ========================
# MAIN PROCESS
# ========================

# Transcribe every WAV file in input_dir and store the result as <name>.txt.
for filename in os.listdir(input_dir):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\n🎧 Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)
    if not text:
        # transcribe_audio_file() returns "" on failure; do not leave an empty
        # transcript behind or report success for it.
        print(f"⚠️ No transcript returned for {filename}; skipping.")
        continue

    # splitext handles any extension casing (".WAV"), unlike str.replace,
    # which is case-sensitive and would also rewrite ".wav" mid-name.
    stem = os.path.splitext(filename)[0]
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"✅ Done: {filename}")
    print(f"📄 TXT saved to: {txt_output}")


Mounted at /content/drive

🎧 Transcribing full audio: Chapter 9A - E-Commerce, Usage of Job Portal.wav
✅ Done: Chapter 9A - E-Commerce, Usage of Job Portal.wav
📄 TXT saved to: /content/drive/MyDrive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/Hin6/Srtformatissue/Chapter 9A - E-Commerce, Usage of Job Portal.txt

🎧 Transcribing full audio: Chapter 10B - Packaging, Labeling & Branding.wav
✅ Done: Chapter 10B - Packaging, Labeling & Branding.wav
📄 TXT saved to: /content/drive/MyDrive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/Hin6/Srtformatissue/Chapter 10B - Packaging, Labeling & Branding.txt


# TXT to SRT conversion (needs error handling)

In [6]:
import os
import re

def normalize_timestamp(ts: str) -> str:
    """Return *ts* rewritten as a strict SRT timestamp ``HH:MM:SS,mmm``.

    Accepted inputs (spaces are ignored, ``.`` is treated like ``,``):

    * ``HH:MM:SS,mmm`` / ``MM:SS,mmm`` / ``SS,mmm`` - the part after the last
      comma is the fraction of a second.
    * ``HH:MM:SS`` / ``MM:SS`` / ``SS`` - no fraction, milliseconds are ``000``.
    * ``HH:MM:SS:mmm`` - colon used as the millisecond separator.
    * ``MM:SS:mmm`` / ``SS:mmm`` - recognised only when the last field has
      three or more digits, since a seconds field never does.

    The fraction is right-padded / truncated to three digits, and overflowing
    minutes or seconds are carried (``61:00,000`` -> ``01:01:00,000``).

    Args:
        ts: Raw timestamp text.

    Returns:
        The normalized timestamp, or ``"00:00:00,000"`` when *ts* cannot be
        interpreted.
    """
    fallback = "00:00:00,000"
    cleaned = ts.strip().replace(" ", "").replace(".", ",")

    if "," in cleaned:
        # Everything after the last comma is the fractional part; any earlier
        # comma is tolerated as a field separator (e.g. "00.00.01.500").
        main, _, frac = cleaned.rpartition(",")
        fields = re.split(r"[:,]", main)
    else:
        fields = cleaned.split(":")
        frac = ""
        # Without a comma the last field is only milliseconds when there are
        # four fields, or when it is too long to be a seconds value.
        colon_ms = len(fields) == 4 or (
            len(fields) in (2, 3) and len(fields[-1]) >= 3
        )
        if colon_ms:
            frac = fields.pop()

    if not 1 <= len(fields) <= 3:
        return fallback
    if any(re.fullmatch(r"[0-9]+", field) is None for field in fields):
        return fallback
    if frac and re.fullmatch(r"[0-9]+", frac) is None:
        return fallback

    ms = frac[:3].ljust(3, "0")

    # Left-pad with zeros so seconds-only and minutes:seconds inputs line up.
    h, m, s = [0] * (3 - len(fields)) + [int(field) for field in fields]
    total_seconds = h * 3600 + m * 60 + s
    h, remainder = divmod(total_seconds, 3600)
    m, s = divmod(remainder, 60)

    return f"{h:02d}:{m:02d}:{s:02d},{ms}"






def fix_srt_file(input_path, output_path):
    """Rewrite the timing lines of one subtitle file into strict SRT form.

    Every line containing two timestamps joined by an arrow-like separator
    (``-->``, ``->``, ``-``, an en/em dash, ...) is replaced by
    ``<start> --> <end>`` with both sides passed through
    ``normalize_timestamp``. All other lines are copied unchanged.

    Args:
        input_path: Path of the ``.srt``/``.txt`` file to read (UTF-8;
            undecodable bytes are dropped).
        output_path: Path of the file to write (UTF-8).
    """
    with open(input_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    # One timestamp: 1-3 digits, one or two ":NN" groups, then an optional
    # fraction introduced by ".", "," or ":" (e.g. 00:00:10:159).
    # The look-behind stops a match from starting in the middle of a longer
    # timestamp, which would silently drop the hour field.
    timestamp = r'\d{1,3}(?::\d{1,2}){1,2}(?:[.,:]\d{1,4})?'
    ts_pattern = re.compile(
        rf'(?<![\d:.,])({timestamp})\s*[-–—>]+\s*({timestamp})'
    )

    new_lines = []
    for line in lines:
        match = ts_pattern.search(line)
        if match:
            start, end = match.groups()
            start = normalize_timestamp(start)
            end = normalize_timestamp(end)
            new_lines.append(f"{start} --> {end}\n")
        else:
            new_lines.append(line)

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)


def process_folder(input_folder, output_folder):
    """Convert every ``.txt`` file under *input_folder* into a fixed ``.srt``.

    The input tree is walked recursively. For each file whose name ends in
    ``.txt`` (case-insensitive) the same relative path, with the extension
    replaced by ``.srt``, is created under *output_folder* and filled by
    ``fix_srt_file``. Files with any other extension are ignored.

    Args:
        input_folder: Root directory to scan.
        output_folder: Root directory for the results; created when missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    for current_dir, _subdirs, names in os.walk(input_folder):
        for name in names:
            if not name.lower().endswith('.txt'):
                continue

            source = os.path.join(current_dir, name)
            relative = os.path.relpath(source, input_folder)
            stem, _extension = os.path.splitext(relative)
            target = os.path.join(output_folder, stem + '.srt')

            # Mirror the sub-directory layout of the input tree.
            os.makedirs(os.path.dirname(target), exist_ok=True)
            print(f"Fixing: {relative}")
            fix_srt_file(source, target)

    print("\n All files processed and saved in:", output_folder)



if __name__ == "__main__":
    # Input and output share one folder: each .srt is written next to the
    # .txt file it was generated from.
    input_folder = output_folder = "/content/drive/My Drive/Test_28_Adnew_mp3/ASRgemini/"
    process_folder(input_folder, output_folder)


Fixing: 1stpass Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.txt
Fixing: 2ndpass Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.txt

 All files processed and saved in: /content/drive/My Drive/Test_28_Adnew_mp3/ASRgemini/


# json code block

In [None]:
def split_audio_chunks(video_path, chunk_length_ms=300000):
    """Export the audio of *video_path* as one or more MP3 chunks.

    The audio is loaded with pydub and written into a freshly created
    ``temp_chunks`` directory (relative to the current working directory; an
    existing directory of that name is deleted first). Media no longer than
    *chunk_length_ms* is exported as a single ``full_audio.mp3``; otherwise
    consecutive ``chunk_<i>.mp3`` files of at most *chunk_length_ms* each are
    written.

    Args:
        video_path: Path of the media file to read.
        chunk_length_ms: Maximum chunk length in milliseconds (default 5 min).

    Returns:
        ``(chunk_paths, temp_dir)`` - the chunk files in playback order and
        the directory holding them. The caller is responsible for removing
        ``temp_dir``.

    Raises:
        ValueError: If *chunk_length_ms* is not positive.
    """
    if chunk_length_ms <= 0:
        raise ValueError("chunk_length_ms must be a positive number of milliseconds")

    print(f"Splitting audio into {chunk_length_ms/1000}s chunks...")
    audio = AudioSegment.from_file(video_path)
    chunks = []

    # Create temp dir for chunks (start from a clean directory every time).
    temp_dir = "temp_chunks"
    if os.path.exists(temp_dir):
        shutil.rmtree(temp_dir)
    os.makedirs(temp_dir)

    # len() of a pydub AudioSegment is its duration in milliseconds.
    duration_ms = len(audio)

    if duration_ms <= chunk_length_ms:
        print(f"Video duration ({duration_ms/1000}s) is under limit. Processing as single block.")
        chunk_path = os.path.join(temp_dir, "full_audio.mp3")
        audio.export(chunk_path, format="mp3")
        return [chunk_path], temp_dir

    print(f"Video duration ({duration_ms/1000}s) exceeds limit. Splitting into chunks...")
    for i, start_ms in enumerate(range(0, duration_ms, chunk_length_ms)):
        end_ms = min(start_ms + chunk_length_ms, duration_ms)
        chunk = audio[start_ms:end_ms]
        chunk_path = os.path.join(temp_dir, f"chunk_{i}.mp3")
        chunk.export(chunk_path, format="mp3")
        chunks.append(chunk_path)

    return chunks, temp_dir

def parse_gemini_json(response_text, offset_ms=0):
    """Parse a Gemini JSON transcript and shift its timestamps by *offset_ms*.

    The response may be wrapped in a Markdown code fence (with or without a
    ``json`` tag); the first fenced block is used when present. The payload
    must be a JSON list of objects with ``start``, ``end`` and ``text`` keys.
    Objects whose ``type`` is ``"silence"`` are dropped, and malformed entries
    are reported and skipped.

    Args:
        response_text: Raw text returned by the model.
        offset_ms: Milliseconds added to every start/end time (the position of
            the chunk inside the full recording).

    Returns:
        A list of dicts with ``start``/``end`` (int milliseconds), ``text``,
        and ``start_str``/``end_str`` (``HH:MM:SS,mmm``). An empty list when
        the payload is not valid JSON or not a list.
    """
    # Extract JSON from code blocks if present
    if "```json" in response_text:
        response_text = response_text.split("```json")[1].split("```")[0]
    elif "```" in response_text:
        response_text = response_text.split("```")[1].split("```")[0]

    try:
        segments = json.loads(response_text)
    except json.JSONDecodeError:
        print(f"Error decoding JSON: {response_text}")
        return []

    if not isinstance(segments, list):
        # Iterating a dict or a scalar here would crash or yield nonsense.
        print(f"Expected a JSON list of segments, got {type(segments).__name__}")
        return []

    def parse_time(t_str):
        """Convert ``HH:MM:SS,mmm`` or ``MM:SS,mmm`` to integer milliseconds."""
        t_str = t_str.strip().replace('.', ',')
        main, _, frac = t_str.partition(',')
        parts = main.split(':')

        if len(parts) == 3:  # HH:MM:SS
            h, m, s = parts
        elif len(parts) == 2:  # MM:SS
            h = "0"
            m, s = parts
        else:
            raise ValueError(f"Invalid time format: {t_str}")

        if frac and not frac.isdigit():
            raise ValueError(f"Invalid time format: {t_str}")
        # The fraction is a decimal part of a second: ",5" means 500 ms.
        ms = int(frac[:3].ljust(3, "0")) if frac else 0

        return (int(h) * 3600 + int(m) * 60 + int(s)) * 1000 + ms

    def format_time(ms):
        """Format integer milliseconds as ``HH:MM:SS,mmm``."""
        total_s, ms = divmod(int(ms), 1000)
        total_m, s = divmod(total_s, 60)
        h, m = divmod(total_m, 60)
        return f"{h:02}:{m:02}:{s:02},{ms:03}"

    processed_segments = []
    for seg in segments:
        if not isinstance(seg, dict):
            print(f"Skipping malformed segment: {seg} - not a JSON object")
            continue

        if seg.get('type') == 'silence':
            continue

        try:
            start_ms = parse_time(seg['start']) + offset_ms
            end_ms = parse_time(seg['end']) + offset_ms

            processed_segments.append({
                "start": start_ms,
                "end": end_ms,
                "text": seg['text'],
                "start_str": format_time(start_ms),
                "end_str": format_time(end_ms)
            })
        except Exception as e:
            # Best-effort: one bad entry must not discard the whole chunk.
            print(f"Skipping malformed segment: {seg} - {e}")

    return processed_segments

def segments_to_srt(segments):
    """Serialize parsed segments into SRT text.

    Each segment becomes a numbered cue (starting at 1) built from its
    ``start_str``, ``end_str`` and ``text`` entries; cues are separated by a
    blank line and the result ends with a newline. An empty input yields an
    empty string.
    """
    cues = [
        f"{number}\n{entry['start_str']} --> {entry['end_str']}\n{entry['text']}\n"
        for number, entry in enumerate(segments, start=1)
    ]
    return "\n".join(cues)


def generate_srt_gemini(video_path, input_lang="Hindi", model="gemini-2.5-pro", duration_limit=None,
                        chunk_length_ms=300000):
    """Generate SRT text for *video_path* by transcribing it chunk by chunk.

    The audio is split with ``split_audio_chunks``; every chunk is uploaded,
    transcribed into a JSON segment list by Gemini (temperature 0.0), parsed
    with ``parse_gemini_json`` and shifted by the chunk's position in the full
    recording. A chunk whose request or parsing fails is reported and skipped,
    so the result may have gaps. The temporary chunk directory is always
    removed.

    Args:
        video_path: Path of the media file to transcribe.
        input_lang: Spoken language named in the prompt.
        model: Gemini model name passed to ``client.models.generate_content``.
        duration_limit: Currently unused; kept for interface compatibility.
        chunk_length_ms: Chunk length in milliseconds (default 5 minutes).
            The same value drives both the split and the timestamp offset.

    Returns:
        The combined transcript as SRT text.
    """
    # 1. Split audio into chunks
    chunk_paths, temp_dir = split_audio_chunks(video_path, chunk_length_ms=chunk_length_ms)

    all_segments = []

    try:
        for i, chunk_path in enumerate(chunk_paths):
            print(f"Processing chunk {i+1}/{len(chunk_paths)}: {chunk_path}")

            # Upload chunk
            chunk_file = upload_to_gemini(chunk_path, mime_type="audio/mp3")
            wait_for_files_active([chunk_file])

            # JSON Prompt
            prompt = f"""
            Role: You are a high-precision ASR (Automatic Speech Recognition) system specialized in time-aligned transcription.

            INPUT: Audio Chunk ({i+1}/{len(chunk_paths)})
            TASK: Transcribe the spoken audio into a structured segment list.

            STRICT EXECUTION RULES:
            1.  **Time Alignment:** Your primary priority is timestamp accuracy. Do not guess. Align the start and end times strictly to the audio waveform.
            2.  **Granularity:** Segments must be sentence-based. Do not break mid-sentence unless there is a significant pause.
            3.  **Monotonicity:** Timestamps must be strictly increasing. Overlapping timestamps are forbidden.
            4.  **Multilingual Handling:** Transcribe {input_lang} natively. If technical terms appear in English/Latin script, preserve them in English (e.g., "API", "Pipeline", "Latency"). DO NOT translate technical terms.
            5.  **Clean-up:** Omit all filler words (uh, um, mm,uhm). Omit conversational stuttering.

            OUTPUT FORMAT:
            Provide the output as a valid JSON list of objects. This format is required to ensure timestamp precision.
            [
              {{
                "start": "HH:MM:SS,mmm",
                "end": "HH:MM:SS,mmm",
                "text": "The actual spoken text goes here."
              }},
              {{
                "start": "HH:MM:SS,mmm",
                "end": "HH:MM:SS,mmm",
                "type": "silence"
              }}
            ]

            **Constraint on Silence:** Only include a "type": "silence" object if the pause is strictly greater than 1.0 seconds.

            BEGIN TRANSCRIPTION:
            """

            print(f"Requesting transcript for chunk {i+1}...")

            # Set temperature to 0.0 for deterministic output
            generation_config = {"temperature": 0.0}

            try:
                response = client.models.generate_content(
                    model=model,
                    contents=[chunk_file, prompt],
                    config=generation_config
                )

                # Chunk timestamps are relative to the chunk start; shift them
                # by the chunk's position in the full recording.
                offset_ms = i * chunk_length_ms
                segments = parse_gemini_json(response.text, offset_ms=offset_ms)
                all_segments.extend(segments)

            except Exception as e:
                # Best-effort: keep the chunks that did succeed.
                print(f"Error processing chunk {i+1}: {e}")

    finally:
        # Cleanup temp dir
        if os.path.exists(temp_dir):
            shutil.rmtree(temp_dir)

    # Convert to SRT
    return segments_to_srt(all_segments)


def fix_srt_timestamps(srt_content):
    """Rewrite every timing line of *srt_content* as strict ``HH:MM:SS,mmm``.

    Handles:
    - MM:SS,mmm      -> 00:MM:SS,mmm
    - H:MM:SS,mmm    -> 0H:MM:SS,mmm
    - 61:00,000      -> 01:01:00,000 (carries overflowing minutes/seconds)
    - HH:MM:SS:mmm   -> HH:MM:SS,mmm (colon used as millisecond separator)
    - ``.`` as the millisecond separator, short or long fractions

    A timestamp that cannot be interpreted is left as it was (stripped, with
    ``.`` replaced by ``,``). Lines without exactly one ``-->`` are copied
    unchanged. Lines are re-joined with ``\\n``, so a trailing newline of the
    input is not preserved.

    Args:
        srt_content: SRT text to repair.

    Returns:
        The repaired SRT text.
    """
    def normalize_timestamp(ts_str):
        ts_str = ts_str.strip()
        # Handle comma/dot
        ts_str = ts_str.replace('.', ',')

        # Split milliseconds at the LAST comma; a plain split(',') would raise
        # on inputs containing more than one comma.
        if ',' in ts_str:
            main, _, ms = ts_str.rpartition(',')
        else:
            main, ms = ts_str, ""

        parts = main.split(':')

        # HH:MM:SS:mmm - the fourth colon-separated field is the milliseconds.
        if not ms and len(parts) == 4:
            ms = parts.pop()

        if ms and not ms.isdigit():
            return ts_str  # Return original if unrecognizable

        try:
            values = [int(part) for part in parts]
        except ValueError:
            return ts_str

        if len(values) == 3:  # HH:MM:SS
            h, m, s = values
        elif len(values) == 2:  # MM:SS
            h = 0
            m, s = values
        else:
            return ts_str  # Return original if unrecognizable

        # Re-calculate HH:MM:SS so overflowing fields are carried.
        total_seconds = h * 3600 + m * 60 + s
        h = total_seconds // 3600
        m = (total_seconds % 3600) // 60
        s = total_seconds % 60

        return f"{h:02}:{m:02}:{s:02},{ms.ljust(3, '0')[:3]}"

    new_lines = []
    for line in srt_content.splitlines():
        parts = line.split("-->") if "-->" in line else []
        if len(parts) == 2:
            start = normalize_timestamp(parts[0])
            end = normalize_timestamp(parts[1])
            new_lines.append(f"{start} --> {end}")
        else:
            new_lines.append(line)

    return "\n".join(new_lines)


def merge_segments_to_sentences(srt_content):
    """Merge consecutive SRT cues until each merged cue ends a sentence.

    Cues are parsed with ``parse_srt`` and their texts joined with spaces
    until the accumulated text ends with sentence-final punctuation. The
    merged cue spans from the start of its first source cue to the end of its
    last one. Text left over after the final cue is emitted as a last cue
    even without terminal punctuation.

    Args:
        srt_content: SRT text with (typically short) cues.

    Returns:
        SRT text with one cue per merged sentence, renumbered from 1.
    """
    segments = parse_srt(srt_content)
    merged_segments = []

    current_text = ""
    current_start = None

    # Punctuation that ends a sentence
    # English: . ? !
    # Hindi: ? ! and the danda, written either as ASCII "|" or as "।" (U+0964)
    terminals = ('.', '?', '!', '|', '।')

    def to_srt_time(value_ms):
        """Format milliseconds as HH:MM:SS,mmm (hours are not wrapped at 24)."""
        hours, rest = divmod(int(value_ms), 3_600_000)
        minutes, rest = divmod(rest, 60_000)
        seconds, millis = divmod(rest, 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    for seg in segments:
        text = seg['text'].strip()
        start = seg['start']
        end = seg['end']

        if current_start is None:
            current_start = start

        if current_text:
            current_text += " " + text
        else:
            current_text = text

        # Check if this segment ends a sentence
        if current_text.strip().endswith(terminals):
            merged_segments.append({
                "start": current_start,
                "end": end,
                "text": current_text.strip()
            })
            current_text = ""
            current_start = None

    # Handle remaining text
    if current_text and current_start is not None:
        merged_segments.append({
            "start": current_start,
            "end": segments[-1]['end'],  # Use end of last processed segment
            "text": current_text.strip()
        })

    # Re-serialize to SRT
    lines = [
        f"{i}\n{to_srt_time(seg['start'])} --> {to_srt_time(seg['end'])}\n{seg['text']}\n"
        for i, seg in enumerate(merged_segments, start=1)
    ]

    return "\n".join(lines)


def parse_srt_time(time_str):
    """Parse an SRT-style timestamp into integer milliseconds.

    Accepted forms (``,`` and ``.`` are interchangeable):

    * ``HH:MM:SS,mmm`` / ``MM:SS,mmm`` / ``SS,mmm``
    * ``HH:MM:SS`` / ``MM:SS`` / ``SS`` (no fraction)
    * ``HH:MM:SS:mmm`` - colon used as the millisecond separator
    * ``MM:SS:mmm`` - recognised only when the last field has three or more
      digits, since a seconds field never does

    The fraction is a decimal part of a second (``,5`` is 500 ms) and is
    truncated to millisecond precision. Field values are not range-checked,
    so ``61:00,000`` and hours of 24 or more are accepted.

    Args:
        time_str: Timestamp text.

    Returns:
        Milliseconds as an ``int``, or ``0`` when *time_str* cannot be parsed.
    """
    text = time_str.strip().replace(',', '.')
    main, has_fraction, frac = text.partition('.')
    fields = main.split(':')

    # Handle colon as millisecond separator (e.g. 00:00:10:159 or 01:00:213).
    if not has_fraction and (
        len(fields) == 4 or (len(fields) == 3 and len(fields[-1]) >= 3)
    ):
        frac = fields.pop()

    if not 1 <= len(fields) <= 3:
        return 0
    if not all(field.isdigit() for field in fields):
        return 0
    if frac and not frac.isdigit():
        return 0

    try:
        values = [int(field) for field in fields]
        millis = int(frac[:3].ljust(3, '0')) if frac else 0
    except ValueError:
        # isdigit() accepts a few characters int() rejects (e.g. superscripts).
        return 0

    # Left-pad with zeros so SS and MM:SS inputs line up with HH:MM:SS.
    hours, minutes, seconds = [0] * (3 - len(values)) + values
    return (hours * 3600 + minutes * 60 + seconds) * 1000 + millis

def parse_srt(srt_content):
    """Parse SRT text into a list of segments; tolerant of missing blank lines.

    A cue starts at a line made only of digits that is immediately followed
    by a line containing ``-->``. The timing line is split on ``-->`` (spaces
    around the arrow are optional) and both sides go through
    ``parse_srt_time``. All following non-empty lines, up to the next cue
    start, are joined with single spaces to form the cue text.

    Cues without text, and cues whose timing line does not contain exactly
    one arrow, are dropped.

    Args:
        srt_content: SRT text.

    Returns:
        A list of dicts with ``start`` and ``end`` (milliseconds, as returned
        by ``parse_srt_time``) and ``text``.
    """
    segments = []
    lines = srt_content.splitlines()
    n = len(lines)

    current_seg = {}

    i = 0
    while i < n:
        line = lines[i].strip()

        # A new cue is an index line followed by a timing line.
        if line.isdigit() and i + 1 < n and '-->' in lines[i + 1]:
            # Save the cue we were building, if it is complete.
            if 'text' in current_seg and 'start' in current_seg:
                segments.append(current_seg)
            # Always start from a clean dict so times/text of an incomplete
            # previous cue can never leak into this one.
            current_seg = {}

            time_parts = lines[i + 1].split('-->')
            if len(time_parts) == 2:
                current_seg['start'] = parse_srt_time(time_parts[0])
                current_seg['end'] = parse_srt_time(time_parts[1])

            i += 2  # Skip index and time
            continue

        # Any other non-empty line is text of the current cue (if one is open).
        if 'start' in current_seg and line:
            if 'text' in current_seg:
                current_seg['text'] += ' ' + line
            else:
                current_seg['text'] = line

        i += 1

    # Append last segment
    if 'text' in current_seg and 'start' in current_seg:
        segments.append(current_seg)

    return segments
