# new

In [None]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive at /content/drive. force_remount=True is passed,
# presumably so that a stale mount from an earlier run is replaced.
drive.mount('/content/drive', force_remount=True)

# Read the Gemini API key from Colab secrets so the key itself never appears
# in the notebook source.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    # Fail fast: every later step needs an authenticated client.
    raise ValueError("‚ùå No GOOGLE_API_KEY found in Colab secrets! Add it under 'More ‚Üí Secrets'.")

# Hand the key to the google.generativeai client.
genai.configure(api_key=api_key)

# Model handle; transcribe_audio_file() reads this module-level name.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# NOTE: input_dir is written out in full rather than derived from base_dir,
# so the two paths have to be kept in sync by hand.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/English/a"
output_dir = os.path.join(base_dir,"Test_28_Gemini25pro_asr_new","Eng","Srtformatissue")
# Create the output folder (and any missing parents) before the main loop.
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path):
    """Transcribe one full audio file (no splitting) into SRT-formatted text.

    The WAV is uploaded through the Gemini Files API and referenced from the
    request instead of being decoded, re-encoded and embedded inline. The
    recordings handled here are far larger than the inline request limit,
    and embedding them is what produced connection timeouts.

    Args:
        file_path: Path to the .wav file to transcribe.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        an empty string when the upload or the generation call fails (the
        error is printed, not raised).
    """
    # Prompt text is sent verbatim, including its leading indentation.
    prompt = """
                    You are a Subtitle Generator.

                    Transcribe this audio exactly as spoken (strictly: no extra comments, strictly: no filler words)
                    in valid .srt format.

                    Before outputting, you MUST internally ensure:

                    - Each subtitle segment must contain exactly 3 sentences in same line, unless the audio ends and fewer remain.
                    - Maintain natural sentence boundaries.
                    - Combine sentences smoothly while keeping meaning and flow.
                    - Only create a new segment after exactly 3 sentences have been completed (except the final segment).
                    - Timestamp continuity must be correct and must not overlap.
                    - Format must strictly be:

                      <index>
                      HH:MM:SS,SSS --> HH:MM:SS,SSS
                      text

                    Rules:
                    1. Timestamps must be chronological and continuous.
                    2. Every segment contains exactly 3 sentences (except final).
                    3. Never generate timestamps beyond the audio duration.
                    4. If Gemini outputs incorrect timestamps, fix them BEFORE final output.
                    5. No explanations. Only the final SRT.
                    6. Include speaker labels if detectable.
                    7. Silence > 2 seconds ‚Üí include:
                      [Silence]
                      with correct timestamps.


                    """

    try:
        # Upload once and pass the file handle; the request itself stays small.
        uploaded_audio = genai.upload_file(path=file_path, mime_type="audio/wav")
        response = model.generate_content(
            [uploaded_audio, prompt],
            # Long recordings take minutes to transcribe; allow for that
            # explicitly instead of relying on the client default.
            request_options={"timeout": 600},
        )
        return response.text.strip()
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide
        # what to do with an empty result.
        print("‚ùå Error:", e)
        return ""

# ========================
# MAIN PROCESS
# ========================

# Transcribe every .wav in input_dir and save each result as a .txt file.
# sorted() makes the processing order deterministic between runs.
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\nüéß Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)

    # transcribe_audio_file() returns "" on failure. Writing that out would
    # leave an empty .txt that looks like a finished transcript.
    if not text:
        print(f"Skipped (no transcription returned): {filename}")
        continue

    # Swap the extension with splitext: unlike str.replace it is not
    # case-sensitive (".WAV") and cannot touch ".wav" elsewhere in the name.
    stem, _ = os.path.splitext(filename)
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"‚úÖ Done: {filename}")
    print(f"üìÑ TXT saved to: {txt_output}")


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


Mounted at /content/drive

üéß Transcribing full audio: Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav


KeyboardInterrupt: 

# reduce audio size

In [None]:
import os
from pydub import AudioSegment

from google.colab import drive, userdata
# Mount Google Drive so the /content/drive/... paths used below resolve.
drive.mount('/content/drive', force_remount=True)

# ========================
# HELPER FUNCTION TO REDUCE FILE SIZE
# ========================

def reduce_wav_size(file_path, target_size_mb=25, output_path=None, min_frame_rate=8000):
    """
    Shrink a .wav file by converting it to mono and lowering its sample rate.

    The audio is exported as 16-bit PCM, then the sample rate is lowered in
    1 kHz steps until the exported file fits within ``target_size_mb`` or the
    rate would fall below ``min_frame_rate``.

    Args:
        file_path: Path of the source .wav file.
        target_size_mb: Desired maximum size of the result, in megabytes.
        output_path: Where to write the reduced file. When omitted, the
            source file itself is overwritten.
        min_frame_rate: Lowest sample rate (Hz) the function will go down to.
            Without a floor, a long recording pushes the rate to zero or
            below and the resample call fails.

    Returns:
        The path the reduced file was written to. The file may still exceed
        the target when the floor was reached; a warning is printed then.
    """
    # Load the audio file
    audio = AudioSegment.from_wav(file_path)

    # Convert to mono (if stereo)
    audio = audio.set_channels(1)

    # Cap the sample rate at 22050 Hz. Never resample upwards: that would
    # make an already low-rate file larger, not smaller.
    if audio.frame_rate > 22050:
        audio = audio.set_frame_rate(22050)

    # Export first, then measure: the size check works on the real file.
    temp_path = output_path if output_path else file_path
    target_bytes = target_size_mb * 1024 * 1024
    audio.export(temp_path, format="wav", codec="pcm_s16le")
    current_size = os.path.getsize(temp_path)

    while current_size > target_bytes:
        next_rate = audio.frame_rate - 1000  # Reduce 1 kHz at a time
        if next_rate < min_frame_rate:
            print(
                f"Cannot reach {target_size_mb} MB without going below "
                f"{min_frame_rate} Hz; keeping {audio.frame_rate} Hz."
            )
            break

        print(f"File too large ({current_size} bytes). Reducing further...")
        audio = audio.set_frame_rate(next_rate)

        # Re-export the file
        audio.export(temp_path, format="wav", codec="pcm_s16le")
        current_size = os.path.getsize(temp_path)

    print(f"Final file size: {current_size / (1024 * 1024):.2f} MB")
    return temp_path

# ========================
# REDUCE FILE SIZE FOR ALL .WAV FILES IN THE INPUT FOLDER
# ========================

# Source folder and destination folder for the size-reduced copies.
input_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/English/a"
output_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/English/a/Reduced"
os.makedirs(output_dir, exist_ok=True)

# Walk the top level of input_dir only; anything that is not a .wav
# (including the "Reduced" sub-folder itself) is skipped.
for filename in os.listdir(input_dir):
    if not filename.lower().endswith(".wav"):
        continue

    print(f"üéß Reducing size for {filename}...")

    # Write the reduced copy under the same name inside output_dir.
    reduced_file = reduce_wav_size(
        os.path.join(input_dir, filename),
        target_size_mb=25,
        output_path=os.path.join(output_dir, filename),
    )

    print(f"‚úÖ Reduced file saved at: {reduced_file}")


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


Mounted at /content/drive
üéß Reducing size for Chapter 9C - Battery management system.wav...
File too large (47575390 bytes). Reducing further...
File too large (45417778 bytes). Reducing further...
File too large (43260166 bytes). Reducing further...
File too large (41102554 bytes). Reducing further...
File too large (38944942 bytes). Reducing further...
File too large (36787330 bytes). Reducing further...
File too large (34629720 bytes). Reducing further...
File too large (32472110 bytes). Reducing further...
File too large (30314500 bytes). Reducing further...
File too large (28156890 bytes). Reducing further...
Final file size: 24.79 MB
‚úÖ Reduced file saved at: /content/drive/MyDrive/Test_28_Adnew_wav/English/a/Reduced/Chapter 9C - Battery management system.wav
üéß Reducing size for Chapter 5A - Use of Growing and Rooting Media in Floriculture.wav...
File too large (76220870 bytes). Reducing further...
File too large (72764144 bytes). Reducing further...
File too large (693074

# Audio length checker

In [None]:
from google.colab import drive, userdata
# Mount Google Drive so the duration-listing cells below can read the WAVs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import os
import wave

# Folder whose .wav files are measured.
directory = "/content/drive/MyDrive/Test_28_Adnew_wav/Hindi/"

# Print the playing time of every .wav in the folder, in name order.
for fname in sorted(os.listdir(directory)):
    if not fname.lower().endswith(".wav"):
        continue

    # Duration in seconds = number of frames / frames per second.
    with wave.open(os.path.join(directory, fname), 'rb') as w:
        duration = w.getnframes() / float(w.getframerate())

    whole_minutes, seconds = divmod(duration, 60)
    minutes = int(whole_minutes)
    print(f"{fname} ‚Äî {minutes}:{seconds:05.2f} (min:sec)")


Chapter 10B - Packaging, Labeling & Branding.wav ‚Äî 16:07.81 (min:sec)
Chapter 3C - Soy Wax candle Preparation.wav ‚Äî 16:02.84 (min:sec)
Chapter 5B - The Concept of Book Keeping and its Fundamental.wav ‚Äî 10:02.60 (min:sec)
Chapter 9A - E-Commerce, Usage of Job Portal.wav ‚Äî 7:39.97 (min:sec)
Copy of Chapter 8B - Sewing of Regular Blouse.wav ‚Äî 43:59.85 (min:sec)


In [None]:
import os
import wave

# Folder whose .wav files are measured.
directory = "/content/drive/MyDrive/Test_28_Adnew_wav/English/"

# Print the playing time of every .wav in the folder, in name order.
for fname in sorted(os.listdir(directory)):
    if not fname.lower().endswith(".wav"):
        continue

    # Duration in seconds = number of frames / frames per second.
    with wave.open(os.path.join(directory, fname), 'rb') as w:
        duration = w.getnframes() / float(w.getframerate())

    whole_minutes, seconds = divmod(duration, 60)
    minutes = int(whole_minutes)
    print(f"{fname} ‚Äî {minutes}:{seconds:05.2f} (min:sec)")


Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav ‚Äî 35:20.33 (min:sec)
Chapter 2A - Green House and Poly House Types and Management.wav ‚Äî 22:16.96 (min:sec)
Chapter 4I - Sustainable Floriculture_ Water Conservation, Organics & Waste Management.wav ‚Äî 7:00.60 (min:sec)
Chapter 5A - Assembling and installing ups.wav ‚Äî 11:57.06 (min:sec)
Chapter 5A - Use of Growing and Rooting Media in Floriculture.wav ‚Äî 28:48.36 (min:sec)
Chapter 6A - Sucessful Entreuprenuer Journey.wav ‚Äî 25:08.74 (min:sec)
Chapter 9C - Battery management system.wav ‚Äî 17:58.81 (min:sec)


# Fix txt to srt

In [None]:
import os
import re

def normalize_timestamp(ts: str) -> str:
    """Return *ts* rewritten as a valid SRT timestamp ``HH:MM:SS,mmm``.

    Accepted inputs:

    * ``HH:MM:SS``, ``MM:SS`` or ``SS`` followed by ``,`` or ``.`` and a
      fractional part of any length (``1:02,5`` -> ``00:01:02,500``).
    * Colon-only forms where the milliseconds were written as one more
      colon group: ``HH:MM:SS:mmm``, or ``MM:SS:mmm`` / ``SS:mmm`` when the
      last group has three or more digits.
    * Plain ``HH:MM:SS`` / ``MM:SS`` / ``SS`` with no milliseconds.

    Whitespace anywhere in the input is ignored. Fields that overflow
    (e.g. 75 minutes) are carried into the next unit so the result is always
    well-formed. Input that cannot be interpreted yields ``00:00:00`` with
    whatever fractional digits could be read.

    Args:
        ts: Raw timestamp text.

    Returns:
        The timestamp formatted as ``HH:MM:SS,mmm``.
    """
    cleaned = re.sub(r"\s+", "", ts).replace(".", ",")

    if "," in cleaned:
        # An explicit separator is unambiguous: everything after the last
        # one is the fraction, however many digits it has (even none).
        clock, _, fraction = cleaned.rpartition(",")
        fields = re.split(r"[:,]", clock)
    else:
        fields = cleaned.split(":")
        fraction = ""
        # With colons only, the last group is milliseconds when there is one
        # group too many for H:M:S, or when it is too long to be a seconds
        # field. A plain two-digit trailing group stays a seconds value.
        if len(fields) == 4 or (len(fields) > 1 and len(fields[-1]) >= 3):
            fraction = fields.pop()

    # Keep only leading digits, then truncate or zero-pad to three places.
    ms = re.match(r"\d*", fraction).group()[:3].ljust(3, "0")

    # Broken-format fallback: wrong number of fields or non-numeric content.
    if not 1 <= len(fields) <= 3 or not all(f.isdecimal() for f in fields):
        return f"00:00:00,{ms}"

    # Left-pad with zeros so the fields always map onto (hours, minutes, seconds).
    h, m, s = [0] * (3 - len(fields)) + [int(f) for f in fields]

    # Carry overflow: e.g. 75 minutes becomes 1 hour 15 minutes.
    h, remainder = divmod(h * 3600 + m * 60 + s, 3600)
    m, s = divmod(remainder, 60)

    return f"{h:02d}:{m:02d}:{s:02d},{ms}"






# One timestamp: a leading 1-2 digit group, one to three further groups each
# introduced by a colon, and an optional fraction after "," or ".". At least
# one colon is required so that plain numbers in subtitle text
# ("100-200 volts") are never mistaken for a timing line.
_TIMESTAMP = r'\d{1,2}(?::\d{1,3}){1,3}(?:[.,]\d*)?'

# Two timestamps joined by an arrow made of hyphens, en/em dashes and ">".
# The dashes are written as escapes so the pattern does not depend on how
# this file's own encoding is interpreted.
_TIMESTAMP_LINE = re.compile(
    r'(' + _TIMESTAMP + r')\s*[-\u2013\u2014>]+\s*(' + _TIMESTAMP + r')'
)


def fix_srt_file(input_path, output_path):
    """
    Read one .srt/.txt transcript, repair its timing lines, and write the
    result to ``output_path``.

    Every line that contains a ``<timestamp> <arrow> <timestamp>`` pair is
    replaced by ``<start> --> <end>`` with both ends passed through
    ``normalize_timestamp``; any other text on that line is dropped. All
    remaining lines are copied through unchanged.

    Args:
        input_path: File to read (decoded as UTF-8; undecodable bytes are
            ignored).
        output_path: File to write (UTF-8).
    """
    with open(input_path, 'r', encoding='utf-8', errors='ignore') as f:
        lines = f.readlines()

    new_lines = []
    for line in lines:
        match = _TIMESTAMP_LINE.search(line)
        if match is None:
            new_lines.append(line)
            continue

        start, end = (normalize_timestamp(part) for part in match.groups())
        new_lines.append(f"{start} --> {end}\n")

    with open(output_path, 'w', encoding='utf-8') as f:
        f.writelines(new_lines)


def process_folder(input_folder, output_folder):
    """
    Convert every .txt transcript under ``input_folder`` (searched
    recursively) into a fixed .srt file under ``output_folder``.

    The sub-folder layout of the input is mirrored in the output; only the
    extension changes. Files with any other extension are left alone.
    """
    os.makedirs(output_folder, exist_ok=True)

    for current_dir, _subdirs, names in os.walk(input_folder):
        for name in names:
            if not name.lower().endswith('.txt'):
                continue

            source = os.path.join(current_dir, name)
            # Path relative to the input root, reused under the output root.
            relative = os.path.relpath(source, input_folder)
            stem = os.path.splitext(relative)[0]
            target = os.path.join(output_folder, stem + '.srt')

            os.makedirs(os.path.dirname(target), exist_ok=True)
            print(f"Fixing: {relative}")
            fix_srt_file(source, target)

    print("\n All files processed and saved in:", output_folder)



if __name__ == "__main__":
    # Folder holding the raw .txt transcripts to repair.
    # NOTE(review): these paths use "My Drive" while the other cells use
    # "MyDrive" - confirm both resolve to the same Drive folder.
    input_folder = "/content/drive/My Drive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/hin_replace1/"
    # The output folder sits inside the input folder. process_folder() only
    # picks up .txt files, so the .srt files written here are not re-read.
    output_folder = "/content/drive/My Drive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/hin_replace1/Fixed_srt/"

    process_folder(input_folder, output_folder)


Fixing: Copy of Chapter 5B - The Concept of Book Keeping and its Fundamental.txt

 All files processed and saved in: /content/drive/My Drive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/hin_replace1/Fixed_srt/


# old

In [None]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive at /content/drive. force_remount=True is passed,
# presumably so that a stale mount from an earlier run is replaced.
drive.mount('/content/drive', force_remount=True)

# Read the Gemini API key from Colab secrets so the key itself never appears
# in the notebook source.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    # Fail fast: every later step needs an authenticated client.
    raise ValueError("‚ùå No GOOGLE_API_KEY found in Colab secrets! Add it under 'More ‚Üí Secrets'.")

# Hand the key to the google.generativeai client.
genai.configure(api_key=api_key)

# Model handle; transcribe_audio_file() reads this module-level name.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# NOTE: input_dir is written out in full rather than derived from base_dir,
# so the two paths have to be kept in sync by hand.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/English"
output_dir = os.path.join(base_dir,"Test_28_Gemini25pro_asr_new","Eng2","Srtformatissue")
# Create the output folder (and any missing parents) before the main loop.
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path):
    """Transcribe one full audio file (no splitting) into SRT-formatted text.

    The WAV is uploaded through the Gemini Files API and referenced from the
    request instead of being decoded, re-encoded and embedded inline. The
    recordings handled here are far larger than the inline request limit.

    Args:
        file_path: Path to the .wav file to transcribe.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        an empty string when the upload or the generation call fails (the
        error is printed, not raised).
    """
    # Prompt text is sent verbatim, including its leading indentation.
    # NOTE(review): the bullet "- In a Single" is unfinished. It is kept
    # as-is because the intended instruction cannot be recovered from here.
    prompt = """
                    Transcribe this audio exactly as spoken (no extra comments, no filler words) with the following .srt format:

                    1
                    00:00:15,362 --> 00:00:21,789
                    ‡§Ö‡§¨ ‡§π‡§Æ ‡§ú‡§æ‡§®‡•á‡§Ç‡§ó‡•á ‡§ï‡•à‡§Ç‡§°‡§≤‡•ç‡§∏ ‡§Æ‡•á‡§Ç ‡§ï‡•ç‡§Ø‡§æ ‡§ï‡•ç‡§Ø‡§æ ‡§ö‡•Ä‡§ú‡§º‡•ã‡§Ç ‡§ï‡•Ä ‡§ú‡§º‡§∞‡•Ç‡§∞‡§§ ‡§™‡§°‡§º‡§§‡•Ä ‡§π‡•à ‡§î‡§∞ ‡§â‡§®‡§ï‡•ã ‡§π‡§Æ ‡§ï‡§π‡§æ‡§Å ‡§∏‡•á ‡§ñ‡§º‡§∞‡•Ä‡§¶ ‡§∏‡§ï‡§§‡•á ‡§π‡•à‡§Ç

                    2
                    00:00:21,922 --> 00:00:27,422
                    ‡§§‡•ã ‡§∏‡§¨‡§∏‡•á ‡§™‡§π‡§≤‡•á ‡§ï‡•à‡§Ç‡§°‡§≤ ‡§¨‡§®‡§æ‡§®‡•á ‡§ï‡•á ‡§≤‡§ø‡§è ‡§π‡§Æ‡•á‡§Ç ‡§°‡§¨‡§≤ ‡§¨‡•â‡§Ø‡§≤‡§∞ ‡§ï‡•Ä ‡§ú‡§º‡§∞‡•Ç‡§∞‡§§ ‡§™‡§°‡§º‡§§‡•Ä ‡§π‡•à ‡§Ø‡•á

                    3
                    00:00:27,617 --> 00:00:29,853
                    ‡§á‡§∏ ‡§§‡§∞‡§π ‡§ï‡§æ ‡§Ø‡•á ‡§á‡§Ç‡§°‡§ï‡•ç‡§∂‡§® ‡§π‡•à

                    and so on...

                    The transcription should strictly follow the format above, where:
                    - **Timestamps** are in the format of HH:MM:SS,SSS --> HH:MM:SS,SSS (with millisecond precision).
                    - Each entry should have a **sequential index** starting from 1 (e.g., 1, 2, 3, ...).
                    - The spoken text should be captured **exactly as it is spoken**, without adding or removing words(but remove filler words).
                    - In a Single
                    - If there is **silence** or a pause, mark the duration with a timestamp like this:
                      ```
                      4
                      00:00:29,854 --> 00:00:34,500
                      [Silence]
                      ```
                    - Include **Speaker labels** (e.g., Speaker 1, Speaker 2) where relevant if multiple speakers are detected.

                    Please ensure the output strictly follows this format. Thank you!


                    """

    try:
        # Upload once and pass the file handle; the request itself stays small.
        uploaded_audio = genai.upload_file(path=file_path, mime_type="audio/wav")
        response = model.generate_content(
            [uploaded_audio, prompt],
            # Long recordings take minutes to transcribe; allow for that
            # explicitly instead of relying on the client default.
            request_options={"timeout": 600},
        )
        return response.text.strip()
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide
        # what to do with an empty result.
        print("‚ùå Error:", e)
        return ""

# ========================
# MAIN PROCESS
# ========================

# Transcribe every .wav in input_dir and save each result as a .txt file.
# sorted() makes the processing order deterministic between runs.
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\nüéß Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)

    # transcribe_audio_file() returns "" on failure. Writing that out would
    # leave an empty .txt that looks like a finished transcript.
    if not text:
        print(f"Skipped (no transcription returned): {filename}")
        continue

    # Swap the extension with splitext: unlike str.replace it is not
    # case-sensitive (".WAV") and cannot touch ".wav" elsewhere in the name.
    stem, _ = os.path.splitext(filename)
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"‚úÖ Done: {filename}")
    print(f"üìÑ TXT saved to: {txt_output}")


Mounted at /content/drive

üéß Transcribing full audio: Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
‚úÖ Done: Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
üìÑ TXT saved to: /content/drive/MyDrive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/Eng1/Srtformatissue/Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.txt

üéß Transcribing full audio: Chapter 9C - Battery management system.wav


KeyboardInterrupt: 

# json test

In [None]:
# ========================
# INSTALL & IMPORTS
# ========================
!pip install -q google-generativeai pydub tqdm librosa

import os
import io
from google.colab import drive, userdata
import google.generativeai as genai
from pydub import AudioSegment
from tqdm import tqdm

# ========================
# SETUP
# ========================

# Mount Google Drive at /content/drive. force_remount=True is passed,
# presumably so that a stale mount from an earlier run is replaced.
drive.mount('/content/drive', force_remount=True)

# Read the Gemini API key from Colab secrets so the key itself never appears
# in the notebook source.
api_key = userdata.get("GOOGLE_API_KEY")
if not api_key:
    # Fail fast: every later step needs an authenticated client.
    raise ValueError("‚ùå No GOOGLE_API_KEY found in Colab secrets! Add it under 'More ‚Üí Secrets'.")

# Hand the key to the google.generativeai client.
genai.configure(api_key=api_key)

# Model handle; transcribe_audio_file() reads this module-level name.
model = genai.GenerativeModel("models/gemini-2.5-pro")

# Input/output folders in Google Drive.
# NOTE: input_dir is written out in full rather than derived from base_dir,
# so the two paths have to be kept in sync by hand.
base_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/"
input_dir = "/content/drive/MyDrive/Test_28_Adnew_wav/English"
output_dir = os.path.join(base_dir,"Test_28_Gemini25pro_asr_new","Eng2","Srtformatissue")
# Create the output folder (and any missing parents) before the main loop.
os.makedirs(output_dir, exist_ok=True)

# ========================
# HELPER FUNCTIONS
# ========================

def transcribe_audio_file(file_path, input_lang="English"):
    """Transcribe one full audio file (no splitting) into a JSON segment list.

    The prompt used to be a plain string containing leftover template
    markers (``{i+1}/{len(chunk_paths)}``, ``{input_lang}``, doubled braces),
    so the model received them literally. It is now an f-string: the language
    is filled in, the chunk counter (meaningless for an unsplit file) is
    gone, and the JSON example renders with single braces.

    The WAV is uploaded through the Gemini Files API and referenced from the
    request instead of being decoded, re-encoded and embedded inline. The
    recordings handled here are far larger than the inline request limit.

    Args:
        file_path: Path to the .wav file to transcribe.
        input_lang: Spoken language named in the prompt.

    Returns:
        The model's response text with surrounding whitespace stripped, or
        an empty string when the upload or the generation call fails (the
        error is printed, not raised).
    """
    # "{{" / "}}" below are f-string escapes for literal braces.
    prompt = f"""
                     Role: You are a high-precision ASR (Automatic Speech Recognition) system specialized in time-aligned transcription.

                    INPUT: Full audio recording (single file, not chunked)
                    TASK: Transcribe the spoken audio into a structured segment list.

                    STRICT EXECUTION RULES:
                    1.  **Time Alignment:** Your primary priority is timestamp accuracy. Do not guess. Align the start and end times strictly to the audio waveform.
                    2.  **Granularity:** Segments must be sentence-based. Do not break mid-sentence unless there is a significant pause.
                    3.  **Monotonicity:** Timestamps must be strictly increasing. Overlapping timestamps are forbidden.
                    4.  **Multilingual Handling:** Transcribe {input_lang} natively. If technical terms appear in English/Latin script, preserve them in English (e.g., "API", "Pipeline", "Latency"). DO NOT translate technical terms.
                    5.  **Clean-up:** Omit all filler words (uh, um, mm,uhm). Omit conversational stuttering.

                    OUTPUT FORMAT:
                    Provide the output as a valid JSON list of objects. This format is required to ensure timestamp precision.
                    [
                      {{
                        "start": "HH:MM:SS,mmm",
                        "end": "HH:MM:SS,mmm",
                        "text": "The actual spoken text goes here."
                      }},
                      {{
                        "start": "HH:MM:SS,mmm",
                        "end": "HH:MM:SS,mmm",
                        "type": "silence"
                      }}
                    ]

                    **Constraint on Silence:** Only include a "type": "silence" object if the pause is strictly greater than 1.0 seconds.

                    BEGIN TRANSCRIPTION:
                    """

    try:
        # Upload once and pass the file handle; the request itself stays small.
        uploaded_audio = genai.upload_file(path=file_path, mime_type="audio/wav")
        response = model.generate_content(
            [uploaded_audio, prompt],
            # Long recordings take minutes to transcribe; allow for that
            # explicitly instead of relying on the client default.
            request_options={"timeout": 600},
        )
        return response.text.strip()
    except Exception as e:
        # Best-effort by design: report the failure and let the caller decide
        # what to do with an empty result.
        print("‚ùå Error:", e)
        return ""

# ========================
# MAIN PROCESS
# ========================

# Transcribe every .wav in input_dir and save each result as a .txt file.
# sorted() makes the processing order deterministic between runs.
for filename in sorted(os.listdir(input_dir)):
    if not filename.lower().endswith(".wav"):
        continue

    file_path = os.path.join(input_dir, filename)
    print(f"\nüéß Transcribing full audio: {filename}")

    # Get full transcription
    text = transcribe_audio_file(file_path)

    # transcribe_audio_file() returns "" on failure. Writing that out would
    # leave an empty .txt that looks like a finished transcript.
    if not text:
        print(f"Skipped (no transcription returned): {filename}")
        continue

    # Swap the extension with splitext: unlike str.replace it is not
    # case-sensitive (".WAV") and cannot touch ".wav" elsewhere in the name.
    stem, _ = os.path.splitext(filename)
    txt_output = os.path.join(output_dir, stem + ".txt")
    with open(txt_output, "w", encoding="utf-8") as f:
        f.write(text)

    print(f"‚úÖ Done: {filename}")
    print(f"üìÑ TXT saved to: {txt_output}")


Mounted at /content/drive

üéß Transcribing full audio: Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
‚ùå Error: ('Connection aborted.', TimeoutError('timed out'))
‚úÖ Done: Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.wav
üìÑ TXT saved to: /content/drive/MyDrive/Test_28_Adnew_wav/Test_28_Gemini25pro_asr_new/Eng2/Srtformatissue/Chapter 1A - Concept of Basic Electricity Voltage, Currents, Resistance, Impedance & Power Factor.txt

üéß Transcribing full audio: Chapter 9C - Battery management system.wav


KeyboardInterrupt: 