<a href="https://colab.research.google.com/github/talhasahir9/Bulk-Video-Editor/blob/main/English_Dubing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ==========================================
# CELL 1: GOOGLE DRIVE & FOLDER SETUP
# ==========================================
# Mounts Google Drive and makes sure every working folder used by the
# later cells exists.
from google.colab import drive
import os

print("üîå Google Drive connect kar raha hoon...")
drive.mount('/content/drive')

# Master folders: three live on Drive under base_path, while the scratch
# workspace lives on the Colab VM's local disk.
base_path = '/content/drive/MyDrive/Auto_Dubbing_Factory'
input_folder, output_folder, ref_folder = (
    os.path.join(base_path, _sub)
    for _sub in ('Input_Videos', 'Output_Dubbed', 'Reference_Voice')
)
temp_folder = '/content/temp_workspace'

# exist_ok=True keeps the cell safe to re-run.
for _workspace_dir in (input_folder, output_folder, ref_folder, temp_folder):
    os.makedirs(_workspace_dir, exist_ok=True)

print("‚úÖ Setup Done!")
print("üëâ STEP 1: Apni videos '{}' mein daalo.".format(input_folder))
print("üëâ STEP 2: Apni Custom Voice Sample (5-10 sec ki .wav file) '{}' mein daalo jiska naam 'ref_voice.wav' ho.".format(ref_folder))

In [None]:
# ==========================================
# CELL 2: THE BULLETPROOF INSTALLATION
# ==========================================
# Installs the system and Python packages that the later cells import
# (moviepy, pydub, faster-whisper, librosa, soundfile and F5-TTS).
# NOTE: the `!` and `%` lines are IPython/Colab magics, so this cell only
# runs inside a notebook, not as a plain Python script.

# Refresh the apt index and install ffmpeg quietly.
!apt-get update -qq
!apt-get install -y ffmpeg -qq

import os
# Clone the F5-TTS repository directly so the later import does not fail.
# The clone is skipped when /content/F5-TTS already exists, so re-running
# the cell is safe.
if not os.path.exists('/content/F5-TTS'):
    !git clone https://github.com/SWivid/F5-TTS.git

# Go inside the cloned folder and install what is required: F5-TTS itself
# as an editable install, then the audio/video helper libraries.
%cd /content/F5-TTS
!pip install -q -e .
# NOTE(review): moviepy is pinned to 1.0.3, presumably because Cell 3 imports
# `moviepy.editor` - confirm before bumping the version.
!pip install -q moviepy==1.0.3 pydub faster-whisper librosa soundfile
# Return to /content so later cells run from the default working directory.
%cd /content

print("‚úÖ Installation 100% Complete! Folder clone ho gaya hai.")

In [None]:
# ==========================================
# CELL 3: ENGINE WARM-UP & VOICE PREP
# ==========================================
# Loads the Whisper and F5-TTS models once and transcribes the reference
# voice sample so the dubbing cell can reuse `whisper_model`, `f5tts`,
# `ref_audio_path` and `custom_ref_text`.
import os
import sys
import gc
import torch
import librosa
import soundfile as sf
from pydub import AudioSegment
import moviepy.editor as mp
from faster_whisper import WhisperModel

# Import hack: make the cloned repository importable even if the editable
# install has not been picked up by the running kernel.
if '/content/F5-TTS' not in sys.path:
    sys.path.append('/content/F5-TTS')
from f5_tts.api import F5TTS

print("üß† Loading Whisper AI (Brain)...")
# Pick the device at runtime instead of hard-coding CUDA, so the cell does
# not crash on a runtime without a GPU. float16 is only used on CUDA; on CPU
# we fall back to int8.
whisper_device = "cuda" if torch.cuda.is_available() else "cpu"
whisper_compute_type = "float16" if whisper_device == "cuda" else "int8"
whisper_model = WhisperModel("base", device=whisper_device, compute_type=whisper_compute_type)

print("üî• Loading F5-TTS (Voice Cloning)...")
f5tts = F5TTS()

# Custom reference voice: the sample the user dropped into ref_folder.
ref_audio_path = os.path.join(ref_folder, 'ref_voice.wav')

if not os.path.exists(ref_audio_path):
    # Only report the problem here; `custom_ref_text` is not defined in this
    # case, so the dubbing cell cannot run until the file is added and this
    # cell is re-run.
    print(f"‚ö†Ô∏è ERROR: '{ref_folder}' mein 'ref_voice.wav' nahi mili!")
else:
    print("üéôÔ∏è Custom Reference Voice mil gayi. Whisper iska text nikal raha hai...")
    # Transcribe the reference clip (forced to English); the joined segment
    # text is passed to F5-TTS as the reference transcript.
    ref_segments, _ = whisper_model.transcribe(ref_audio_path, beam_size=5, language="en")
    custom_ref_text = " ".join([s.text for s in ref_segments]).strip()
    print(f"üéØ Reference Text Auto-Generated: '{custom_ref_text}'")
    print("‚úÖ ENGINES ARE 100% READY FOR BULK DUBBING!")

In [None]:
# ==========================================
# CELL 4: THE MASTER DUBBING FACTORY
# ==========================================
# For every video in input_folder that has no DUBBED_ output yet:
#   1. extract the original audio,
#   2. translate it to English with Whisper (with timestamps),
#   3. synthesize each line with F5-TTS, speed it up when it does not fit
#      before the next line, and overlay it on the quietened original audio,
#   4. export the mixed audio and the re-muxed video to output_folder.
import contextlib
import time

# Match extensions case-insensitively so files such as 'CLIP.MP4' are picked up.
videos = [f for f in os.listdir(input_folder) if f.lower().endswith(('.mp4', '.mkv', '.mov'))]

# Fail fast with a clear message instead of hitting a NameError on
# `custom_ref_text` after the first video was already extracted and translated.
if videos and (not os.path.exists(ref_audio_path) or 'custom_ref_text' not in globals()):
    raise RuntimeError(
        "Reference voice is not ready: put 'ref_voice.wav' in the reference "
        "folder and re-run Cell 3 before starting the dubbing job."
    )

for video_name in videos:
    final_vid_path = os.path.join(output_folder, f"DUBBED_{video_name}")
    final_aud_path = os.path.join(output_folder, f"AUDIO_ONLY_{video_name}.wav")

    # Auto-skip: a previous run already produced this video.
    if os.path.exists(final_vid_path):
        print(f"‚è≠Ô∏è Skipping '{video_name}', pehle se dubbed hai!")
        continue

    print("\n========================================")
    print(f"üé¨ PROCESSING: {video_name}")

    video_path = os.path.join(input_folder, video_name)
    temp_audio_path = os.path.join(temp_folder, "original_audio.wav")

    video_clip = mp.VideoFileClip(video_path)
    new_audio_clip = None
    final_video = None
    try:
        # A video without an audio track has `audio` set to None; skip it
        # instead of crashing the whole batch with an AttributeError.
        if video_clip.audio is None:
            print(f"WARNING: '{video_name}' has no audio track. Skipping.")
            continue

        # 1. Extract audio
        total_video_duration = video_clip.duration
        video_clip.audio.write_audiofile(temp_audio_path, logger=None)

        # 2. Whisper translation to English
        print("ü™Ñ Translating and generating timestamps...")
        segments_raw, info = whisper_model.transcribe(temp_audio_path, beam_size=5, task="translate")
        segments = [{"start": s.start, "end": s.end, "text": s.text.strip()} for s in segments_raw if s.text.strip()]

        if not segments:
            print("‚ö†Ô∏è Koi aawaz nahi mili. Skipping.")
            continue

        master_audio = AudioSegment.from_file(temp_audio_path)
        bg_audio = master_audio - 15  # original track lowered by 15 dB as background

        # 3. Spill-over timing logic & F5-TTS
        for i, seg in enumerate(segments):
            start_time = seg["start"]
            text = seg["text"]

            # Available room: until 0.2 s before the next line starts, or
            # until the end of the video for the last line.
            if i < len(segments) - 1:
                next_start_time = segments[i + 1]["start"]
                max_allowed_duration = (next_start_time - start_time) - 0.2
            else:
                max_allowed_duration = total_video_duration - start_time

            # Never let the slot shrink below half a second.
            max_allowed_duration = max(0.5, max_allowed_duration)

            print(f"  üîä Dubbing: {text[:40]}... (Available Space: {round(max_allowed_duration, 1)}s)")

            temp_tts_path = os.path.join(temp_folder, f"tts_{i}.wav")
            # Reset per segment so cleanup never touches a path left over from
            # an earlier iteration (or an undefined name on the first one).
            stretched_tts_path = None

            # Generate the cloned-voice line with F5-TTS.
            wav, sr_f5, _ = f5tts.infer(
                ref_file=ref_audio_path,
                ref_text=custom_ref_text,
                gen_text=text
            )
            sf.write(temp_tts_path, wav, sr_f5)

            # Time-stretch (spill-over): speed the line up when it is longer
            # than its slot, capped at 1.35x; any remainder spills over.
            y, sr_librosa = librosa.load(temp_tts_path, sr=None)
            actual_duration = librosa.get_duration(y=y, sr=sr_librosa)

            if actual_duration > max_allowed_duration:
                stretch_rate = min(actual_duration / max_allowed_duration, 1.35)  # speed limit
                y_stretched = librosa.effects.time_stretch(y, rate=stretch_rate)
                stretched_tts_path = os.path.join(temp_folder, f"tts_sync_{i}.wav")
                sf.write(stretched_tts_path, y_stretched, sr_librosa)
                final_segment_path = stretched_tts_path
            else:
                final_segment_path = temp_tts_path

            # Overlay the line on the master track at its original start time.
            synced_audio = AudioSegment.from_file(final_segment_path)
            insert_position_ms = int(start_time * 1000)
            bg_audio = bg_audio.overlay(synced_audio, position=insert_position_ms)

            # Best-effort cleanup of this segment's temp files only; file-system
            # errors are ignored, anything else is allowed to surface.
            for leftover_path in (temp_tts_path, stretched_tts_path):
                if leftover_path is not None:
                    with contextlib.suppress(OSError):
                        os.remove(leftover_path)

        # 4. Export
        print("üíæ Saving Final Audio & Video to Google Drive...")
        bg_audio.export(final_aud_path, format="wav")
        print(f"üéµ Audio Saved: {final_aud_path}")

        new_audio_clip = mp.AudioFileClip(final_aud_path)
        final_video = video_clip.set_audio(new_audio_clip)

        final_video.write_videofile(
            final_vid_path,
            codec="libx264",
            audio_codec="aac",
            preset="fast",
            logger=None
        )
        print(f"üé• Video Saved: {final_vid_path}")
    finally:
        # Always release the media readers, including on skip or failure.
        video_clip.close()
        if final_video is not None:
            final_video.close()
        if new_audio_clip is not None:
            new_audio_clip.close()

    # Free GPU and Python memory before moving on to the next video.
    torch.cuda.empty_cache()
    gc.collect()

    print(f"‚úÖ DONE: {video_name}")
    print("========================================\n")

print("üéâ BULK FACTORY JOB COMPLETE!")