<a href="https://colab.research.google.com/github/mostafa-ja/Video_Dubber/blob/main/dubbing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ffmpeg pysrt yt-dlp


# Combine original and dubbed audio

## Copy the dubbed audio and translated subtitles from Google Drive

In [18]:
import shutil

# (Google Drive source, local destination) pairs: the dubbed audio track
# first, then the translated subtitle file.
drive_to_local = (
    ("/content/drive/MyDrive/dub/combined_audio.wav", "/content/combined_audio.wav"),
    ("/content/drive/MyDrive/dub/translated.srt", "/content/translated.srt"),
)

# Copy each input into the working directory used by the cells below.
for source_path, destination_path in drive_to_local:
    shutil.copy(source_path, destination_path)

print("âœ… Files copied from Google Drive.")

âœ… Files copied from Google Drive.


## extract original audio

In [3]:
import yt_dlp
import os

youtube_url = "https://www.youtube.com/watch?v=cG4aYsvgchA"
output_path = "audio"

# Post-processing step: extract the audio and store it with the WAV codec.
extract_wav = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'wav',
}

# Download the best available audio-only stream (falling back to the best
# combined stream) and write it to `output_path`.
ydl_opts = dict(
    format='bestaudio/best',
    outtmpl=output_path,
    postprocessors=[extract_wav],
    quiet=False,
)

with yt_dlp.YoutubeDL(ydl_opts) as downloader:
    downloader.download([youtube_url])

[youtube] Extracting URL: https://www.youtube.com/watch?v=cG4aYsvgchA
[youtube] cG4aYsvgchA: Downloading webpage
[youtube] cG4aYsvgchA: Downloading tv client config
[youtube] cG4aYsvgchA: Downloading player 612f74a3-main
[youtube] cG4aYsvgchA: Downloading tv player API JSON
[youtube] cG4aYsvgchA: Downloading ios player API JSON
[youtube] cG4aYsvgchA: Downloading m3u8 information
[info] cG4aYsvgchA: Downloading 1 format(s): 251
[download] Destination: audio
[download] 100% of   12.24MiB in 00:00:00 at 18.73MiB/s  
[ExtractAudio] Destination: audio.wav
Deleting original file audio (pass -k to keep)


## Mix the audio tracks

## mixing based on silence times

In [None]:
from pydub import AudioSegment, silence
import os

# === Load audio files ===
speech = AudioSegment.from_wav("combined_audio.wav")
bg = AudioSegment.from_file("audio.wav")  # Works for .mp3, .wav, etc.

# === Ensure background is long enough ===
# Repeat the background until it is at least as long as the speech track.
# The background is not trimmed to the speech length, so any background past
# the end of the speech is kept unchanged in the output.
if len(bg) < len(speech):
    bg *= ((len(speech) // len(bg)) + 1)

# === Detect non-silent parts of speech ===
# Ranges are [start_ms, end_ms] pairs; anything 16 dB below the track's
# average loudness for at least 200 ms counts as silence.
nonsilent_ranges = silence.detect_nonsilent(
    speech,
    min_silence_len=200,
    silence_thresh=speech.dBFS - 16
)

# === Volume settings ===
speech_gain = 5
bg_quiet_gain = -20  # Lower volume during speech
fade_duration = 1000  # in ms, for fade in/out

# === Start with background as base ===
output = bg

# === Process each speech segment ===
for start, end in nonsilent_ranges:
    speech_chunk = speech[start:end] + speech_gain
    bg_chunk = bg[start:end] + bg_quiet_gain

    # Fade the lowered background in and out. The fade is clamped to half of
    # the chunk so that the two fades never overlap and never reach outside
    # the chunk: a chunk shorter than `fade_duration` would otherwise be
    # faded over a window that does not exist, which can change its length
    # and shift everything that follows it.
    edge_fade = min(fade_duration, len(bg_chunk) // 2)
    if edge_fade > 0:
        bg_chunk = bg_chunk.fade_in(edge_fade).fade_out(edge_fade)

    mixed = bg_chunk.overlay(speech_chunk)

    # Replace that portion in output
    output = output[:start] + mixed + output[end:]

# === Export as WAV ===
output.export("final_mixed_audio.wav", format="wav")
print("âœ… Saved as 'final_mixed_audio.wav'")

# === Export as MP3 (compressed) ===
output.export("final_mixed_audio.mp3", format="mp3", bitrate="192k")
print("ðŸŽ§ Saved as 'final_mixed_audio.mp3'")


âœ… Saved as 'final_mixed_audio.wav'
ðŸŽ§ Saved as 'final_mixed_audio.mp3'


## mixing based on subtitle times

In [43]:
from pydub.silence import detect_silence
from pydub import AudioSegment, silence
import pysrt
import os


def srt_time_to_ms(timestamp):
    """Return a subtitle timestamp (hours/minutes/seconds/milliseconds) in ms."""
    whole_seconds = timestamp.hours * 3600 + timestamp.minutes * 60 + timestamp.seconds
    return whole_seconds * 1000 + timestamp.milliseconds


def fade_within(segment, duration_ms, fade_out=True):
    """Fade `segment` in (and, by default, out) without exceeding its length.

    The requested duration is clamped to half of the segment when both edges
    are faded, or to the whole segment when only fading in. Segments too short
    to fade are returned unchanged.
    """
    longest_fade = len(segment) // 2 if fade_out else len(segment)
    duration_ms = min(duration_ms, longest_fade)
    if duration_ms <= 0:
        return segment
    faded = segment.fade_in(duration_ms)
    return faded.fade_out(duration_ms) if fade_out else faded


# === Load audio files ===
speech = AudioSegment.from_wav("combined_audio.wav")
bg = AudioSegment.from_file("audio.wav")  # Works for .mp3, .wav, etc.

# === Load subtitles ===
subs = pysrt.open('translated.srt', encoding='utf-8')

# === Ensure background is long enough ===
# Repeat the background if needed, then trim it to the speech length.
if len(bg) < len(speech):
    bg *= ((len(speech) // len(bg)) + 1)
bg = bg[:len(speech)]

# === Volume settings ===
speech_gain = 0
bg_quiet_gain = -20  # Lower volume during speech
fade_duration = 1000  # in ms, for fade in/out

# === Start with background as base ===
output = bg

# === Process each subtitle window ===
for sub in subs:

    # Time window in ms, limited to the audio that actually exists.
    start = srt_time_to_ms(sub.start)
    end = min(srt_time_to_ms(sub.end), len(bg))
    if end <= start:
        # Empty or inverted window (or one that starts past the end of the
        # audio): splicing it in would duplicate audio, so leave it untouched.
        continue

    # Extract chunks
    speech_chunk = speech[start:end] + speech_gain
    original_bg_chunk = bg[start:end]  # Unaltered volume
    bg_chunk = original_bg_chunk + bg_quiet_gain

    # === Detect silence at end ===
    silence_ranges = detect_silence(speech_chunk, min_silence_len=600, silence_thresh=speech_chunk.dBFS - 16)

    trailing_silence_start = None
    if silence_ranges:
        # Only a silence that ends within the last 200 ms of the chunk counts
        # as trailing silence.
        last_silence = silence_ranges[-1]
        if last_silence[1] >= len(speech_chunk) - 200:
            trailing_silence_start = last_silence[0]

    # Compare against None: an offset of 0 (the whole window is silent) is a
    # valid trailing-silence start and must not be treated as "not found".
    if trailing_silence_start is None:
        # No trailing silence - treat the whole window as active speech
        mixed = fade_within(bg_chunk, fade_duration).overlay(speech_chunk)
    else:
        # Trailing silence: bring the background back at its original volume
        trailing_bg = fade_within(
            original_bg_chunk[trailing_silence_start:], fade_duration, fade_out=False
        )
        mixed = trailing_bg.overlay(speech_chunk[trailing_silence_start:])

        if trailing_silence_start > 0:
            # Active speech before the silence: lowered background underneath
            active_bg = fade_within(bg_chunk[:trailing_silence_start], fade_duration)
            mixed = active_bg.overlay(speech_chunk[:trailing_silence_start]) + mixed

    # Replace that portion in output
    output = output[:start] + mixed + output[end:]


# === Export as WAV ===
output.export("final_mixed_audio.wav", format="wav")
print("âœ… Saved as 'final_mixed_audio.wav'")

# === Export as MP3 (compressed) ===
output.export("final_dubbed_audio.mp3", format="mp3", bitrate="192k")
print("ðŸŽ§ Saved as 'final_dubbed_audio.mp3'")


âœ… Saved as 'final_mixed_audio.wav'
ðŸŽ§ Saved as 'final_dubbed_audio.mp3'


# Download video

In [63]:
# checking qualities
!yt-dlp -F "{youtube_url}"

[youtube] Extracting URL: https://www.youtube.com/watch?v=cG4aYsvgchA
[youtube] cG4aYsvgchA: Downloading webpage
[youtube] cG4aYsvgchA: Downloading tv client config
[youtube] cG4aYsvgchA: Downloading tv player API JSON
[youtube] cG4aYsvgchA: Downloading ios player API JSON
[youtube] cG4aYsvgchA: Downloading m3u8 information
[info] Available formats for cG4aYsvgchA:
[0;33mID[0m      [0;33mEXT[0m   [0;33mRESOLUTION[0m [0;33mFPS[0m [0;33mCH[0m [0;33m[0;34mâ”‚[0;33m[0m [0;33m  FILESIZE[0m [0;33m  TBR[0m [0;33mPROTO[0m [0;33m[0;34mâ”‚[0;33m[0m [0;33mVCODEC[0m        [0;33m  VBR[0m [0;33mACODEC[0m     [0;33m ABR[0m [0;33mASR[0m [0;33mMORE INFO[0m
[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34mâ”€[0m[0;34m

## testing

In [51]:
# Download a small, low-quality copy of the video for a quick test run.
ydl_opts = dict(
    format='394+140',  # low-quality video stream + m4a audio stream
    outtmpl='test_video.mp4',
    merge_output_format='mp4',
)

with yt_dlp.YoutubeDL(ydl_opts) as downloader:
    downloader.download([youtube_url])


[youtube] Extracting URL: https://www.youtube.com/watch?v=cG4aYsvgchA
[youtube] cG4aYsvgchA: Downloading webpage
[youtube] cG4aYsvgchA: Downloading tv client config
[youtube] cG4aYsvgchA: Downloading tv player API JSON
[youtube] cG4aYsvgchA: Downloading ios player API JSON
[youtube] cG4aYsvgchA: Downloading m3u8 information
[info] cG4aYsvgchA: Downloading 1 format(s): 394+140
[download] Destination: test_video.f394.mp4
[download] 100% of    6.61MiB in 00:00:01 at 5.06MiB/s   
[download] Destination: test_video.f140.m4a
[download] 100% of   12.11MiB in 00:00:00 at 19.13MiB/s  
[Merger] Merging formats into "test_video.mp4"
Deleting original file test_video.f140.m4a (pass -k to keep)
Deleting original file test_video.f394.mp4 (pass -k to keep)


In [52]:
import subprocess

# Replace the test video's audio track with the mixed audio using ffmpeg.
# check_call raises CalledProcessError when ffmpeg exits with a non-zero
# status, so a failed mux stops the notebook here instead of being ignored;
# on success it returns 0.
subprocess.check_call([
    'ffmpeg', '-y',
    '-i', 'test_video.mp4',
    '-i', 'final_mixed_audio.wav',
    '-c:v', 'copy',           # Copy video without re-encoding
    '-map', '0:v:0',          # Use video from first input
    '-map', '1:a:0',          # Use audio from second input
    '-shortest',              # Trim to shorter of video or audio
    'test_video_with_new_audio.mp4'
])

0

## mixing high quality video and dubbed audio


To download the highest available quality instead of hard-coding format IDs, use:

```python
from yt_dlp import YoutubeDL

youtube_url = "https://www.youtube.com/watch?v=ISToBIkSNbM"
video_filename = "original_video.mp4"

ydl_opts = {
    'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
    'outtmpl': video_filename,
    'merge_output_format': 'mp4'
}

with YoutubeDL(ydl_opts) as ydl:
    ydl.download([youtube_url])
```



In [57]:
# Download the full-quality video that will receive the dubbed audio.
ydl_opts = dict(
    format='299+140',  # 1080p video + good m4a audio
    outtmpl='video_1080p.mp4',
    merge_output_format='mp4',
)

with yt_dlp.YoutubeDL(ydl_opts) as downloader:
    downloader.download([youtube_url])


[youtube] Extracting URL: https://www.youtube.com/watch?v=cG4aYsvgchA
[youtube] cG4aYsvgchA: Downloading webpage
[youtube] cG4aYsvgchA: Downloading tv client config
[youtube] cG4aYsvgchA: Downloading tv player API JSON
[youtube] cG4aYsvgchA: Downloading ios player API JSON
[youtube] cG4aYsvgchA: Downloading m3u8 information
[info] cG4aYsvgchA: Downloading 1 format(s): 299+140
[download] Destination: video_1080p.f299.mp4
[download] 100% of  421.48MiB in 00:00:08 at 49.36MiB/s  
[download] Destination: video_1080p.f140.m4a
[download] 100% of   12.11MiB in 00:00:00 at 13.35MiB/s  
[Merger] Merging formats into "video_1080p.mp4"
Deleting original file video_1080p.f299.mp4 (pass -k to keep)
Deleting original file video_1080p.f140.m4a (pass -k to keep)


In [58]:
import subprocess

# Replace the 1080p video's audio track with the mixed audio using ffmpeg.
# check_call raises CalledProcessError when ffmpeg exits with a non-zero
# status, so a failed mux stops the notebook before the upload step instead
# of being ignored; on success it returns 0.
subprocess.check_call([
    'ffmpeg', '-y',
    '-i', 'video_1080p.mp4',
    '-i', 'final_mixed_audio.wav',
    '-c:v', 'copy',           # Copy video without re-encoding
    '-map', '0:v:0',          # Use video from first input
    '-map', '1:a:0',          # Use audio from second input
    '-shortest',              # Trim to shorter of video or audio
    'dubbed_video.mp4'
])


0

## Upload the result to Google Drive

In [59]:
import shutil

# Copy the finished dubbed video from the working directory to Google Drive.
source_path = "/content/dubbed_video.mp4"
destination_path = "/content/drive/MyDrive/dub/dubbed_video.mp4"
shutil.copy(source_path, destination_path)

# The file goes *to* Drive here, so the message says so.
print("âœ… File copied to Google Drive.")

âœ… File copied from Google Drive.
