# Install dependencies

In [3]:
!pip install moviepy ffmpeg-python git+https://github.com/openai/whisper.git gradio -q

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


# Import dependencies

In [4]:
import os
import subprocess
import moviepy.editor as mp
import ffmpeg
import sys
import gradio as gr
import whisper
from whisper.utils import get_writer
import gc
import torch


## Define a variable with the video path to be used in the notebook

In [5]:
input_video_path = "/content/AI_ What is the future of artificial intelligence_ - BBC News.mp4"

# The code bellow uses the python libraries moviepy and ffmpeg-python to extract and compress the audio from a video

In [6]:
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_audio
import ffmpeg


def extract_audio(input_video, output_audio):
    """
    Extract audio from a video file.

    Args:
        input_video (str): Path to the input video file.
        output_audio (str): Path to the output audio file.
    """
    ffmpeg_extract_audio(input_video, output_audio)

def compress_audio(input_audio, output_audio, audio_codec="wav", audio_bitrate="128k"):
    """
    Compress audio using FFmpeg.

    Args:
        input_audio (str): Path to the input audio file.
        output_audio (str): Path to the output audio file.
        audio_codec (str): Audio codec (e.g., "aac", "mp3", "vorbis").
        audio_bitrate (str): Audio bitrate (e.g., "128k").
    """
    ffmpeg.input(input_audio).output(output_audio, acodec=audio_codec, audio_bitrate=audio_bitrate).run()

if __name__ == "__main__":


    # Audio Extration
    input_video_file = input_video_path  # Replace with your input video file
    output_audio_file = "output_audio.wav"        # Replace with your desired output audio file

    extract_audio(input_video_file, output_audio_file)


    # Audio Compression
    input_audio_file = "output_audio.wav"  # Replace with your input audio file
    output_audio_file = "compressed_audio.mp3"  # Replace with your desired output audio file
    audio_codec = "mp3"  # You can change this to another codec like "aac" or "vorbis"
    audio_bitrate = "64k"  # Adjust the bitrate as needed for desired quality and size

    # Remove file if it already exists
    if os.path.exists(output_audio_file):
      # If it exists, delete the file
      os.remove(output_audio_file)

    compress_audio(input_audio_file, output_audio_file, audio_codec, audio_bitrate)

Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful


# The code bellow uses the linux package ffmpeg to extrat the mp3 audio from a video

In [7]:
def video2mp3(video_file, output_ext="mp3"):
    filename, ext = os.path.splitext(video_file)
    subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)
    return f"{filename}.{output_ext}"

video2mp3(input_video_path)

'/content/AI_ What is the future of artificial intelligence_ - BBC News.mp3'

# The code bellow segmentates an audio with a given maximum duration for each segment

In [8]:
def split_and_compress_audio(input_audio, output_directory, max_duration=59, audio_codec="aac", audio_bitrate="128k"):
    """
    Split an audio file into segments of no longer than a specified duration and compress each segment.

    Args:
        input_audio (str): Path to the input audio file.
        output_directory (str): Directory to save the split and compressed audio segments.
        max_duration (int): Maximum duration (in seconds) for each segment.
        audio_codec (str): Audio codec (e.g., "aac", "mp3", "vorbis").
        audio_bitrate (str): Audio bitrate (e.g., "128k").
    """
    audio_clip = mp.AudioFileClip(input_audio)

    # Calculate the number of segments needed
    num_segments = int(audio_clip.duration / max_duration) + 1

    for i in range(num_segments):
        start_time = i * max_duration
        end_time = min(start_time + max_duration, audio_clip.duration)  # Ensure segments are no longer than max_duration

        # Extract the segment
        segment = audio_clip.subclip(start_time, end_time)

        # Save the segment as a temporary uncompressed file
        temp_audio_filename = f"{output_directory}/temp_segment_{i+1}.wav"
        segment.write_audiofile(temp_audio_filename)

        # Compress the temporary audio file and save it with the specified codec and bitrate
        compressed_audio_filename = f"{output_directory}/segment_{i+1}.{audio_codec}"
        ffmpeg.input(temp_audio_filename).output(compressed_audio_filename, acodec=audio_codec, audio_bitrate=audio_bitrate).run()

        # Remove the temporary uncompressed file
        os.remove(temp_audio_filename)

if __name__ == "__main__":
    # Define your variables
    input_audio_file = "compressed_audio.mp3"  # Replace with your input audio file
    output_directory = "output_segments"  # Directory to save the split and compressed audio segments
    max_duration = 59  # Maximum duration for each segment in seconds
    audio_codec = "mp3"  # You can change this to another codec like "aac" or "vorbis"
    audio_bitrate = "64k"  # Adjust the bitrate as needed for desired quality and size

    # Check if the output directory exists, and create it if it doesn't
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    split_and_compress_audio(input_audio_file, output_directory, max_duration, audio_codec, audio_bitrate)


MoviePy - Writing audio in output_segments/temp_segment_1.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_2.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_3.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_4.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_5.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_6.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_7.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_8.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_9.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_10.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_11.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_12.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_13.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_14.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_15.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_16.wav




MoviePy - Done.
MoviePy - Writing audio in output_segments/temp_segment_17.wav




MoviePy - Done.


# This code generates subtitles to a video and saves the subtitle file

In [9]:
def video2mp3(video_file, output_ext="mp3"):
    filename, ext = os.path.splitext(video_file)
    subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)
    return f"{filename}.{output_ext}"

def generate_and_translate_subtitles(input_video, output_dir, sub_format='srt'):

    audio_file = video2mp3(input_video)

    options = dict(beam_size=5, best_of=5)
    translate_options = dict(task="translate", **options)
    result = model.transcribe(audio_file,**translate_options)

    audio_path = audio_file.split(".")[0]

    sub_writer = get_writer(sub_format, output_dir)
    sub_file_path = audio_path + "." + sub_format

    #with open(os.path.join(output_dir, audio_path + ".vtt"), "w") as sub:
    sub_writer(result, sub_file_path, {"max_line_width": 47, "max_line_count": 1, "highlight_words": False})

    subtitle_path = audio_path + "." + sub_format
    # output_video = audio_path + "_subtitled.mp4"

    return subtitle_path


model = whisper.load_model("medium")

video_file = input_video_path

output_dir = '/content/'

subtitle_file_path = generate_and_translate_subtitles(video_file, output_dir)

100%|██████████████████████████████████████| 1.42G/1.42G [00:15<00:00, 102MiB/s]


## These 2 lines free some GPU and RAM memory

In [10]:
torch.cuda.empty_cache()
gc.collect()

0

# This code joins the subtitle to the video

In [11]:
import os
import ffmpeg  # Make sure you have the ffmpeg-python library installed

def add_subtitles(video_file, subtitle_file, output_file):
    # Create input streams for video and subtitle
    input_video = ffmpeg.input(video_file)
    input_subtitle = ffmpeg.input(subtitle_file)

    # Use filter() to add subtitles
    output = ffmpeg.output(
        input_video,      # Input video stream
        input_subtitle,   # Input subtitle stream
        output_file,
        #vcodec='copy',    # Video codec, keep it as is
        acodec='copy',    # Audio codec, keep it as is
        scodec='mov_text', # Subtitle codec
        f='mp4',           # Output file format
        vf=f'subtitles={subtitle_file}',  # Add subtitle filter
        strict='experimental',  # Use experimental subtitle codec
    )

    # Run the FFmpeg command to create the output file
    ffmpeg.run(output)

video_file = input_video_path
subtitle_file = subtitle_file_path

# Subbed video path
output_file_name, ext = os.path.splitext(video_file) # '/content/AI_ What is the future of artificial intelligence_ - BBC News - Legendado.mp4'
output_file = output_file_name + '_subbed' + ext

add_subtitles(video_file, subtitle_file, output_file)

## This code shows the video with a lower resolution in colab notebooks

In [12]:
from IPython.display import HTML
from base64 import b64encode
from moviepy.editor import VideoFileClip

# Check if the file exists
if not os.path.exists(output_file):
    print(f"Video file not found at {output_file}")
else:
    # Compressed video path
    output_file_name, ext = os.path.splitext(output_file)
    compressed_path = output_file_name + '_compressed' + ext

    # Load the video using moviepy
    video_clip = VideoFileClip(output_file)

    # Specify the start and end times for the fraction of the video you want to display
    start_time = 10  # Start time in seconds
    end_time = 30    # End time in seconds

    # Create a subclip from the specified time range
    subclip = video_clip.subclip(start_time, end_time)

    # Compress the subclip using moviepy (adjust the bitrate as needed)
    subclip.write_videofile(
        compressed_path,
        codec='libx264',  # Video codec
        audio_codec='mp3',  # Audio codec
        bitrate='100k'  # Adjust bitrate as needed
    )

Moviepy - Building video /content/AI_ What is the future of artificial intelligence_ - BBC News_subbed_compressed.mp4.
MoviePy - Writing audio in AI_ What is the future of artificial intelligence_ - BBC News_subbed_compressedTEMP_MPY_wvf_snd.mp3




MoviePy - Done.
Moviepy - Writing video /content/AI_ What is the future of artificial intelligence_ - BBC News_subbed_compressed.mp4





Moviepy - Done !
Moviepy - video ready /content/AI_ What is the future of artificial intelligence_ - BBC News_subbed_compressed.mp4


In [20]:
# Show the subclip
mp4 = open(compressed_path, 'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [13]:
!pip install argostranslate pysrt -q

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/104.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.4/104.4 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m35.7/35.7 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for pysrt (setup.py) ... [?25l[?25hdone


# Simple and free language translator using Argos Translate

In [14]:
import pysrt
import argostranslate
from argostranslate import translate

# Download and install Argos Translate package
from_code = 'en' # English
to_code = 'it' # Italian

argostranslate.package.update_package_index()
available_packages = argostranslate.package.get_available_packages()
package_to_install = next(
    filter(
        lambda x: x.from_code == from_code and x.to_code == to_code, available_packages
    )
)
argostranslate.package.install_from_path(package_to_install.download())



def translate_srt(input_srt_file, output_srt_file, from_code, to_code):
    # Load the input .srt subtitle file
    subtitles = pysrt.open(input_srt_file, encoding='utf-8')

    # Translate each subtitle entry
    for subtitle in subtitles:
      # Translate
        translated_text = argostranslate.translate.translate(subtitle.text, from_code, to_code)
        subtitle.text = translated_text

    # Save the translated subtitles to the output .srt file
    subtitles.save(output_srt_file, encoding='utf-8')

# Example usage:

input_srt_file = '/content/AI_ What is the future of artificial intelligence_ - BBC News.srt'  # Replace with your input .srt file
output_srt_file = 'output_subtitle.srt'  # Replace with your desired output .srt file

translate_srt(input_srt_file, output_srt_file, from_code, to_code)

In [15]:
argostranslate.package.get_available_packages()

[Arabic -> English,
 Azerbaijani -> English,
 Catalan -> English,
 Chinese -> English,
 Czech -> English,
 Danish -> English,
 Dutch -> English,
 English -> Arabic,
 English -> Azerbaijani,
 English -> Catalan,
 English -> Chinese,
 English -> Czech,
 English -> Danish,
 English -> Dutch,
 English -> Esperanto,
 English -> Finnish,
 English -> French,
 English -> German,
 English -> Greek,
 English -> Hebrew,
 English -> Hindi,
 English -> Hungarian,
 English -> Indonesian,
 English -> Irish,
 English -> Italian,
 English -> Japanese,
 English -> Korean,
 English -> Persian,
 English -> Polish,
 English -> Portuguese,
 English -> Russian,
 English -> Slovak,
 English -> Spanish,
 English -> Swedish,
 English -> Thai,
 English -> Turkish,
 English -> Ukranian,
 Esperanto -> English,
 Finnish -> English,
 French -> English,
 German -> English,
 Greek -> English,
 Hebrew -> English,
 Hindi -> English,
 Hungarian -> English,
 Indonesian -> English,
 Irish -> English,
 Italian -> English,
 

In [16]:
type(available_packages[0])

argostranslate.package.AvailablePackage

In [17]:
available_packages[0].from_code

'ar'

## Packages Versions

In [18]:
!pip list

Package                          Version
-------------------------------- ---------------------
absl-py                          1.4.0
aiofiles                         23.2.1
aiohttp                          3.8.5
aiosignal                        1.3.1
alabaster                        0.7.13
albumentations                   1.3.1
altair                           4.2.2
anyio                            3.7.1
appdirs                          1.4.4
argon2-cffi                      23.1.0
argon2-cffi-bindings             21.2.0
argostranslate                   1.8.2
array-record                     0.4.1
arviz                            0.15.1
astropy                          5.3.4
astunparse                       1.6.3
async-timeout                    4.0.3
attrs                            23.1.0
audioread                        3.0.1
autograd                         1.6.2
Babel                            2.13.0
backcall                         0.2.0
beautifulsoup4                   4.11.2

# The code bellow creates a gradio app to generate subtitles to a video

In [19]:
model = whisper.load_model("medium")

def video2mp3(video_file, output_ext="mp3"):
    filename, ext = os.path.splitext(video_file)
    subprocess.call(["ffmpeg", "-y", "-i", video_file, f"{filename}.{output_ext}"],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.STDOUT)
    return f"{filename}.{output_ext}"


def generate_translate_and_add_subtitles(input_video_path, output_dir, sub_format='srt'):

    audio_file = video2mp3(input_video_path)

    options = dict(beam_size=5, best_of=5)
    translate_options = dict(task="translate", **options)
    result = model.transcribe(audio_file,**translate_options)

    audio_path = audio_file.split(".")[0]

    sub_writer = get_writer(sub_format, output_dir)
    sub_file_path = audio_path + "." + sub_format

    sub_writer(result, sub_file_path, {"max_line_width": 47, "max_line_count": 1, "highlight_words": False})

    subtitle_path = audio_path + "." + sub_format
    output_video_path = audio_path + "_subtitled.mp4"

    add_subtitles(input_video_path, subtitle_path, output_video_path)

    return output_video_path, audio_file


def add_subtitles(video_file, subtitle_file, output_file):
    # Create input streams for video and subtitle
    input_video = ffmpeg.input(video_file)
    input_subtitle = ffmpeg.input(subtitle_file)

    # Use filter() to add subtitles
    output = ffmpeg.output(
        input_video,      # Input video stream
        input_subtitle,   # Input subtitle stream
        output_file,
        #vcodec='copy',    # Video codec, keep it as is
        acodec='copy',    # Audio codec, keep it as is
        scodec='mov_text', # Subtitle codec
        f='mp4',           # Output file format
        vf=f'subtitles={subtitle_file}',  # Add subtitle filter
        strict='experimental',  # Use experimental subtitle codec
    )

    # Run the FFmpeg command to create the output file
    ffmpeg.run(output)


title = "Add Text/Caption to your YouTube Shorts - MultiLingual"

block = gr.Blocks()

with block:

    with gr.Group():
        with gr.Box():



            with gr.Row().style():

                inp_video = gr.Video(
                    label="Input Video",
                    type="filepath",
                    mirror_webcam = False
                )
                op_video = gr.Video()
            btn = gr.Button("Generate Subtitle Video")






        btn.click(generate_translate_and_add_subtitles, inputs=[inp_video], outputs=[op_video])

        gr.HTML('''
        <div class="footer">
                    <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
                    </p>
        </div>
        ''')

block.launch(debug = True)

  with gr.Row().style():

  inp_video = gr.Video(




Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://2f268048833082ac66.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://2f268048833082ac66.gradio.live


