In [1]:
# Mount Google Drive at /content/drive (Colab-only API); the output folders
# configured further down in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install pytube transformers moviepy TTS youtube_transcript_api pydub SentencePiece pysubs2

Collecting pysubs2
  Downloading pysubs2-1.6.1-py3-none-any.whl (35 kB)
Installing collected packages: pysubs2
Successfully installed pysubs2-1.6.1


In [4]:
import os
import re
import subprocess

import nltk
from pytube import YouTube
from transformers import MarianMTModel, MarianTokenizer
from moviepy.editor import (
    VideoFileClip,
    concatenate_audioclips,
    AudioFileClip,
    CompositeAudioClip,
)
from whisper import load_model
from TTS.api import TTS
from pydub import AudioSegment, silence
import pysubs2

# --- One-time setup: tokenizer data and the three models used by the pipeline ---
nltk.download('punkt')  # data for nltk.tokenize.sent_tokenize, used in translate()

model_name = 'Helsinki-NLP/opus-mt-en-fr'  # Marian checkpoint used for translation
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
tts = TTS(model_name="tts_models/fr/css10/vits")  # speech synthesis model
whisper_model = load_model("base")  # speech-to-text model

# --- Output layout: root folder plus sub-folders for extracted and synthesized audio ---
save_path = "/content/drive/MyDrive/Colab Notebooks/CS370_project/videos1"
audio_folder = os.path.join(save_path, "audio")
tts_audio_folder = os.path.join(save_path, "tts_audio")
for _output_dir in (save_path, audio_folder, tts_audio_folder):
    os.makedirs(_output_dir, exist_ok=True)

def cleaned_video(video_name):
    """Return ``video_name`` with file-name-hostile characters removed.

    The characters dropped are backslash, ``/``, ``*``, ``?``, ``:``, ``"``,
    ``<``, ``>`` and ``|``; everything else (including spaces) is kept.
    """
    forbidden_chars = re.compile(r'[\\/*?:"<>|]')
    return forbidden_chars.sub("", video_name)

def translate(text):
    """Translate ``text`` one sentence at a time and return the joined result.

    The text is split with ``nltk.tokenize.sent_tokenize``; each sentence is
    encoded with the module-level ``tokenizer`` (truncated to 512 tokens),
    passed through ``model.generate`` and decoded without special tokens.
    The translated sentences are joined with single spaces, so text with no
    sentences yields an empty string.
    """
    def _translate_sentence(sentence):
        encoded = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=512)
        generated = model.generate(**encoded)
        decoded = tokenizer.batch_decode(generated, skip_special_tokens=True)
        return decoded[0]

    return ' '.join(
        _translate_sentence(sentence)
        for sentence in nltk.tokenize.sent_tokenize(text)
    )

def generate_tts_audio(text, start, end, tts_audio_path):
    """Synthesize ``text`` into ``tts_audio_path`` and pad it to fill its time slot.

    Args:
        text: Text to speak with the module-level ``tts`` engine.
        start: Start of the segment, in seconds.
        end: End of the segment, in seconds.
        tts_audio_path: Destination audio file (WAV).

    Returns:
        True, always.

    If the synthesized speech is shorter than ``end - start`` it is padded
    with trailing silence and re-exported. Speech longer than the slot is
    left untouched here; trimming is the caller's responsibility.
    """
    # Slot length in milliseconds (pydub's unit); never negative.
    expected_duration = max(0, (end - start) * 1000)

    if not text.strip():
        # Nothing to speak (e.g. a music-only segment): fill the slot with
        # silence rather than asking the TTS engine to synthesize blank text.
        AudioSegment.silent(duration=expected_duration).export(tts_audio_path, format='wav')
        return True

    tts.tts_to_file(text=text, file_path=tts_audio_path)
    # The file is re-exported as WAV below and the caller passes a .wav path,
    # so load it as WAV instead of forcing the MP3 format.
    tts_audio = AudioSegment.from_wav(tts_audio_path)
    actual_duration = len(tts_audio)  # len() of an AudioSegment is milliseconds
    if actual_duration < expected_duration:
        tts_audio += AudioSegment.silent(duration=expected_duration - actual_duration)
        tts_audio.export(tts_audio_path, format='wav')
    return True

def create_subtitles(segments, subtitles_file):
    """Write ``segments`` to ``subtitles_file`` as a subtitle track.

    Args:
        segments: Iterable of ``(start, end, text)`` tuples with times in seconds.
        subtitles_file: Path the subtitles are saved to via ``pysubs2``.
    """
    def _to_ms(seconds):
        # pysubs2 events take integer milliseconds; fractions are truncated.
        return int(seconds * 1000)

    subtitle_track = pysubs2.SSAFile()
    for seg_start, seg_end, seg_text in segments:
        event = pysubs2.SSAEvent(
            start=_to_ms(seg_start),
            end=_to_ms(seg_end),
            text=seg_text,
        )
        subtitle_track.append(event)
    subtitle_track.save(subtitles_file)

def _escape_ffmpeg_filter_path(path):
    """Escape ``path`` for use as an option value inside an ffmpeg ``-vf`` filtergraph."""
    # Level 1 - filter option value: backslash, single quote and colon are special.
    option_escaped = re.sub(r"([\\':])", r"\\\1", path)
    # Level 2 - filtergraph description: backslash, quote, brackets, comma, semicolon.
    return re.sub(r"([\\'\[\],;])", r"\\\1", option_escaped)


def embed_subtitles(video_path, subtitles_path, output_path):
    """Burn the subtitles in ``subtitles_path`` into ``video_path`` using ffmpeg.

    Args:
        video_path: Input video file.
        subtitles_path: Subtitle file rendered by ffmpeg's ``subtitles`` filter.
        output_path: Destination video; overwritten if it already exists.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # The subtitle path is embedded in a filtergraph string, where characters
    # such as ' , [ ] ; : would otherwise be parsed as filter syntax. Video
    # titles routinely contain them, so escape before interpolating.
    subtitles_filter = f"subtitles={_escape_ffmpeg_filter_path(subtitles_path)}"
    command = [
        'ffmpeg',
        '-y',  # overwrite an existing output so re-running the notebook does not fail
        '-i', video_path,
        '-vf', subtitles_filter,
        '-c:a', 'copy',  # audio is passed through untouched; only video is re-encoded
        output_path
    ]
    subprocess.run(command, check=True)

def process_video(url):
    """Download a YouTube video and produce a dubbed, subtitled copy of it.

    Steps: download the best progressive MP4 stream, extract its audio,
    transcribe it with ``whisper_model``, translate each segment with
    ``translate``, synthesize speech per segment with ``generate_tts_audio``,
    replace the video's audio with the synthesized track, and finally burn
    the translated subtitles in with ``embed_subtitles``.

    Args:
        url: URL of the YouTube video.

    Returns:
        Path of the subtitled, dubbed video, or ``None`` when there is no
        suitable stream, no audio track, or no transcribed speech.
    """
    yt = YouTube(url)
    yt_title_cleaned = cleaned_video(yt.title)
    video_stream = (
        yt.streams
        .filter(progressive=True, file_extension='mp4')
        .order_by('resolution')
        .desc()
        .first()
    )

    if not video_stream:
        print("No suitable video stream found.")
        return None

    video_path = os.path.join(save_path, yt_title_cleaned + ".mp4")
    video_stream.download(output_path=save_path, filename=yt_title_cleaned + ".mp4")

    final_video_path = os.path.join(save_path, yt_title_cleaned + "_translated.mp4")
    translated_segments = []
    tts_clips = []
    video_clip = VideoFileClip(video_path)
    try:
        if video_clip.audio is None:
            print("Video has no audio track to transcribe.")
            return None

        audio_path = os.path.join(audio_folder, yt_title_cleaned + ".mp3")
        video_clip.audio.write_audiofile(audio_path)

        result = whisper_model.transcribe(audio_path)
        segments = [
            (segment["start"], segment["end"], segment["text"])
            for segment in result["segments"]
        ]

        for start, end, text in segments:
            if end <= start:
                # A zero-length slot cannot hold audio or a visible subtitle.
                continue

            translated_text = translate(text)
            translated_segments.append((start, end, translated_text))

            tts_audio_path = os.path.join(tts_audio_folder, f"tts_{start}_{end}.wav")
            generate_tts_audio(translated_text, start, end, tts_audio_path)

            # Trim to the slot length and anchor the clip at the segment's own
            # start time, so leading silence and pauses between segments are
            # preserved and the dub stays aligned with the picture/subtitles.
            tts_clip = (
                AudioFileClip(tts_audio_path)
                .subclip(0, end - start)
                .set_start(start)
            )
            tts_clips.append(tts_clip)

        if not tts_clips:
            print("No speech segments found to translate.")
            return None

        combined_tts_audio = CompositeAudioClip(tts_clips)
        final_video = video_clip.set_audio(combined_tts_audio)
        final_video.write_videofile(final_video_path)
    finally:
        # Release the ffmpeg readers held by the clips, including on failure.
        for tts_clip in tts_clips:
            tts_clip.close()
        video_clip.close()

    subtitles_file = os.path.join(save_path, yt_title_cleaned + ".srt")
    create_subtitles(translated_segments, subtitles_file)

    embedded_video_path = os.path.join(save_path, yt_title_cleaned + "_translated_subtitled.mp4")
    embed_subtitles(final_video_path, subtitles_file, embedded_video_path)

    return embedded_video_path

# Run the full pipeline on one video and report where the result was written.
url = 'https://youtu.be/CSoXyDcUxEk?si=kwNDyTE-hIy6jpi6'
processed_video_path = process_video(url)
print(
    f"Processed video saved at {processed_video_path}"
    if processed_video_path
    else "Failed to process video."
)

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!



 > tts_models/fr/css10/vits is already downloaded.
 > Using model: vits
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:0
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:None
 | > fft_size:1024
 | > power:None
 | > preemphasis:0.0
 | > griffin_lim_iters:None
 | > signal_norm:None
 | > symmetric_norm:None
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:None
 | > pitch_fmax:None
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 > initialization of speaker-embedding layers.
 > initialization of language-embedding layers.
MoviePy - Writing audio in /content/drive/MyDrive/Colab Notebooks/CS370_project/videos1/




MoviePy - Done.
 > Text splitted to sentences.
["C'est une question de propriété intellectuelle intéressante, que je pense être en fait quelque chose que je peux dire en tant que quelqu'un qui est dans les affaires créatrices et journalistiques et quoi que ce soit, ou qui se soucie du droit d'auteur."]
 > Processing time: 2.005516290664673
 > Real-time factor: 0.1893957471439904
 > Text splitted to sentences.
["Donc, l'une des choses à propos de la formation sur les données a été l'idée que vous n'allez pas vous former, ou ces choses ne sont pas formés sur les informations protégées par le droit d'auteur des gens."]
 > Processing time: 1.9664645195007324
 > Real-time factor: 0.20553916692733767
 > Text splitted to sentences.
["Historiquement, c'était le concept."]
 > Processing time: 0.7079637050628662
 > Real-time factor: 0.28620195982392566
 > Text splitted to sentences.
["Oui, c'est un énorme mensonge."]
 > Processing time: 0.7230620384216309
 > Real-time factor: 0.2895872919790206




MoviePy - Done.
Moviepy - Writing video /content/drive/MyDrive/Colab Notebooks/CS370_project/videos1/Elon Musk OpenAI is lying when it says it is not using copyrighted data_translated.mp4






Moviepy - Done !
Moviepy - video ready /content/drive/MyDrive/Colab Notebooks/CS370_project/videos1/Elon Musk OpenAI is lying when it says it is not using copyrighted data_translated.mp4
Processed video saved at /content/drive/MyDrive/Colab Notebooks/CS370_project/videos1/Elon Musk OpenAI is lying when it says it is not using copyrighted data_translated_subtitled.mp4


In [3]:
!sudo apt update && sudo apt install ffmpeg

[33m0% [Working][0m            Get:1 http://security.ubuntu.com/ubuntu jammy-security InRelease [110 kB]
[33m0% [Connecting to archive.ubuntu.com (185.125.190.36)] [1 InRelease 14.2 kB/110[0m                                                                               Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [119 kB]
Hit:5 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease
Get:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:7 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:9 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [109 kB]
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:11 http://archive.

In [4]:
# NOTE(review): this installs the PyPI distribution named `ffmpeg`, which is
# presumably not the FFmpeg command-line tool that embed_subtitles() invokes
# via subprocess. Nothing in this notebook imports it - confirm and remove.
!pip install ffmpeg

Collecting ffmpeg
  Downloading ffmpeg-1.4.tar.gz (5.1 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ffmpeg
  Building wheel for ffmpeg (setup.py) ... [?25l[?25hdone
  Created wheel for ffmpeg: filename=ffmpeg-1.4-py3-none-any.whl size=6080 sha256=e95818aa57d5de958cd586d64236d01b0434e4f5dd42cf1dc410462209c4cc71
  Stored in directory: /root/.cache/pip/wheels/8e/7a/69/cd6aeb83b126a7f04cbe7c9d929028dc52a6e7d525ff56003a
Successfully built ffmpeg
Installing collected packages: ffmpeg
Successfully installed ffmpeg-1.4


In [5]:
# Whisper is imported by the main pipeline cell (`from whisper import load_model`),
# so on a fresh runtime this install has to run before that cell.
!pip install git+https://github.com/openai/whisper.git

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-h9hkqsc5
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-h9hkqsc5
  Resolved https://github.com/openai/whisper.git to commit e58f28804528831904c3b6f2c0e473f346223433
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper==20231117)
  Downloading tiktoken-0.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: openai-whisper
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
  Created wheel for openai-whisper: filename=openai_whisper-20231117-py3-none-an