In [None]:
#@title *Prepare* { display-mode: "form", run: "auto" }

#@markdown # **Prepare the environment**
#@markdown Execute to install necessary packages, modules, fonts

#@markdown Mount Google Drive at /content/drive (your drive folder at /content/drive/MyDrive):
mount_gdrive = True #@param {type:"boolean"}
#@markdown Force mount again. Useful for bug cases:
force_remount = False #@param {type:"boolean"}

# remove the old log
!rm -rf error_log.txt > /dev/null

import os
import subprocess
from google.colab import drive

# Attach Google Drive when the form asks for it. A failed mount is reported
# on the same output line and the rest of the cell still runs.
if mount_gdrive:
    print("Mounting your Google Drive | Waiting user Allow Access | ", end='')
    try:
        drive.mount('/content/drive/', force_remount=force_remount)
    except Exception as mount_error:
        print(f"[✗]: {mount_error}")

# update code
print("Ensuring the LeGen code is existing and updated...", end='')
repo_url = "https://github.com/matheusbach/legen.git"
local_folder = "/content/src"  # LeGen source path

import shutil  # only this step needs it, to wipe an unusable checkout

# Create directory if it does not exist
os.makedirs(local_folder, exist_ok=True)

# Probe the folder: `git fetch` exits with 0 only inside an existing clone.
return_code = subprocess.run("git fetch", cwd=local_folder, shell=True).returncode
if return_code == 0:
  # Existing clone: drop local changes and bring it up to date.
  git_task = "git fetch && git reset --hard origin/main && git pull"
else:
  # Not a usable clone: empty the folder so `git clone` gets a clean target.
  # A shell `rm -rf local_folder` would delete a path literally named
  # "local_folder" (no variable expansion), so remove the real path in Python.
  shutil.rmtree(local_folder, ignore_errors=True)
  os.makedirs(local_folder, exist_ok=True)
  git_task = f"git clone {repo_url} {local_folder}"

# Run the chosen git command; its stderr is appended to the shared error log.
with open('/content/error_log.txt', 'a') as f:
    return_code = subprocess.run(git_task, cwd=local_folder, shell=True, stderr=f).returncode
    print("[✔]" if return_code == 0 else "[✗]")

def _run_logged_step(label, command, cwd=None):
    """Run one shell command as a labelled preparation step.

    Prints `label`, runs `command` through the shell (optionally from `cwd`)
    with stderr appended to /content/error_log.txt, then prints [✔] or [✗]
    on the same line depending on the exit status.

    Returns the command's exit status.
    """
    print(label, end='')
    with open('/content/error_log.txt', 'a') as log_file:
        status = subprocess.run(command, cwd=cwd, shell=True, stderr=log_file).returncode
    print("[✔]" if status == 0 else "[✗]")
    return status


# install / upgrade the Python requirements (run from the LeGen checkout so
# that the relative requirements.txt resolves)
return_code = _run_logged_step(
    "Installing or updating pip requirements...",
    'pip3 install --upgrade -r requirements.txt && pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 && pip3 install git+https://github.com/m-bain/whisperx.git --upgrade',
    cwd=local_folder,
)

# install ffmpeg and the X virtual framebuffer
return_code = _run_logged_step(
    "Installing FFmpeg and xvfb...",
    'apt update -y ; apt install ffmpeg xvfb -y',
)

# install the Microsoft core fonts (EULA pre-accepted through debconf) and
# rebuild the font cache
return_code = _run_logged_step(
    "Installing fonts...",
    'echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections && apt install -y ttf-mscorefonts-installer && fc-cache -f -v',
)

# create a virtual display
# Xvfb is started in the background (trailing "&") so the cell does not block on it.
os.system('Xvfb :1 -screen 0 2560x1440x8  &') # create virtual display with size 2560x1440 and 8 bit color. Color can be changed to 24, 16 or 8
os.environ['DISPLAY'] = ':1.0' # tell X clients to use our virtual DISPLAY :1.0.

print("\nPreparation tasks done.")

In [None]:
#@title *Configure* { display-mode: "form", run: "auto" }
#@markdown # **Define Software Settings**
#@markdown ---
#@markdown ## General Options
#@markdown Set where your files are located (your Drive is the base /content/drive/MyDrive):
import os
from IPython.display import display
import ipywidgets as widgets
# Folder holding the input media; passed to legen.py as `-i`.
input_path = "/content/drive/MyDrive/LeGen/media" #@param {type:"string"}
#@markdown Set where output soft subs (.srt and embed) videos will be saved on your Drive:
# Passed to legen.py as `--srt_out_dir`.
output_softsubs_path = "/content/drive/MyDrive/LeGen/softsubs" #@param {type:"string"}
#@markdown Set where output hard subs (burned in) videos will be saved on your Drive:
# Passed to legen.py as `--burned_out_dir`.
output_hardsubs_path = "/content/drive/MyDrive/LeGen/hardsubs" #@param {type:"string"}

#@markdown ---
# True adds `--overwrite` to the legen.py command line.
overwrite = True #@param {type:"boolean"}
# True adds `--norm`.
normalize = True #@param {type:"boolean"}
# False adds `--only_video`.
copy_extra_files = False #@param {type:"boolean"}

# generate_srt_files = True #@param {type:"boolean"}
# False adds `--disable_embed`; the soft subs folder is only created when True.
generate_softsub = False #@param {type:"boolean"}
# False adds `--disable_burn`; the hard subs folder is only created when True.
generate_hardsub = True  # @param {type:"boolean"}

# "Create directories" button, plus an output area that shows its confirmation
output = widgets.Output()
button = widgets.Button(description="Create directories!")


def on_button_clicked(b):
  """Click handler: create the input folder and each enabled output folder.

  `b` is the clicked button (unused). The soft/hard subs folders are only
  created when the matching generate_* option is on. "Done." is printed into
  the output widget afterwards.
  """
  wanted_dirs = [input_path]
  if generate_softsub:
    wanted_dirs.append(output_softsubs_path)
  if generate_hardsub:
    wanted_dirs.append(output_hardsubs_path)

  for directory in wanted_dirs:
    os.makedirs(directory, exist_ok=True)

  with output:
    print("Done.")


button.on_click(on_button_clicked)
display(button, output)

#@markdown ---
#@markdown ## Transcription Settings:
# "WhisperX" adds `--whisperx` to the legen.py command line; "Whisper" adds nothing.
transcription_engine = 'WhisperX' # @param ["Whisper", "WhisperX"]
# Passed as `--dev`.
transcription_device = 'cuda' #@param ["auto", "cpu", "cuda"]
# Passed as `--model`.
transcription_model = 'tiny' #@param ["tiny", "small", "medium", "large"]
# Passed as `--compute_type`.
compute_type = 'default' # @param ["default", "int8", "int16", "float16", "float32"]
# Passed as `--batch_size`.
batch_size = 12 # @param {type: "number"}
# Passed as `--input_lang`, except for "auto detect", which omits the flag.
transcription_input_lang = 'en' #@param ["auto detect", "ar", "de", "en", "es", "fr", "haw", "pt", "ru", "sw", "uk", "zh"]

#@markdown ---
#@markdown ## Translation Settings:
#@markdown Set the destination language code. Set to same as original to skip translation. (*Note: check below for Batch processing for many languages*)
# Used for the single-language run; reaches legen.py as `--lang`.
target_language_code = 'es' #@param ["ar", "de", "en", "es", "fr", "haw", "pt", "ru", "sw", "uk", "zh-CN", "zh-TW", "co"]

#@markdown ---
#@markdown ## Video Settings:
# Passed as `-c:v`, with a hardware suffix derived from video_hardware_api.
video_codec = "h264"  #@param ["h264", "hevc", "mpeg4"]
# "none" adds no suffix, "auto" adds "_nvenc" when CUDA is available,
# any other value is appended to the codec as "_<value>".
video_hardware_api = "auto"  #@param ["auto", "none", "nvenc", "vaapi", "amf", "v4l2m2m", "qsv", "videotoolbox", "cuvid"]

# Passed as `-c:a`.
audio_codec = "aac"  #@param ["aac", "libopus", "libmp3lame", "pcm_s16le"]

In [None]:
#@title *Select font parameters* { display-mode: "form", run: "auto" }
#@markdown # **Define font style of burned-in subtitles**
#@markdown ---
#@markdown ## General Options
#@markdown Set basic font features, first the font family:

font_name = "Futura" #@param ["Futura", "Arial", "Tahoma", "Aileron", "Verdana"]
fontsize = 12 # @param {type:"integer"}

#@markdown Set the text color:

primaryColour = "H00FFFFFF" #@param {type:"string"}

#@markdown Text box or outline/shadow?
text_box = True # @param {type:"boolean"}
outline = True # @param {type:"boolean"}
secondaryColour = "H00FFFFFF" #@param {type:"string"}
outlineColour = "H03fcff" #@param {type:"string"}
backColour = "H80000000" #@param {type:"string"}

#@markdown Position of the text in the video (default = **Bottom_center** + marginV=**20**):
subs_position = "Top center" #@param ["Bottom center", "Top center", "Bottom left", "Bottom right", "Top left", "Top right", "Middle left", "Middle center", "Middle right"]
margin_vertical = 25 # @param {type:"integer"}
#@markdown ---
font_bold = 1 # @param {type:"slider", min:0, max:1, step:1}
font_italic = 0 # @param {type:"slider", min:0, max:1, step:1}
font_shadow = 0 # @param {type:"slider", min:0, max:1, step:1}
font_spacing = 0.4 # @param {type:"slider", min:0, max:1, step:0.1}

#@markdown ---

# Border style: 4 selects the background text box
# (https://stackoverflow.com/a/65271472); without a text box it is 1 when an
# outline is requested and 0 when neither is. Decided in a single expression
# so the "neither" case is not overwritten by a later assignment.
borderstyle = 4 if text_box else (1 if outline else 0)

# Alignment --> sub_align=10 by default in ffmpeg_utils.py
positions = {'Bottom left': 1, 'Bottom center': 2, 'Bottom right': 3, 'Top left': 5, 'Top center': 6, 'Top right': 7, 'Middle left': 9, 'Middle center': 10, 'Middle right': 11}
sub_align = str(positions[subs_position])

# Assemble the style override: the font name first, then key=value pairs.
# Every field offered in the form is forwarded, including the secondary and
# outline colours and the italic switch.
style_fields = [
    font_name,
    f"PrimaryColour=&{primaryColour}",
    f"SecondaryColour=&{secondaryColour}",
    f"OutlineColour=&{outlineColour}",
    f"Fontsize={fontsize}",
    f"Borderstyle={borderstyle}",
    f"BackColour=&{backColour}",
    f"Bold={font_bold}",
    f"Italic={font_italic}",
    f"Spacing={font_spacing}",
    f"Outline={1 if outline else 0}",
    f"Shadow={font_shadow}",
    "MarginL=10",
    "MarginR=10",
    f"MarginV={margin_vertical}",
]
# The surrounding single quotes are part of the value: it is pasted verbatim
# into a shell command line.
sub_style = "'" + ",".join(style_fields) + "'"


In [None]:
#@title *Run* { display-mode: "form" }
#@markdown # **Run LeGen.py**
many_target_languages = True  # @param {type:"boolean"}
def run_LeGen(myLang:str = "en"):

  print("Starting LeGen...")
  import torch
  try:
      import tensorflow  # required in Colab to avoid protobuf compatibility issues
  except ImportError:
      pass

  # Create input and output directories if it does not exist
  os.makedirs(input_path, exist_ok=True)
  if generate_softsub:
    os.makedirs(output_softsubs_path, exist_ok=True)
  if generate_hardsub:
    os.makedirs(output_hardsubs_path, exist_ok=True)

  #build query
  query = f" -i '{input_path}'"
  query += f" --srt_out_dir '{output_softsubs_path}'"
  query += f" --burned_out_dir '{output_hardsubs_path}'"
  query += " --overwrite" if overwrite else ""
  query += " --norm" if normalize else ""
  query += " --only_video" if not copy_extra_files else ""
  query += " --disable_embed" if not generate_softsub else ""
  query += " --disable_burn" if not generate_hardsub else ""
  query += " --whisperx" if transcription_engine == "WhisperX" else ""
  query += f" --dev {transcription_device}"
  query += f" --model {transcription_model}"
  query += f" --compute_type {compute_type}"
  query += f" --batch_size {batch_size}"
  query += f" --input_lang {transcription_input_lang}" if transcription_input_lang != "auto detect" else ""
  query += f" --lang {myLang}"
  query += f" -c:v {video_codec}" + ("" if video_hardware_api == "none" else f"_{video_hardware_api}" if video_hardware_api != "auto" else "_nvenc" if torch.cuda.is_available() else "")
  query += f" -c:a {audio_codec}"
  query += f" --sub_style {sub_style}"
  query += f" --sub_align {sub_align}" if sub_align in range(1,11) else "2"


  #run python script
  print(f"command line: python3 /content/src/legen.py {query}", end="\n\n")
  !python3 /content/src/legen.py $query

batch_lang = None
if target_language_code and not many_target_languages:
  run_LeGen(target_language_code)


# Batch for many languages
Is this possible if the subtitle files have the same name as the source video?

In [None]:
#@title *Export in many languages* { display-mode: "form", run: "auto" }
#@markdown # **Export to many languages at once**

#@markdown ## Languages selection
#@markdown Check the boxes for the target languages:

def batch_lang():
  """Return the target language codes ticked in the form, in form order.

  "zh" is reported as "zh-CN". When `all_my_languages` is ticked, every
  language listed in the form is returned regardless of the individual boxes.
  """
  es = True # @param {type:"boolean"}
  fr = True # @param {type:"boolean"}
  pt = True # @param {type:"boolean"}
  sw = False # @param {type:"boolean"}
  ru = False # @param {type:"boolean"}
  zh = False # @param {type:"boolean"}
  ar = False # @param {type:"boolean"}
  #@markdown ---
  #@markdown Check if all the previous languages:
  all_my_languages = False # @param {type:"boolean"}

  # Explicit code -> checkbox mapping: scanning locals() would also pick up
  # `all_my_languages` itself and return it as if it were a language code.
  choices = {"es": es, "fr": fr, "pt": pt, "sw": sw, "ru": ru, "zh-CN": zh, "ar": ar}
  if all_my_languages:
    return list(choices)
  return [code for code, ticked in choices.items() if ticked]

# Resolve the ticked languages once, announce them, then run LeGen for each in turn.
selected_languages = batch_lang()
print(f"Running the script for the following languages:\n\t{selected_languages}")
for lang_code in selected_languages:
  run_LeGen(lang_code)