In [1]:
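# One-time setup, run from a PowerShell terminal before starting the notebook kernel:
#   python -m venv subtitles_env
#   .\subtitles_env\Scripts\Activate.ps1
#   pip install -r requirements.txt
# (From inside a notebook cell, use `%pip install -r requirements.txt` instead.)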


In [2]:
import os
import json
import glob
import time
import shutil
import subprocess
from pathlib import Path
from faster_whisper import WhisperModel

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def find_mkvtoolnix():
    """Locate the MKVToolNix command-line tools on a Windows machine.

    Lookup order (the first directory that holds all three executables wins):
      1. the standard install directories,
      2. the directory of an ``mkvmerge`` found on ``PATH``,
      3. a recursive scan of the C: drive (slow, last resort).

    Returns:
        tuple[str, str, str]: paths to ``mkvmerge.exe``, ``mkvextract.exe``
        and ``mkvpropedit.exe``, in that order.

    Raises:
        FileNotFoundError: if no directory containing all three tools is found.
    """
    tool_names = ("mkvmerge.exe", "mkvextract.exe", "mkvpropedit.exe")

    def toolset_in(directory):
        # Return the three tool paths if every one exists in `directory`, else None.
        paths = tuple(os.path.join(directory, name) for name in tool_names)
        if all(os.path.exists(path) for path in paths):
            return paths
        return None

    search_locations = [
        r"C:\Program Files\MKVToolNix",
        r"C:\Program Files (x86)\MKVToolNix",
        os.path.expandvars(r"%LOCALAPPDATA%\Programs\MKVToolNix"),
    ]

    for base in search_locations:
        found = toolset_in(base)
        if found:
            return found

    # Cheap check before scanning the disk: a portable install may be on PATH.
    on_path = shutil.which("mkvmerge")
    if on_path:
        found = toolset_in(os.path.dirname(on_path))
        if found:
            return found

    # Deep fallback search. iglob is lazy, so the scan stops at the first
    # complete tool set instead of walking the whole drive up front, and an
    # incomplete hit no longer hides a later, complete installation.
    for hit in glob.iglob(r"C:\**\mkvmerge.exe", recursive=True):
        found = toolset_in(os.path.dirname(hit))
        if found:
            return found

    raise FileNotFoundError("MKVToolNix not found anywhere")


# Resolve the MKVToolNix executables once; MKVMERGE is what get_tracks and
# merge_subtitles invoke.
MKVMERGE, MKVEXTRACT, MKVPROPEDIT = find_mkvtoolnix()

# One path per line, in the same order as the tuple above.
print("Found MKVToolNix:")
print(MKVMERGE, MKVEXTRACT, MKVPROPEDIT, sep="\n")

Found MKVToolNix:
C:\Program Files\MKVToolNix\mkvmerge.exe
C:\Program Files\MKVToolNix\mkvextract.exe
C:\Program Files\MKVToolNix\mkvpropedit.exe


In [4]:
# Shared transcription model: the "small" checkpoint with int8 compute.
# get_segments() reads this module-level object.
model = WhisperModel("small", compute_type="int8")

In [5]:
# Scratch directory for per-video intermediates, relative to the working directory.
WORK_DIR = "subtitle_work"

# Intermediate artefacts that are overwritten for every processed video.
AUDIO_FILE = os.path.join(WORK_DIR, "audio.wav")        # written by extract_audio
DIALOGUE_FILE = os.path.join(WORK_DIR, "dialogue.srt")  # written by create_subtitles

os.makedirs(WORK_DIR, exist_ok=True)

In [6]:
def get_tracks(mkv_file):
    """Return the track list that ``mkvmerge -J`` reports for ``mkv_file``.

    Args:
        mkv_file: path of the Matroska file to inspect.

    Returns:
        list: the value of the ``"tracks"`` key of mkvmerge's JSON output.

    Raises:
        RuntimeError: if mkvmerge exits with an error status.
        json.JSONDecodeError, KeyError: if the output is not the expected JSON.
    """
    cmd = [MKVMERGE, "-J", mkv_file]
    # Decode explicitly as UTF-8 rather than with the locale code page, so
    # non-ASCII track names cannot break or garble the parse.
    result = subprocess.run(
        cmd,
        capture_output=True,
        text=True,
        encoding="utf-8",
        errors="replace",
    )
    # mkvmerge exit codes: 0 = success, 1 = warnings (output still usable),
    # 2 = error. Fail here with mkvmerge's own message instead of letting
    # json.loads choke on unusable output.
    if result.returncode not in (0, 1):
        detail = (result.stderr or result.stdout or "").strip()
        raise RuntimeError(
            f"mkvmerge could not identify {mkv_file!r} "
            f"(exit code {result.returncode}): {detail}"
        )
    return json.loads(result.stdout)["tracks"]

In [7]:
def extract_audio(MKV_FILE):
    """Decode the audio of ``MKV_FILE`` into ``AUDIO_FILE`` with ffmpeg.

    Args:
        MKV_FILE: path of the input video.

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    ffmpeg_cmd = ["ffmpeg", "-y", "-i", MKV_FILE]                      # -y: overwrite the previous WAV
    ffmpeg_cmd.append("-vn")                                           # drop the video stream
    ffmpeg_cmd.extend(["-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1"])  # 16-bit PCM, 16 kHz, mono
    ffmpeg_cmd.append(AUDIO_FILE)

    subprocess.run(ffmpeg_cmd, check=True)
    print("\t2/8=> Extracted Audio")

In [8]:
def format_time(seconds):
    """Render an offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic. Truncating a float remainder instead loses a
    millisecond on values such as ``1.001`` (whose fractional part is stored
    as 0.000999...), and cannot carry ``59.9996`` over to the next minute.

    Args:
        seconds: non-negative offset in seconds (int or float); negative
            values are clamped to zero.

    Returns:
        str: the formatted timestamp.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hrs, remainder = divmod(total_ms, 3600000)
    mins, remainder = divmod(remainder, 60000)
    secs, ms = divmod(remainder, 1000)
    return f"{hrs:02}:{mins:02}:{secs:02},{ms:03}"

In [9]:
def get_segments():
    """Transcribe ``AUDIO_FILE`` with the module-level ``model``.

    ``transcribe`` hands back a lazy iterator, so the decoding work only
    happens while it is consumed. The iterator is drained here so that the
    progress message is printed after transcription has really finished, and
    so that a decoding error is raised from this step rather than halfway
    through writing the subtitle file.

    Returns:
        list: the transcribed segments, in order.
    """
    segment_iter, _ = model.transcribe(
        AUDIO_FILE,
        vad_filter=False,
        beam_size=5,
        best_of=5,
        word_timestamps=True,
        condition_on_previous_text=False
    )
    segments_clean = list(segment_iter)
    print("\t3/8=> Transcribed Audio")
    return segments_clean

In [10]:
def create_subtitles(segments_clean):
    """Write the given segments to ``DIALOGUE_FILE`` as a UTF-8 SRT file.

    Each segment must expose ``start``, ``end`` (seconds) and ``text``.
    Segments whose text is empty after stripping are skipped: a cue without
    text is not a valid SRT entry. Cue numbers are assigned only to the cues
    actually written, so the numbering stays contiguous.

    Args:
        segments_clean: iterable of segment objects.
    """
    cue_number = 0
    with open(DIALOGUE_FILE, "w", encoding="utf-8") as f:
        for segment in segments_clean:
            text = segment.text.strip()
            if not text:
                continue
            cue_number += 1
            f.write(f"{cue_number}\n")
            f.write(f"{format_time(segment.start)} --> {format_time(segment.end)}\n")
            f.write(text + "\n\n")
    print("\t4/8=> Created Subtitles")

In [11]:
def merge_subtitles(MKV_FILE, DIALOGUE_FILE, OUTPUT_MKV):
    """Mux ``DIALOGUE_FILE`` into a copy of ``MKV_FILE`` written to ``OUTPUT_MKV``.

    The subtitle track is tagged as English and named "AI Created Subtitle".

    Args:
        MKV_FILE: path of the source video.
        DIALOGUE_FILE: path of the SRT file to add.
        OUTPUT_MKV: path of the file mkvmerge should create.

    Raises:
        subprocess.CalledProcessError: if mkvmerge exits with an error status.
    """
    cmd = [
        MKVMERGE,
        "-o", OUTPUT_MKV,
        MKV_FILE,
        "--language", "0:eng",
        "--track-name", "0:AI Created Subtitle",
        # The SRT is written as UTF-8 without a BOM; declare that so mkvmerge
        # does not fall back to the locale's code page for non-ASCII text.
        "--sub-charset", "0:UTF-8",
        DIALOGUE_FILE
    ]
    result = subprocess.run(cmd)
    # mkvmerge exit codes: 0 = success, 1 = warnings (the output file is still
    # written), 2 = error. check=True would treat warnings as a failure.
    if result.returncode not in (0, 1):
        raise subprocess.CalledProcessError(result.returncode, cmd)
    if result.returncode == 1:
        print("\t     mkvmerge finished with warnings; the output file was still written")
    print("\t5/8=> Final MKV created with new Subtitles")

In [12]:
# Batch driver: for every MKV in VIDEO_DIR, transcribe the audio, mux the
# generated subtitles into "<name>_with_subs.mkv" and delete the original.
VIDEO_DIR = "./videos"
OUTPUT_SUFFIX = "_with_subs"

# WORK_DIR is removed at the end of this cell, so recreate it here; otherwise
# re-running only this cell fails as soon as ffmpeg tries to write AUDIO_FILE.
os.makedirs(WORK_DIR, exist_ok=True)

# Leave out files produced by an earlier run so they are not subtitled again.
mkv_files = sorted(
    path for path in Path(VIDEO_DIR).glob("*.mkv")
    if not path.stem.endswith(OUTPUT_SUFFIX)
)
print(f"Found {len(mkv_files)} MKV files")

for i, source_path in enumerate(mkv_files, 1):
    mkv_path = str(source_path)
    # Build the output name from the stem: str.replace would rewrite every
    # ".mkv" occurring anywhere in the path and miss an upper-case ".MKV".
    output_file = str(source_path.with_name(f"{source_path.stem}{OUTPUT_SUFFIX}.mkv"))

    print(f"\n🎬 Processing {i}/{len(mkv_files)} : {mkv_path}")
    start_time = time.time()
    output_verified = False
    try:
        tracks = get_tracks(mkv_path)
        print(f"\t1/8=> Available Tracks: {len(tracks)}")
        extract_audio(mkv_path)
        segments = get_segments()
        create_subtitles(segments)
        merge_subtitles(mkv_path, DIALOGUE_FILE, output_file)
        merged_tracks = get_tracks(output_file)
        print(f"\t6/8=> Finally Available Tracks: {len(merged_tracks)}")
        # Deleting the source is irreversible: only do it once the merged file
        # demonstrably carries more tracks than the original.
        if len(merged_tracks) <= len(tracks):
            raise RuntimeError("Merged file has no additional track; original kept")
        output_verified = True
        os.remove(mkv_path)
        print(f"\t7/8=> 🗑 Deleted original file: {mkv_path}")
    except Exception as e:
        print(f"\n❌ Error processing {mkv_path}")
        print(e)
        # Do not leave a partial or unverified output next to the original.
        if not output_verified and os.path.exists(output_file):
            try:
                os.remove(output_file)
            except OSError as cleanup_error:
                print(cleanup_error)

    finally:
        # Clear the scratch directory so the next video starts clean.
        for item in Path(WORK_DIR).glob("*"):
            if item.is_file():
                item.unlink()
            else:
                shutil.rmtree(item, ignore_errors=True)
        end_time = time.time()
        print(f"\t8/8=> ⏱ Time taken: {round((end_time - start_time)/60, 2)} minutes")

shutil.rmtree(WORK_DIR, ignore_errors=True)
print("\n🧹 WORK_DIR deleted")


Found 22 MKV files

🎬 Processing 1/22 : videos\1_NCED 01.mkv
	1/8=> Available Tracks: 3
	2/8=> Extracted Audio
	3/8=> Transcribed Audio
	4/8=> Created Subtitles
	5/8=> Final MKV created with new Subtitles
	6/8=> Finally Available Tracks: 4
	7/8=> 🗑 Deleted original file: videos\1_NCED 01.mkv
	8/8=> ⏱ Time taken: 0.65 minutes

🎬 Processing 2/22 : videos\1_NCED 02.mkv
	1/8=> Available Tracks: 3
	2/8=> Extracted Audio
	3/8=> Transcribed Audio
	4/8=> Created Subtitles
	5/8=> Final MKV created with new Subtitles
	6/8=> Finally Available Tracks: 4
	7/8=> 🗑 Deleted original file: videos\1_NCED 02.mkv
	8/8=> ⏱ Time taken: 0.86 minutes

🎬 Processing 3/22 : videos\1_NCOP 01.mkv
	1/8=> Available Tracks: 3
	2/8=> Extracted Audio
	3/8=> Transcribed Audio
	4/8=> Created Subtitles
	5/8=> Final MKV created with new Subtitles
	6/8=> Finally Available Tracks: 4
	7/8=> 🗑 Deleted original file: videos\1_NCOP 01.mkv
	8/8=> ⏱ Time taken: 1.08 minutes

üé¨ Processing 4/22 : videos\1_