In [70]:
# Install the Python dependencies used by this notebook (metadata/caption
# libraries, date parsing, progress bars, and Whisper for speech-to-text).
!pip -q install yt-dlp youtube-transcript-api python-dateutil tqdm
!pip -q install openai-whisper ffmpeg-python
# ffmpeg is installed as a system package; apt's stdout is discarded to keep the cell quiet.
!apt-get -y install ffmpeg >/dev/null

In [71]:
import os
import re
import glob
import time
import json
from dataclasses import dataclass
from datetime import datetime, date
from typing import List, Optional, Dict, Tuple

from dateutil.parser import parse as dtparse
from tqdm.auto import tqdm

import yt_dlp

# youtube-transcript-api imports (compatible with multiple versions)
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter


# Configuration defaults
# Caption language codes used when the user leaves the language prompt blank.
DEFAULT_LANGS = ["en", "en-US", "en-GB","en-orig"]
# Default pause (seconds) between consecutive videos in the processing loop.
DEFAULT_DELAY_SECONDS = 1.0
# NOTE(review): not referenced by the code visible in this notebook.
DEFAULT_SORT_MODE = "as_is"  # "as_is" | "date_asc" | "date_desc"
# Output file used when the user leaves the filename prompt blank.
DEFAULT_OUTPUT_FILE = "combined_transcripts.txt"

# Scratch directory for files yt-dlp downloads (subtitles, audio); created when this cell runs.
TMP_SUB_DIR = "/content/_yt_subs_tmp"
os.makedirs(TMP_SUB_DIR, exist_ok=True)

# Shared youtube-transcript-api formatter used to turn transcript data into plain text.
formatter = TextFormatter()

# Data model
@dataclass
class VideoItem:
    """Metadata for a single video, as produced by `extract_video_info`."""
    video_id: str  # yt-dlp "id" field
    title: str  # yt-dlp "title", or "(No title)" when missing
    publish_date: Optional[date]  # parsed from yt-dlp "upload_date"; None when absent or unparseable
    url: str  # yt-dlp "webpage_url", falling back to the URL that was requested

In [72]:
# Utilities
def parse_user_date(s: str) -> Optional[date]:
    """Turn a free-form date typed by the user into a `date`.

    Blank or missing input yields None. Any other text is handed to
    dateutil's parser, so its exceptions propagate for unparseable input.
    """
    text = (s or "").strip()
    # Accept "YYYY-MM-DD", "MM/DD/YYYY", "Jan 5 2024", etc.
    return dtparse(text).date() if text else None

def yyyymmdd_to_date(s: Optional[str]) -> Optional[date]:
    """Convert a compact "YYYYMMDD" string into a `date`.

    Returns None for empty/missing input and for anything that
    `datetime.strptime` cannot parse with the "%Y%m%d" format.
    """
    if s:
        try:
            parsed = datetime.strptime(s, "%Y%m%d")
        except Exception:
            return None
        return parsed.date()
    return None

def fmt_date(d: Optional[date]) -> str:
    """Render a date as ISO "YYYY-MM-DD", or "Unknown Date" when it is missing."""
    if d:
        return d.isoformat()
    return "Unknown Date"

def normalize_channel_input(channel: str) -> str:
    """
    Turn whatever the user typed for a channel into a URL.

    Handled forms:
      - "@handle"            -> the handle's /videos page
      - http(s) URLs         -> returned untouched
      - bare "handle" text   -> treated as a handle without the "@"
    Anything else is returned as typed. Raises ValueError on blank input.
    """
    cleaned = (channel or "").strip()
    if not cleaned:
        raise ValueError("Channel input is empty.")

    if cleaned.startswith("@"):
        return f"https://www.youtube.com/{cleaned}/videos"

    if cleaned.startswith(("http://", "https://")):
        return cleaned

    # A single token made of handle-safe characters is assumed to be a handle
    # pasted without its leading "@"; everything else falls through unchanged.
    is_bare_handle = re.match(r"^[A-Za-z0-9_.-]+$", cleaned) is not None
    return f"https://www.youtube.com/@{cleaned}/videos" if is_bare_handle else cleaned

def parse_selection(selection: str, n: int) -> List[int]:
    """
    Parse a 1-based selection string into sorted, de-duplicated 0-based indices.

    Accepted forms (comma-separated, whitespace ignored):
      - "all" / "a" / "*"   -> every index 0..n-1
      - "1,2,5"             -> individual items
      - "1-5,8,10-12"       -> inclusive ranges; "5-1" is read as "1-5"

    Numbers outside 1..n are ignored; ranges are clamped to 1..n before being
    expanded, so an oversized bound costs nothing. Blank input yields [].

    Raises:
        ValueError: if a token is neither an integer nor an "a-b" range.
    """
    selection = (selection or "").strip().lower()
    if selection in ("all", "a", "*"):
        return list(range(n))

    idxs = set()
    for token in (part.strip() for part in selection.split(",")):
        if not token:
            continue
        try:
            if "-" in token:
                first, last = token.split("-", 1)
                lo, hi = int(first), int(last)
            else:
                lo = hi = int(token)
        except ValueError:
            raise ValueError(
                f"Invalid selection {token!r}: expected a number or a range like '3-7'."
            ) from None

        # Accept ranges typed backwards instead of silently selecting nothing.
        if lo > hi:
            lo, hi = hi, lo
        # Clamp before expanding so "1-999999999" does not iterate a billion times.
        lo, hi = max(lo, 1), min(hi, n)
        idxs.update(range(lo - 1, hi))
    return sorted(idxs)

In [73]:
def sort_videos(videos: List[VideoItem], mode: str) -> List[VideoItem]:
    """
    Order videos by publish date.

    Modes (case-insensitive, blank treated as "as_is"):
      - "as_is"     -> the input list is returned unchanged
      - "date_asc"  -> oldest first
      - "date_desc" -> newest first
    Any other mode also returns the input unchanged.

    In both dated modes, videos without a publish date are placed last and
    ties are broken by case-insensitive title, ascending. (Reversing the
    ascending sort would wrongly move undated videos to the front.)
    """
    mode = (mode or "as_is").strip().lower()
    if mode not in ("date_asc", "date_desc"):
        return videos

    def by_title(v: VideoItem) -> str:
        return v.title.lower()

    dated = sorted((v for v in videos if v.publish_date is not None), key=by_title)
    undated = sorted((v for v in videos if v.publish_date is None), key=by_title)

    # list.sort is stable (also with reverse=True), so the title order
    # established above survives as the tie-break within each date.
    dated.sort(key=lambda v: v.publish_date, reverse=(mode == "date_desc"))
    return dated + undated

In [74]:
# yt-dlp metadata extraction
def extract_video_info(url: str) -> Optional[VideoItem]:
    """
    Look up a single video's id, title and upload date via yt-dlp.

    Nothing is downloaded. Returns None when yt-dlp yields no info, when the
    info has no id, or when any exception occurs during the lookup.
    """
    options = {
        "quiet": True,
        "skip_download": True,
        "ignoreerrors": True,
        "no_warnings": True,
        # Helps reduce chances of grabbing playlists
        "noplaylist": True,
    }
    try:
        with yt_dlp.YoutubeDL(options) as ydl:
            meta = ydl.extract_info(url, download=False)

        video_id = meta.get("id") if meta else None
        if not video_id:
            return None

        return VideoItem(
            video_id=video_id,
            title=meta.get("title") or "(No title)",
            publish_date=yyyymmdd_to_date(meta.get("upload_date")),
            url=meta.get("webpage_url") or url,
        )
    except Exception:
        # Best-effort lookup: callers treat None as "skip this video".
        return None

def list_channel_videos(channel_url: str, scan_limit: Optional[int] = None) -> List[str]:
    """
    Collect video URLs from a channel page with a flat (metadata-free) yt-dlp listing.

    `scan_limit` is passed to yt-dlp as "playlistend"; None means no limit.
    Only URLs are returned here; per-video metadata is fetched separately.
    """
    options = {
        "quiet": True,
        "skip_download": True,
        "extract_flat": "in_playlist",
        "ignoreerrors": True,
        "no_warnings": True,
        "playlistend": scan_limit,
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        listing = ydl.extract_info(channel_url, download=False)

    found = []
    if not listing:
        return found

    for entry in (listing.get("entries") or []):
        ref = (entry.get("id") or entry.get("url")) if entry else None
        if not ref:
            continue
        if len(ref) == 11 and re.match(r"^[A-Za-z0-9_-]{11}$", ref):
            # Looks like a bare video id: build the watch URL.
            found.append(f"https://www.youtube.com/watch?v={ref}")
        elif str(ref).startswith("http"):
            # Sometimes yt-dlp gives full URLs
            found.append(str(ref))
    return found

In [75]:
def search_videos(query: str, max_results: int = 20) -> List[VideoItem]:
    """
    Run a yt-dlp search ("ytsearchN:query") and return metadata for each hit.

    The search itself is a flat listing that only yields ids; each id is then
    resolved with `extract_video_info`, and videos whose lookup fails are
    dropped.

    Returns [] for a blank query, a non-positive `max_results`, or when
    yt-dlp returns no result object.
    """
    query = (query or "").strip()
    if not query or max_results <= 0:
        return []

    search_expr = f"ytsearch{max_results}:{query}"

    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "ignoreerrors": True,
        "no_warnings": True,
        "extract_flat": "in_playlist",
    }

    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(search_expr, download=False)
    # With "ignoreerrors" a failed extraction can come back as None.
    if not info:
        return []

    urls = []
    for e in (info.get("entries") or []):
        if not e:
            continue
        vid = e.get("id") or e.get("url")
        # Same 11-character id check that list_channel_videos applies.
        if vid and re.match(r"^[A-Za-z0-9_-]{11}$", str(vid)):
            urls.append(f"https://www.youtube.com/watch?v={vid}")

    return [vi for vi in map(extract_video_info, urls) if vi]

In [76]:
# Transcript extraction
def looks_like_block_error(ex: Exception) -> bool:
    """Heuristically decide whether an exception signals IP blocking or rate limiting.

    Checks the exception's class name and its message (both lower-cased) for
    known markers.
    """
    class_name = ex.__class__.__name__.lower()
    message = str(ex).lower()

    # Common class/message patterns from youtube-transcript-api
    if any(marker in class_name for marker in ("ipblocked", "requestblocked")):
        return True
    message_markers = ("ipblocked", "requestblocked", "too many requests", "429")
    return any(marker in message for marker in message_markers)

def list_available_subtitles(video_url: str):
    """Return `(subtitles, automatic_captions)` for a video as reported by yt-dlp.

    Each element is the corresponding mapping from yt-dlp's info dict, or an
    empty dict when that key is missing/empty. Nothing is downloaded.
    """
    options = {
        "quiet": True,
        "skip_download": True,
        "no_warnings": True,
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        meta = ydl.extract_info(video_url, download=False)

    manual_tracks = meta.get("subtitles") or {}
    auto_tracks = meta.get("automatic_captions") or {}
    return manual_tracks, auto_tracks

def fetch_transcript_yta(video_id: str, languages: List[str], translate_to: Optional[str]) -> Optional[str]:
    """
    Fetch a transcript as plain text through youtube-transcript-api.

    Strategy:
      1. When no translation is requested, try the classic static
         `get_transcript` helper if the installed version has it.
      2. Otherwise (or if step 1 produced nothing) list the video's
         transcripts, pick the first one matching `languages` (falling back
         to any available transcript), translate it to `translate_to` when
         requested and possible, and fetch it.

    The listing step uses the static `list_transcripts` helper when present
    and the instance method `YouTubeTranscriptApi().list(...)` otherwise, so
    releases that dropped the static helpers are still supported.

    Returns:
        The transcript text, or None when nothing could be retrieved.

    Raises:
        Exception: re-raises any error that `looks_like_block_error` flags as
        blocking/rate limiting, so the caller can stop hammering YouTube.
    """
    # get_transcript() cannot translate, so it is only a valid shortcut when
    # the caller did not ask for a translation.
    if not translate_to and hasattr(YouTubeTranscriptApi, "get_transcript"):
        try:
            data = YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
            text = formatter.format_transcript(data).strip()
            if text:
                return text
        except Exception as e:
            # We'll try the list/translate approach next (unless it's a hard block)
            if looks_like_block_error(e):
                raise

    try:
        if hasattr(YouTubeTranscriptApi, "list_transcripts"):
            tlist = YouTubeTranscriptApi.list_transcripts(video_id)
        elif hasattr(YouTubeTranscriptApi, "list"):
            # Newer releases expose listing on an instance instead.
            tlist = YouTubeTranscriptApi().list(video_id)
        else:
            return None

        try:
            tr = tlist.find_transcript(languages)
        except Exception:
            # fall back: pick any transcript
            tr = next(iter(tlist), None)
            if tr is None:
                return None

        needs_translation = (
            translate_to
            and getattr(tr, "is_translatable", False)
            # Translating a track into its own language is pointless.
            and getattr(tr, "language_code", None) != translate_to
        )
        if needs_translation:
            tr = tr.translate(translate_to)

        data = tr.fetch()
        text = formatter.format_transcript(data).strip()
        return text if text else None
    except Exception as e:
        if looks_like_block_error(e):
            raise
        return None

def vtt_to_text(vtt: str) -> str:
    """
    Reduce a WebVTT document to its caption text, one caption line per line.

    Dropped: the whole header block (the "WEBVTT" line plus metadata such as
    "Kind:"/"Language:" up to the first blank line), NOTE/STYLE/REGION blocks,
    cue timing lines, cue identifiers (a line directly above a timing line),
    and inline markup tags. HTML entities are decoded, and consecutive
    duplicate lines are collapsed (auto-captions repeat rolling lines).

    Numeric caption text such as "42" is kept; only identifiers that sit
    directly above a timing line are removed.
    """
    import html  # stdlib; imported locally because only this function needs it

    # A UTF-8 BOM would otherwise hide the leading "WEBVTT" signature.
    raw_lines = [ln.strip() for ln in (vtt or "").lstrip("\ufeff").splitlines()]

    lines_out = []
    at_block_start = True   # the previous line was blank (or this is the first line)
    skipping_block = False  # inside a header/NOTE/STYLE/REGION block
    for i, line in enumerate(raw_lines):
        if not line:
            at_block_start = True
            skipping_block = False
            continue
        starts_block, at_block_start = at_block_start, False

        if skipping_block:
            continue
        if starts_block and re.match(r"^(WEBVTT|NOTE|STYLE|REGION)\b", line):
            skipping_block = True
            continue
        if "-->" in line:
            continue
        # A cue identifier is the line that opens a block right above its timing line.
        if starts_block and i + 1 < len(raw_lines) and "-->" in raw_lines[i + 1]:
            continue

        # Strip tags first so escaped text like "&lt;b&gt;" survives as literal "<b>".
        line = html.unescape(re.sub(r"<[^>]+>", "", line)).strip()
        if line:
            lines_out.append(line)

    # Light de-dup: remove consecutive duplicates
    cleaned = []
    prev = None
    for text_line in lines_out:
        if text_line != prev:
            cleaned.append(text_line)
        prev = text_line
    return "\n".join(cleaned).strip()

def fetch_transcript_ytdlp_subs(video: VideoItem, langs: List[str]) -> Optional[str]:
    """
    Fallback: download subtitles (manual/auto) via yt-dlp, parse VTT to text.

    Subtitle files are written to TMP_SUB_DIR as "<id>.<lang>.vtt". When
    several tracks were downloaded, the one whose language comes first in
    `langs` wins, followed by the built-in English defaults, then any other
    track.

    Returns:
        The caption text, or None if the download failed, produced no VTT
        file, or the file contained no text.
    """
    # Clean old files for this video id
    for stale in glob.glob(os.path.join(TMP_SUB_DIR, f"{video.video_id}.*")):
        try:
            os.remove(stale)
        except OSError:
            pass

    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "writesubtitles": True,
        "writeautomaticsub": True,
        "subtitleslangs": langs,
        "subtitlesformat": "vtt",
        # yt-dlp inserts the subtitle language before the extension itself.
        # Putting %(language)s (the video's own language field) in the template
        # would make every track's filename look like that language.
        "outtmpl": os.path.join(TMP_SUB_DIR, "%(id)s.%(ext)s"),
        "no_warnings": True,
        "ignoreerrors": True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video.url])
    except Exception:
        return None

    # Find any downloaded VTT for this video
    vtts = sorted(glob.glob(os.path.join(TMP_SUB_DIR, f"*{video.video_id}*.vtt")))
    if not vtts:
        return None

    # Caller's preference order first, then English defaults; dict.fromkeys
    # de-duplicates while keeping that order.
    preferred = list(dict.fromkeys([*(langs or []), "en", "en-orig", "en-US", "en-GB"]))

    def score(path: str) -> int:
        # lower is better
        name = os.path.basename(path)
        for i, lang in enumerate(preferred):
            if name.endswith(f".{lang}.vtt"):
                return i
        return len(preferred)

    # min() keeps the first of equally ranked paths, matching a stable sort.
    best_vtt = min(vtts, key=score)

    with open(best_vtt, "r", encoding="utf-8", errors="ignore") as f:
        vtt = f.read()

    text = vtt_to_text(vtt)
    return text if text else None


import whisper

# Whisper checkpoint used for the speech-to-text fallback.
# "tiny" is the fastest; "base" / "small" trade speed for accuracy.
WHISPER_MODEL_NAME = "tiny"

# Loaded on first use by transcribe_with_whisper, so running this cell does
# not pay the model-loading cost unless the fallback is actually needed.
_whisper_model = None

def transcribe_with_whisper(video: VideoItem) -> Optional[str]:
    """
    Last-resort transcript: download the video's audio and run Whisper on it.

    The audio is saved as "<video_id>.mp3" in TMP_SUB_DIR via yt-dlp's
    FFmpegExtractAudio post-processor. The Whisper model named by
    WHISPER_MODEL_NAME is loaded once and cached in `_whisper_model`.

    Returns:
        The transcribed text, or None if the download, model load or
        transcription failed, or if the result was empty.
    """
    global _whisper_model

    audio_path = os.path.join(TMP_SUB_DIR, f"{video.video_id}.mp3")

    # download audio only
    ydl_opts = {
        "quiet": True,
        "format": "bestaudio/best",
        "outtmpl": audio_path,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "mp3",
            "preferredquality": "192",
        }],
        "no_warnings": True,
        "ignoreerrors": True,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([video.url])
    except Exception:
        return None

    if not os.path.exists(audio_path):
        # sometimes yt-dlp creates audio_path with different ext
        mp3s = glob.glob(os.path.join(TMP_SUB_DIR, f"{video.video_id}*.mp3"))
        if mp3s:
            audio_path = mp3s[0]
        else:
            return None

    try:
        # Load lazily, and only once there is audio to transcribe.
        if _whisper_model is None:
            _whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
        result = _whisper_model.transcribe(audio_path)
        text = (result.get("text") or "").strip()
        return text if text else None
    except Exception:
        return None

In [77]:
# Output formatting
def write_video_block(fpath: str, video: VideoItem, transcript_text: str, append: bool = True) -> None:
    """
    Write one video's transcript block to `fpath`.

    The block is a header line with the title and publish date, the trimmed
    transcript, an end marker, and two blank lines. With `append=False` the
    file is truncated first; otherwise the block is added at the end.
    """
    header = f"++++++++++++++[{video.title} | {fmt_date(video.publish_date)}]"
    body = (transcript_text or "").strip()
    end = "==============END================="
    block = f"{header}\n{body}\n{end}\n\n\n"

    with open(fpath, "a" if append else "w", encoding="utf-8") as f:
        f.write(block)

In [78]:
# Main interactive flow
def _prompt_number(prompt: str, cast, default):
    """Ask until the user enters something `cast` accepts; blank input returns `default`."""
    while True:
        raw = input(prompt).strip()
        if not raw:
            return default
        try:
            return cast(raw)
        except ValueError:
            print(f"'{raw}' is not a valid number; please try again.")


def run():
    """
    Interactive entry point: collect transcripts into one combined text file.

    Flow:
      1. Ask for mode (channel or search), languages, optional translation
         target, delay, and output file.
      2. Build the video list (channel listing with optional date cutoff, or
         search results with optional manual selection) and optionally sort it.
      3. Ask for a 1-based start/end batch range.
      4. For each video try, in order: youtube-transcript-api, yt-dlp
         subtitles, and Whisper (Whisper only when the video lists no captions
         at all). A "[FAILED: ...]" marker is written when nothing works.
      5. Stop early if YouTube appears to be blocking/rate limiting requests.

    Invalid numeric, date and selection input is re-prompted instead of
    raising. At the end a Colab file download is attempted.
    """
    print("YouTube Transcript Collector (Channel or Search) -> Single Combined Text File")
    print("NOTE: Some videos have no transcripts/captions. Also, Colab cloud IPs may be blocked by YouTube.\n")

    mode = input("Choose mode: (1) Channel  (2) Search  [1/2]: ").strip() or "1"
    mode = "channel" if mode == "1" else "search"

    langs_in = input(f"Preferred languages (comma-separated) [default: {','.join(DEFAULT_LANGS)}]: ").strip()
    langs = [x.strip() for x in langs_in.split(",") if x.strip()] if langs_in else DEFAULT_LANGS

    translate_to = input("Optional: translate to language code (e.g. 'en') or press Enter for no translation: ").strip() or None

    delay_s = _prompt_number(
        f"Delay between videos in seconds [default: {DEFAULT_DELAY_SECONDS}]: ",
        float,
        DEFAULT_DELAY_SECONDS,
    )

    out_path = input(f"Output filename [default: {DEFAULT_OUTPUT_FILE}]: ").strip() or DEFAULT_OUTPUT_FILE

    append = True
    if os.path.exists(out_path):
        ans = input(f"File '{out_path}' exists. Append to it? [Y/n]: ").strip().lower()
        append = (ans != "n")

    videos: List[VideoItem] = []

    if mode == "channel":
        while True:
            try:
                channel_url = normalize_channel_input(input("Enter YouTube channel URL or @handle: ").strip())
                break
            except ValueError as e:
                print(e)

        scan_limit = _prompt_number(
            "Optional: limit how many channel videos to scan (blank=all): ", int, None
        )
        if scan_limit is not None and scan_limit <= 0:
            scan_limit = None  # a non-positive limit is treated as "no limit"

        date_filter_mode = input("Channel option: (a) all videos  (b) only videos on/after a date  [a/b]: ").strip().lower() or "a"
        cutoff = None
        if date_filter_mode == "b":
            while True:
                try:
                    cutoff = parse_user_date(input("Enter cutoff date (e.g. 2024-01-01): ").strip())
                    break
                except (ValueError, OverflowError):
                    print("Could not understand that date; please try again (blank = no cutoff).")
            # parse_user_date returns None for blank input, so guard before formatting.
            if cutoff:
                print(f"Will keep only videos on/after {cutoff.isoformat()}")
            else:
                print("No cutoff date entered; keeping all videos.")

        # 1) Getting candidate video URLs quickly
        urls = list_channel_videos(channel_url, scan_limit=scan_limit)
        print(f"\nFound {len(urls)} video URLs to evaluate.\n")

        # 2) Extracting metadata per video and applying date filter
        #    (videos with an unknown publish date are kept).
        for u in tqdm(urls, desc="Reading video metadata"):
            vi = extract_video_info(u)
            if not vi:
                continue
            if cutoff and vi.publish_date and vi.publish_date < cutoff:
                continue
            videos.append(vi)

    else:
        query = input("Enter search term: ").strip()
        max_results = _prompt_number("How many search results to fetch? [default: 20]: ", int, 20)

        videos = search_videos(query, max_results=max_results)

        if not videos:
            print("No videos found.")
            return

        print("\nSearch results:")
        for i, v in enumerate(videos, start=1):
            print(f"{i:>2}. {v.title}  |  {fmt_date(v.publish_date)}  |  {v.url}")

        pick_mode = input("\nSearch option: (a) select specific videos  (b) select all  [a/b]: ").strip().lower() or "b"
        if pick_mode == "a":
            while True:
                sel = input("Enter selections like '1,2,5-7' (or 'all'): ").strip()
                try:
                    idxs = parse_selection(sel, len(videos))
                    break
                except ValueError as e:
                    print(f"Invalid selection ({e}); please try again.")
            videos = [videos[i] for i in idxs]

    if not videos:
        print("No videos selected after filtering.")
        return

    # Listing/search order is kept by default.
    sort_mode = input("Order: (1) as-is  (2) by date asc  (3) by date desc  [1/2/3]: ").strip() or "1"
    sort_mode = {"1": "as_is", "2": "date_asc", "3": "date_desc"}.get(sort_mode, "as_is")
    videos = sort_videos(videos, sort_mode)

    print(f"\nSelected {len(videos)} videos total.")
    print("Tip: For large runs, do batching with start/end indices.\n")

    start_num = _prompt_number("Start index (1-based) [default: 1]: ", int, 1)
    end_num = _prompt_number(f"End index (1-based) [default: {len(videos)}]: ", int, len(videos))

    # Clamp to the valid range so 0 or negative input cannot turn into a
    # Python negative index, and an oversized end is reported accurately.
    start_i = max(start_num, 1) - 1
    end_i = min(max(end_num, 0), len(videos))
    batch = videos[start_i:end_i]
    if not batch:
        print("Selected range is empty; nothing to process.")
        return

    print(f"\nProcessing {len(batch)} videos (items {start_i+1}..{end_i}) -> {out_path}\n")

    # Process
    blocked_hard_stop = False
    for v in tqdm(batch, desc="Fetching transcripts"):
        if delay_s > 0:
            time.sleep(delay_s)

        # List the available captions once per video. A failure here must not
        # abort the batch, but it also means we cannot claim "no captions".
        try:
            subs, autos = list_available_subtitles(v.url)
            captions_listed = True
        except Exception as e:
            print("\nCould not list subtitles:", repr(e))
            subs, autos = {}, {}
            captions_listed = False
        print("\nDEBUG for:", v.title)
        print("Manual subtitle languages:", list(subs.keys()))
        print("Auto subtitle languages:", list(autos.keys()))

        # Attempt 1: youtube-transcript-api
        transcript_text = None
        try:
            transcript_text = fetch_transcript_yta(v.video_id, langs, translate_to)
        except Exception as e:
            print("\nYTA ERROR:", repr(e))
            if looks_like_block_error(e):
                print("LIKELY BLOCKED / RATE LIMITED.")
                blocked_hard_stop = True

        # Attempt 2: yt-dlp subtitle fallback (only if not blocked)
        if (not transcript_text) and (not blocked_hard_stop):
            transcript_text = fetch_transcript_ytdlp_subs(v, langs)

        # Attempt 3: Whisper fallback (only if NO captions exist, nothing was
        # retrieved so far, and we are not blocked)
        no_captions = captions_listed and not subs and not autos
        if (not transcript_text) and (not blocked_hard_stop) and no_captions:
            transcript_text = transcribe_with_whisper(v)

        if not transcript_text:
            transcript_text = f"[FAILED: No transcript returned. VideoID={v.video_id}]"

        write_video_block(out_path, v, transcript_text, append=append)
        append = True  # once we've written one block, always append

        if blocked_hard_stop:
            break

    print("\nDone.")
    print(f"Combined transcript file: {out_path}")

    try:
        from google.colab import files
        files.download(out_path)
    except Exception:
        print("If download didn't trigger, you can find the file in the Colab file browser on the left.")

In [80]:
# Start the interactive collector; it prompts for input and writes the combined transcript file.
run()

YouTube Transcript Collector (Channel or Search) -> Single Combined Text File
NOTE: Some videos have no transcripts/captions. Also, Colab cloud IPs may be blocked by YouTube.

Choose mode: (1) Channel  (2) Search  [1/2]: 1
Preferred languages (comma-separated) [default: en,en-US,en-GB,en-orig]: 
Optional: translate to language code (e.g. 'en') or press Enter for no translation: en
Delay between videos in seconds [default: 1.0]: 0.05
Output filename [default: combined_transcripts.txt]: channel_final
Enter YouTube channel URL or @handle: @ClipKingsChannel
Optional: limit how many channel videos to scan (blank=all): 10
Channel option: (a) all videos  (b) only videos on/after a date  [a/b]: a

Found 10 video URLs to evaluate.



Reading video metadata:   0%|          | 0/10 [00:00<?, ?it/s]

Order: (1) as-is  (2) by date asc  (3) by date desc  [1/2/3]: 1

Selected 10 videos total.
Tip: For large runs, do batching with start/end indices.

Start index (1-based) [default: 1]: 
End index (1-based) [default: 10]: 

Processing 10 videos (items 1..10) -> channel_final



Fetching transcripts:   0%|          | 0/10 [00:00<?, ?it/s]


DEBUG for: "If I Would've Died, You Would've Seen my Name at The Post Office" (Joel Edgerton) | Jane Got a Gun
Manual subtitle languages: []
Auto subtitle languages: ['ab', 'aa', 'af', 'ak', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bn', 'ba', 'eu', 'be', 'bho', 'bs', 'br', 'bg', 'my', 'ca', 'ceb', 'zh-Hans', 'zh-Hant', 'co', 'hr', 'cs', 'da', 'dv', 'nl', 'dz', 'en-orig', 'en', 'eo', 'et', 'ee', 'fo', 'fj', 'fil', 'fi', 'fr', 'gaa', 'gl', 'lg', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'id', 'iu', 'ga', 'it', 'ja', 'jv', 'kl', 'kn', 'kk', 'kha', 'km', 'rw', 'ko', 'kri', 'ku', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lua', 'luo', 'lb', 'mk', 'mg', 'ms', 'ml', 'mt', 'gv', 'mi', 'mr', 'mn', 'mfe', 'ne', 'new', 'nso', 'no', 'ny', 'oc', 'or', 'om', 'os', 'pam', 'ps', 'fa', 'pl', 'pt', 'pt-PT', 'pa', 'qu', 'ro', 'rn', 'ru', 'sm', 'sg', 'sa', 'gd', 'sr', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'st', 'es', 'su', 'sw', 'ss', 'sv', 'tg', 'ta', 'tt', '




Manual subtitle languages: []
Auto subtitle languages: ['ab', 'aa', 'af', 'ak', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bn', 'ba', 'eu', 'be', 'bho', 'bs', 'br', 'bg', 'my', 'ca', 'ceb', 'zh-Hans', 'zh-Hant', 'co', 'hr', 'cs', 'da', 'dv', 'nl', 'dz', 'en-orig', 'en', 'eo', 'et', 'ee', 'fo', 'fj', 'fil', 'fi', 'fr', 'gaa', 'gl', 'lg', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'id', 'iu', 'ga', 'it', 'ja', 'jv', 'kl', 'kn', 'kk', 'kha', 'km', 'rw', 'ko', 'kri', 'ku', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lua', 'luo', 'lb', 'mk', 'mg', 'ms', 'ml', 'mt', 'gv', 'mi', 'mr', 'mn', 'mfe', 'ne', 'new', 'nso', 'no', 'ny', 'oc', 'or', 'om', 'os', 'pam', 'ps', 'fa', 'pl', 'pt', 'pt-PT', 'pa', 'qu', 'ro', 'rn', 'ru', 'sm', 'sg', 'sa', 'gd', 'sr', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'st', 'es', 'su', 'sw', 'ss', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'bo', 'ti', 'to', 'ts', 'tn', 'tum', 'tr', 'tk', 'uk', 'ur', 'ug', 'uz', 've', 'vi', 'war', 'cy', 'f




DEBUG for: Donnie Yen VS Hiroyuki Sanada (Full Scene) | John Wick: Chapter 4
Manual subtitle languages: []
Auto subtitle languages: ['ab', 'aa', 'af', 'ak', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bn', 'ba', 'eu', 'be', 'bho', 'bs', 'br', 'bg', 'my', 'ca', 'ceb', 'zh-Hans', 'zh-Hant', 'co', 'hr', 'cs', 'da', 'dv', 'nl', 'dz', 'en-orig', 'en', 'eo', 'et', 'ee', 'fo', 'fj', 'fil', 'fi', 'fr', 'gaa', 'gl', 'lg', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'id', 'iu', 'ga', 'it', 'ja', 'jv', 'kl', 'kn', 'kk', 'kha', 'km', 'rw', 'ko', 'kri', 'ku', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lua', 'luo', 'lb', 'mk', 'mg', 'ms', 'ml', 'mt', 'gv', 'mi', 'mr', 'mn', 'mfe', 'ne', 'new', 'nso', 'no', 'ny', 'oc', 'or', 'om', 'os', 'pam', 'ps', 'fa', 'pl', 'pt', 'pt-PT', 'pa', 'qu', 'ro', 'rn', 'ru', 'sm', 'sg', 'sa', 'gd', 'sr', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'st', 'es', 'su', 'sw', 'ss', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'bo', 'ti', 'to', 'ts',




DEBUG for: Bruce Willis Kidnaps His Love Interest (Full Scene) | RED
Manual subtitle languages: []
Auto subtitle languages: ['ab', 'aa', 'af', 'ak', 'sq', 'am', 'ar', 'hy', 'as', 'ay', 'az', 'bn', 'ba', 'eu', 'be', 'bho', 'bs', 'br', 'bg', 'my', 'ca', 'ceb', 'zh-Hans', 'zh-Hant', 'co', 'hr', 'cs', 'da', 'dv', 'nl', 'dz', 'en-orig', 'en', 'eo', 'et', 'ee', 'fo', 'fj', 'fil', 'fi', 'fr', 'gaa', 'gl', 'lg', 'ka', 'de', 'el', 'gn', 'gu', 'ht', 'ha', 'haw', 'iw', 'hi', 'hmn', 'hu', 'is', 'ig', 'id', 'iu', 'ga', 'it', 'ja', 'jv', 'kl', 'kn', 'kk', 'kha', 'km', 'rw', 'ko', 'kri', 'ku', 'ky', 'lo', 'la', 'lv', 'ln', 'lt', 'lua', 'luo', 'lb', 'mk', 'mg', 'ms', 'ml', 'mt', 'gv', 'mi', 'mr', 'mn', 'mfe', 'ne', 'new', 'nso', 'no', 'ny', 'oc', 'or', 'om', 'os', 'pam', 'ps', 'fa', 'pl', 'pt', 'pt-PT', 'pa', 'qu', 'ro', 'rn', 'ru', 'sm', 'sg', 'sa', 'gd', 'sr', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'st', 'es', 'su', 'sw', 'ss', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'bo', 'ti', 'to', 'ts', 'tn', '




Done.
Combined transcript file: channel_final


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>