<a href="https://colab.research.google.com/github/pakmingc/download-youtube-subtitles/blob/main/download_youtube_subtitles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import os
import re
from youtube_transcript_api import YouTubeTranscriptApi
from yt_dlp import YoutubeDL
from google.colab import drive

# Install required libraries
!pip install youtube_transcript_api
!pip install yt_dlp

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
def download_subs(video_id):
    """Fetch a video's subtitles and return them as a single text block.

    English ('en') is looked up first; if that lookup fails, a list of
    Cantonese/Chinese language codes is tried instead.

    Args:
        video_id: Video identifier handed to
            ``YouTubeTranscriptApi.list_transcripts``.

    Returns:
        The subtitle lines joined with newline characters, or ``None`` when
        no matching transcript is found or any step raises (a message is
        printed in both cases).
    """
    chinese_codes = [
        'yue', 'yue-HK', 'zh', 'zh-HK', 'zh-CN', 'zh-Hans',
        'zh-SG', 'zh-Hant', 'zh-TW'
    ]
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        primary_transcript = None

        # Prioritize finding English subtitles
        try:
            primary_transcript = transcript_list.find_transcript(['en'])
        except Exception:
            # Not fatal: fall through to the Chinese lookup below.
            pass

        # If English subtitles are not available, look for Chinese subtitles
        if primary_transcript is None:
            try:
                primary_transcript = transcript_list.find_transcript(chinese_codes)
            except Exception:
                print("English and all possible Chinese subtitles are not available.")
                return None

        # Fetch exactly once; every fetch() call is a separate request.
        fetched = primary_transcript.fetch()

        # Entries are indexed as dicts here. NOTE(review): newer
        # youtube_transcript_api releases appear to yield snippet objects
        # exposing `.text` instead -- confirm against the installed version.
        subs = [
            line['text'] if isinstance(line, dict) else line.text
            for line in fetched
        ]
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    """Look up a video's title with yt-dlp without downloading the media.

    Args:
        video_id: Video ID (or URL) passed to ``YoutubeDL.extract_info``.

    Returns:
        The ``'title'`` entry of the extracted info, or ``None`` when the
        entry is missing or extraction fails (the failure is printed).
    """
    ydl_opts = {}
    try:
        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(video_id, download=False)
    except Exception as e:
        # Report and return None so the caller's "no title" branch handles
        # the failure instead of the exception aborting the whole session.
        print(f"Failed to retrieve video info: {e}")
        return None

    # Guard against a missing info dict before reading the title.
    return (info_dict or {}).get('title', None)

def _extract_video_id(video_url_or_id):
    """Return the video ID found in a YouTube URL, else the stripped input."""
    text = video_url_or_id.strip()
    patterns = (
        # Standard watch URLs: ...watch?v=<id>&...
        r'v=([^&#]+)',
        # Short links and path-style URLs: youtu.be/<id>, /shorts/<id>, ...
        r'(?:youtu\.be/|/(?:shorts|embed|live)/)([^?&#/]+)',
    )
    for pattern in patterns:
        match = re.search(pattern, text)
        if match:
            return match.group(1)
    # No recognised URL shape: treat the input itself as the ID.
    return text


def _safe_filename(title):
    """Replace characters that cannot appear inside one path component."""
    return re.sub(r'[/\x00]', '_', title)


def main(video_url_or_id):
    """Download one video's subtitles, save them to Drive, and print them.

    Args:
        video_url_or_id: A YouTube URL or a bare video ID.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    video_id = _extract_video_id(video_url_or_id)

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles.")
        return

    save_dir = "/content/drive/My Drive/youtube_subtitles"
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    # A '/' in the title would otherwise be read as a directory separator.
    save_path = f"{save_dir}/{_safe_filename(title)}.txt"
    with open(save_path, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path}")
    print("Subtitle content:")
    print(subs)

# Interactive loop: keep processing videos until the user types 'the end'.
while True:
    # Prompt user for YouTube video URL or ID; surrounding whitespace is
    # dropped so a stray space does not defeat the quit check below.
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ").strip()

    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Ignore blank input instead of handing an empty string to main()
    if not video_url_or_id:
        continue

    # Run the main function
    main(video_url_or_id)

Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=WxYC9-hBM_g
[youtube] Extracting URL: WxYC9-hBM_g
[youtube] WxYC9-hBM_g: Downloading webpage
[youtube] WxYC9-hBM_g: Downloading ios player API JSON
[youtube] WxYC9-hBM_g: Downloading android player API JSON
[youtube] WxYC9-hBM_g: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Run your own AI (but private).txt
Subtitle content:
I'm running something called private
ai. It's kind of like chat GPT,
except it's not. Everything about it
is running right here on my computer.
Am I even connected to the internet?
This is private contained and my data
isn't being shared with some random
company. So in this video I
want to do two things. First,
I want to show you how to set this up.
It is ridiculously easy and fast to run
your own AI on your laptop computer or
whatever. It's this is free, it's amazing.
It'll take you about five minutes and
if you stick around 