In [9]:
import os
import yt_dlp
import re
from youtube_transcript_api import YouTubeTranscriptApi
from pytube import YouTube, Playlist
import shutil

# Function to generate a .txt file with the list of videos in the playlist
def generate_video_list_file(playlist_title, video_titles):
    """
    Write a numbered index of video titles to a text file in the current working directory.

    The file is named "<sanitized playlist title>_video_list.txt" and is
    overwritten if it already exists. Each title goes on its own line,
    prefixed with a two-digit, 1-based position ("01. Title").

    Args:
        playlist_title: Playlist name used to build the output file name.
        video_titles: Iterable of titles, in the order they should be listed.
    """
    output_name = f"{sanitize_filename(playlist_title)}_video_list.txt"
    list_file_path = os.path.join(os.getcwd(), output_name)

    # "w" mode truncates any previous list so the file always mirrors this run.
    with open(list_file_path, "w", encoding="utf-8") as handle:
        handle.writelines(
            f"{position:02d}. {title}\n"
            for position, title in enumerate(video_titles, start=1)
        )

    print(f"Video list saved to {list_file_path}")
    
# Helper function to clean invalid characters from filenames
def sanitize_filename(filename):
    """
    Return *filename* with every character from the set < > : " / \\ | ? *
    replaced by an underscore.

    These are the characters that are not allowed in file names on Windows
    (and some of them on other systems). All other characters are left as-is.
    """
    forbidden_chars = '<>:"/\\|?*'
    # Build a character class from the forbidden set; escaping keeps the
    # backslash and regex metacharacters literal.
    forbidden_pattern = "[" + re.escape(forbidden_chars) + "]"
    return re.sub(forbidden_pattern, "_", filename)

# Function to clean the specified directory
def clean_directory(directory):
    """
    Delete everything inside *directory* while keeping the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    recursively. A failure on one entry is reported on stdout and does not stop
    the remaining entries from being processed.
    """
    for entry_name in os.listdir(directory):
        target = os.path.join(directory, entry_name)
        try:
            removable_as_file = os.path.isfile(target) or os.path.islink(target)
            if removable_as_file:
                # Files and links (including links to directories) are unlinked.
                os.unlink(target)
                continue
            if os.path.isdir(target):
                # Real directories are removed together with their contents.
                shutil.rmtree(target)
        except Exception as err:
            print(f"Failed to delete {target}. Reason: {err}")

# Function to download subtitles in both txt and srt format for a video
def download_youtube_subtitles(video_id, directory, video_title, language='en'):
    """
    Fetch the transcript of a video and store it as "<video_title>.txt" and
    "<video_title>.srt" inside *directory*.

    Args:
        video_id: YouTube video ID passed to YouTubeTranscriptApi.
        directory: Existing folder the two subtitle files are written to.
        video_title: Base file name (without extension) for both files.
        language: Language code requested from the transcript API.

    This is best-effort: any exception (no transcript, I/O failure, ...) is
    reported on stdout and swallowed so the caller can continue.
    """
    try:
        # Each transcript entry is indexed by 'text', 'start' and 'duration' below.
        cues = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])

        # --- Plain text: one caption per line, no timing information ---
        plain_text = "\n".join(cue['text'] for cue in cues)
        txt_file_path = os.path.join(directory, f"{video_title}.txt")
        with open(txt_file_path, "w", encoding="utf-8") as txt_out:
            txt_out.write(plain_text)
        print(f"Subtitles (TXT) for '{video_title}' saved.")

        # --- SRT: numbered cues with "start --> end" timestamps ---
        srt_file_path = os.path.join(directory, f"{video_title}.srt")
        with open(srt_file_path, "w", encoding="utf-8") as srt_out:
            for cue_number, cue in enumerate(cues, start=1):
                begins_at = cue['start']
                ends_at = begins_at + cue['duration']
                srt_out.write(f"{cue_number}\n")
                srt_out.write(f"{format_time(begins_at)} --> {format_time(ends_at)}\n")
                srt_out.write(f"{cue['text']}\n\n")
        print(f"Subtitles (SRT) for '{video_title}' saved.")

    except Exception as e:
        print(f"An error occurred with video ID {video_id}: {e}")

# Helper function to format time for SRT files
def format_time(seconds):
    """
    Convert a time offset in seconds to an SRT timestamp "HH:MM:SS,mmm".

    Args:
        seconds: Non-negative offset in seconds (int or float).

    Returns:
        The timestamp string, with zero-padded hours, minutes, seconds and
        milliseconds.
    """
    # Round the whole value to milliseconds once. Taking the fractional part
    # first and truncating it (int((s - int(s)) * 1000)) loses a millisecond on
    # values such as 1.001, whose binary fraction is slightly below 0.001.
    total_milliseconds = round(seconds * 1000)
    whole_seconds, milliseconds = divmod(total_milliseconds, 1000)
    minutes, secs = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"

# Helper function to download a single video using yt-dlp
# Modified download_single_video function to accept a custom filename
def download_single_video(video_id, folder_path, video_filename=None):
    """
    Download one video as "<name>.mp4" into *folder_path*, then fetch its subtitles.

    Args:
        video_id: YouTube video ID.
        folder_path: Existing folder that receives the video and subtitle files.
        video_filename: Base file name without extension. It is sanitized
            before use. When omitted, the video's title is looked up and used.

    If the target .mp4 already exists, nothing is downloaded and the subtitle
    step is skipped as well.
    """
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    # Callers that have no custom name (e.g. single-video mode) fall back to
    # the title reported for the video.
    if video_filename is None:
        video_filename = YouTube(video_url).title

    sanitized_filename = sanitize_filename(video_filename)
    video_file = os.path.join(folder_path, f"{sanitized_filename}.mp4")

    # Skip if the video already exists
    if os.path.exists(video_file):
        print(f"Video '{video_filename}' already exists. Skipping download.")
        return

    # Download the best video and audio available and merge them
    print(f"Downloading video: {video_filename}")
    ydl_opts = {
        # 'outtmpl' is an output *template*: a literal '%' in the path must be
        # written as '%%', otherwise it is parsed as a format field.
        'outtmpl': video_file.replace('%', '%%'),
        'format': 'bestvideo+bestaudio/best',  # Download best video and best audio and merge
        'merge_output_format': 'mp4',          # Merge the video and audio into mp4
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([video_url])
    print(f"Downloaded: {video_filename}")

    # Download subtitles in both .txt and .srt formats
    download_youtube_subtitles(video_id, folder_path, sanitized_filename)

# Function to download videos and subtitles for a playlist
def download_playlist(playlist_url, max_videos):
    """
    Download the first *max_videos* entries of a playlist, with subtitles.

    Files are stored under "<cwd>/<channel name>/<playlist title>/" and each
    video file name is prefixed with its two-digit, 1-based position in the
    playlist. Afterwards a text file listing the processed titles is written
    via generate_video_list_file.

    Args:
        playlist_url: URL of the YouTube playlist.
        max_videos: Number of playlist entries (from the start) to process.
    """
    playlist = Playlist(playlist_url)

    # The video ID is whatever follows "v=" in each playlist entry URL.
    video_ids = []
    for entry_url in playlist.video_urls:
        video_ids.append(entry_url.split("v=")[-1])

    channel_name = sanitize_filename(playlist.owner)
    playlist_title = sanitize_filename(playlist.title)

    # Folder layout: <cwd>/<channel>/<playlist>
    channel_folder_path = os.path.join(os.getcwd(), channel_name)
    playlist_folder_path = os.path.join(channel_folder_path, playlist_title)
    for folder in (channel_folder_path, playlist_folder_path):
        os.makedirs(folder, exist_ok=True)

    print(f"Downloading playlist: {playlist_title}")

    # Titles are collected in playlist order for the index file written below.
    video_titles = []

    print("Starting video download...")
    for position, video_id in enumerate(video_ids[:max_videos], start=1):
        watch_url = f"https://www.youtube.com/watch?v={video_id}"
        title = sanitize_filename(YouTube(watch_url).title)
        video_titles.append(title)

        # The position prefix keeps files sorted in playlist order on disk.
        numbered_name = f"{position:02d}. {title}"
        download_single_video(video_id, playlist_folder_path, numbered_name)

    # Write the index file once every selected entry has been processed.
    generate_video_list_file(playlist_title, video_titles)


# Helper to pull the video ID out of a watch URL or a youtu.be short link
def _extract_video_id(url):
    """
    Return the video ID contained in *url*.

    Handles "...watch?v=<id>&<other params>" and "https://youtu.be/<id>".
    Falls back to the text after the last "v=" when neither form matches.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)

    # Standard watch URL: take only the "v" parameter, ignoring "&t=", "&list=", ...
    v_values = parse_qs(parsed.query).get("v")
    if v_values:
        return v_values[0]

    # Short link: the ID is the first path segment.
    if parsed.netloc.lower().endswith("youtu.be"):
        short_id = parsed.path.strip("/").split("/")[0]
        if short_id:
            return short_id

    return url.split("v=")[-1]


# Main function to download either a playlist or a single video
def download_youtube_content():
    """
    Prompt for a YouTube URL and download it.

    URLs containing "playlist" are treated as playlists: the user is asked how
    many entries to process and download_playlist is called. Any other URL is
    treated as a single video, which is saved under "<cwd>/<channel name>/"
    using the video title as the file name.
    """
    url = input("Please enter the YouTube URL (video or playlist): ").strip()

    if "playlist" in url:
        max_videos = int(input("Enter the maximum number of new videos to download: "))
        download_playlist(url, max_videos)
    else:
        video_id = _extract_video_id(url)
        yt = YouTube(url)
        channel_name = sanitize_filename(yt.author)
        folder_path = os.path.join(os.getcwd(), channel_name)
        os.makedirs(folder_path, exist_ok=True)

        print("Starting subtitle download for a single video...")
        # download_single_video needs a base file name; use the video title
        # (it sanitizes the name itself).
        download_single_video(video_id, folder_path, yt.title)

# Start the program: prompts for a URL and runs the download as soon as this code executes.
# NOTE(review): the call is unguarded, so it also runs (and blocks on input) if this file
# is imported as a module; consider an `if __name__ == "__main__":` guard.
download_youtube_content()


Please enter the YouTube URL (video or playlist):  https://www.youtube.com/playlist?list=PLblh5JKOoLUIxGDQs4LFFD--41Vzf-ME1
Enter the maximum number of new videos to download:  100


Downloading playlist: Neural Networks _ Deep Learning
Starting video download...
Video '01. Happy Halloween (Neural Networks Are Not Scary)' already exists. Skipping download.
Video '02. The Essential Main Ideas of Neural Networks' already exists. Skipping download.
Video '03. The Chain Rule' already exists. Skipping download.
Video '04. Gradient Descent, Step-by-Step' already exists. Skipping download.
Video '05. Neural Networks Pt. 2_ Backpropagation Main Ideas' already exists. Skipping download.
Video '06. Backpropagation Details Pt. 1_ Optimizing 3 parameters simultaneously.' already exists. Skipping download.
Video '07. Backpropagation Details Pt. 2_ Going bonkers with The Chain Rule' already exists. Skipping download.
Video '08. Neural Networks Pt. 3_ ReLU In Action!!!' already exists. Skipping download.
Video '09. Neural Networks Pt. 4_ Multiple Inputs and Outputs' already exists. Skipping download.
Video '10. Neural Networks Part 5_ ArgMax and SoftMax' already exists. Skipping 