<a href="https://colab.research.google.com/github/pakmingc/YouTube-AI-Summarizer_withPDF/blob/main/YouTube_AI_Summarizer_withPDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    openai.api_key = api_key

    summaries = []
    chunk_size = 6000

    for i in range(0, len(subs), chunk_size):
        chunk = subs[i:i+chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"

        if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.7
            )
            summaries.append(response['choices'][0]['message']['content'].strip())
        else:
            return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles. Skipping summary generation.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

    files.download(pdf_path)

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = getpass("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)

Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=s1M8scmaWxc
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 1
[youtube] Extracting URL: s1M8scmaWxc
[youtube] s1M8scmaWxc: Downloading webpage
[youtube] s1M8scmaWxc: Downloading ios player API JSON
[youtube] s1M8scmaWxc: Downloading android player API JSON




[youtube] s1M8scmaWxc: Downloading m3u8 information


APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742


In [None]:
!pip install youtube_transcript_api yt_dlp openai==0.28 fpdf

import os
import re
from youtube_transcript_api import YouTubeTranscriptApi
from yt_dlp import YoutubeDL
from google.colab import drive
import openai
from fpdf import FPDF
from getpass import getpass
from google.colab import files

# Mount Google Drive
drive.mount('/content/drive')

def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    openai.api_key = api_key

    summaries = []
    chunk_size = 6000

    for i in range(0, len(subs), chunk_size):
        chunk = subs[i:i+chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"

        if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.7
            )
            summaries.append(response['choices'][0]['message']['content'].strip())
        else:
            return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles. Skipping summary generation.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

    files.download(pdf_path)

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = getpass("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=bMIRhOXAjYk
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3
[youtube] Extracting URL: bMIRhOXAjYk
[youtube] bMIRhOXAjYk: Downloading webpage
[youtube] bMIRhOXAjYk: Downloading ios player API JSON
[youtube] bMIRhOXAjYk: Downloading android player API JSON




[youtube] bMIRhOXAjYk: Downloading player 1ced3a71
[youtube] bMIRhOXAjYk: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Nvidia 2024 AI Event: Everything Revealed in 16 Minutes_subtitles.txt


UnicodeEncodeError: 'latin-1' codec can't encode characters in position 50-95: ordinal not in range(256)

In [None]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language):
    openai.api_key = api_key

    prompt = f"Please generate a 3,000-word summary of the following YouTube video based on its subtitles. Respond in {language}.\n\n{subs}"

    if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=3000,
            n=1,
            stop=None,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    else:
        return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles. Skipping summary generation.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

    files.download(pdf_path)

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = getpass("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)

Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=bMIRhOXAjYk
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 1
[youtube] Extracting URL: bMIRhOXAjYk
[youtube] bMIRhOXAjYk: Downloading webpage
[youtube] bMIRhOXAjYk: Downloading ios player API JSON
[youtube] bMIRhOXAjYk: Downloading android player API JSON




[youtube] bMIRhOXAjYk: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Nvidia 2024 AI Event: Everything Revealed in 16 Minutes_subtitles.txt
Summary saved to: /content/drive/My Drive/youtube_subtitles/Nvidia 2024 AI Event: Everything Revealed in 16 Minutes_summary_English.pdf

Summary PDF: /content/drive/My Drive/youtube_subtitles/Nvidia 2024 AI Event: Everything Revealed in 16 Minutes_summary_English.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=bMIRhOXAjYk
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3
[youtube] Extracting URL: bMIRhOXAjYk
[youtube] bMIRhOXAjYk: Downloading webpage
[youtube] bMIRhOXAjYk: Downloading ios player API JSON
[youtube] bMIRhOXAjYk: Downloading android player API JSON




[youtube] bMIRhOXAjYk: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Nvidia 2024 AI Event: Everything Revealed in 16 Minutes_subtitles.txt


UnicodeEncodeError: 'latin-1' codec can't encode characters in position 50-99: ordinal not in range(256)

In [None]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language):
    openai.api_key = api_key

    prompt = f"Please generate a 3,000-word summary of the following YouTube video based on its subtitles. Respond in {language}.\n\n{subs}"

    if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=3000,
            n=1,
            stop=None,
            temperature=0.7
        )
        return response['choices'][0]['message']['content'].strip()
    else:
        return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles. Skipping summary generation.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = input("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)


Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=ybl8IvhGAJ4
Enter your OpenAI API key: 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
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3
[youtube] Extracting URL: ybl8IvhGAJ4
[youtube] ybl8IvhGAJ4: Downloading webpage
[youtube] ybl8IvhGAJ4: Downloading ios player API JSON
[youtube] ybl8IvhGAJ4: Downloading android player API JSON




[youtube] ybl8IvhGAJ4: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/How Chinese EV Giant BYD Is Taking On Tesla_subtitles.txt


UnicodeEncodeError: 'latin-1' codec can't encode characters in position 50-147: ordinal not in range(256)

In [None]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language):
    openai.api_key = api_key

    prompt = f"Please generate a 3,000-word summary of the following YouTube video based on its subtitles. Respond in {language}.\n\n{subs}"

    if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
        response = openai.ChatCompletion.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=3000,
            n=1,
            stop=None,
            temperature=0.7
        )
        return response.choices[0].message['content'].strip()
    else:
        return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Unable to download subtitles. Skipping summary generation.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = input("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)


Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=W2h1QDNaBtI
Enter your OpenAI API key: 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
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 1
[youtube] Extracting URL: W2h1QDNaBtI
[youtube] W2h1QDNaBtI: Downloading webpage
[youtube] W2h1QDNaBtI: Downloading ios player API JSON
[youtube] W2h1QDNaBtI: Downloading android player API JSON




[youtube] W2h1QDNaBtI: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/The 5 Richest Billionaires In The World 2024_subtitles.txt
Summary saved to: /content/drive/My Drive/youtube_subtitles/The 5 Richest Billionaires In The World 2024_summary_English.pdf

Summary PDF: /content/drive/My Drive/youtube_subtitles/The 5 Richest Billionaires In The World 2024_summary_English.pdf
Enter the YouTube video URL or ID (or type 'the end' to quit): https://www.youtube.com/watch?v=U-I2oFkVt_4
Enter your OpenAI API key: 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
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. In



[youtube] U-I2oFkVt_4: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/The Richest Crypto And Bitcoin Billionaires In The World 2024_subtitles.txt


UnicodeEncodeError: 'latin-1' codec can't encode characters in position 50-51: ordinal not in range(256)

In [1]:
!pip install youtube_transcript_api yt_dlp openai==0.28 fpdf

import os
import re
from youtube_transcript_api import YouTubeTranscriptApi
from yt_dlp import YoutubeDL
from google.colab import drive
import openai
from fpdf import FPDF
from getpass import getpass
from google.colab import files

# Mount Google Drive
drive.mount('/content/drive')

Collecting youtube_transcript_api
  Downloading youtube_transcript_api-0.6.2-py3-none-any.whl (24 kB)
Collecting yt_dlp
  Downloading yt_dlp-2024.3.10-py3-none-any.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting brotli (from yt_dlp)
  Downloading Brotli-1.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
Collecting mutagen (from yt_dlp)
  Downloading mutagen-1.47.0-py3-none-any.whl (194 kB)
[2K     [90

In [10]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    openai.api_key = api_key

    summaries = []
    chunk_size = 6000

    for i in range(0, len(subs), chunk_size):
        chunk = subs[i:i+chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"

        if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.7
            )
            summaries.append(response['choices'][0]['message']['content'].strip())
        else:
            return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

def main(video_url_or_id, api_key, model, language):
    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        return

    subs = download_subs(video_id)
    if not subs:
        print("Subtitles are not available. Please enter another YouTube video URL or ID.")
        return

    summary = generate_summary(api_key, subs, model, language)

    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")

    files.download(pdf_path)

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    # Prompt user for OpenAI API key
    api_key = getpass("Enter your OpenAI API key: ")
    print("API key received. Continuing...")

    # List of available OpenAI language models
    models = [
        'gpt-3.5-turbo',
        'gpt-4'
    ]

    # Prompt user to select a language model
    print("Select an OpenAI language model:")
    for i, model in enumerate(models):
        print(f"{i+1}. {model}")
    model_choice = int(input("Enter the number corresponding to your language model choice: "))
    model = models[model_choice - 1]

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]

    # Run the main function
    main(video_url_or_id, api_key, model, language)

Enter the YouTube video URL or ID (or type 'the end' to quit): sGZ6AlAnULc
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 1
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 1
[youtube] Extracting URL: sGZ6AlAnULc
[youtube] sGZ6AlAnULc: Downloading webpage
[youtube] sGZ6AlAnULc: Downloading ios player API JSON
[youtube] sGZ6AlAnULc: Downloading android player API JSON




[youtube] sGZ6AlAnULc: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/The most important AI trends in 2024_subtitles.txt
Summary saved to: /content/drive/My Drive/youtube_subtitles/The most important AI trends in 2024_summary_English.pdf

Summary PDF: /content/drive/My Drive/youtube_subtitles/The most important AI trends in 2024_summary_English.pdf


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Enter the YouTube video URL or ID (or type 'the end' to quit): sGZ6AlAnULc
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 1
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3
[youtube] Extracting URL: sGZ6AlAnULc
[youtube] sGZ6AlAnULc: Downloading webpage
[youtube] sGZ6AlAnULc: Downloading ios player API JSON
[youtube] sGZ6AlAnULc: Downloading android player API JSON




[youtube] sGZ6AlAnULc: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/The most important AI trends in 2024_subtitles.txt


UnicodeEncodeError: 'latin-1' codec can't encode characters in position 50-57: ordinal not in range(256)

In [20]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def save_subs_to_txt(title, subs):
    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'end' to quit): ")
    if video_url_or_id.lower() == 'end':
        print("Program ended.")
        break

    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        continue

    subs = download_subs(video_id)
    if not subs:
        print("Subtitles are not available. Please enter another YouTube video URL or ID.")
        continue

    save_subs_to_txt(title, subs)

Enter the YouTube video URL or ID (or type 'end' to quit): 01OzXMCqjLk
[youtube] Extracting URL: 01OzXMCqjLk
[youtube] 01OzXMCqjLk: Downloading webpage
[youtube] 01OzXMCqjLk: Downloading ios player API JSON
[youtube] 01OzXMCqjLk: Downloading android player API JSON




[youtube] 01OzXMCqjLk: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/I Spent 100+ Hours with 4 Billionaires (here’s what I learned)_subtitles.txt
Enter the YouTube video URL or ID (or type 'end' to quit): tUB_mmBXG9Y
[youtube] Extracting URL: tUB_mmBXG9Y
[youtube] tUB_mmBXG9Y: Downloading webpage
[youtube] tUB_mmBXG9Y: Downloading ios player API JSON
[youtube] tUB_mmBXG9Y: Downloading android player API JSON




[youtube] tUB_mmBXG9Y: Downloading m3u8 information
English subtitles are not available.
Subtitles are not available. Please enter another YouTube video URL or ID.
Enter the YouTube video URL or ID (or type 'end' to quit): fiOhCJveNXM
[youtube] Extracting URL: fiOhCJveNXM
[youtube] fiOhCJveNXM: Downloading webpage
[youtube] fiOhCJveNXM: Downloading ios player API JSON
[youtube] fiOhCJveNXM: Downloading android player API JSON




[youtube] fiOhCJveNXM: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/AI大時代！程式交易能否取替人手交易？_subtitles.txt


KeyboardInterrupt: Interrupted by user

In [16]:
def ask_generate_summary():
    while True:
        choice = input("Do you want to generate an AI summary? (Y/N): ")
        if choice.upper() == 'Y':
            return True
        elif choice.upper() == 'N':
            return False
        else:
            print("Invalid choice. Please enter 'Y' or 'N'.")

if ask_generate_summary():
    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for i, lang in enumerate(languages):
        print(f"{i+1}. {lang}")
    lang_choice = int(input("Enter the number corresponding to your language choice: "))
    language = languages[lang_choice - 1]
else:
    print("AI summary generation skipped.")
    continue

SyntaxError: 'continue' not properly in loop (<ipython-input-16-6df7f20198fe>, line 20)

In [None]:
def ask_summary_format():
    while True:
        choice = input("Do you want the summary as text or PDF? (text/pdf): ")
        if choice.lower() == 'text':
            return 'text'
        elif choice.lower() == 'pdf':
            return 'pdf'
        else:
            print("Invalid choice. Please enter 'text' or 'pdf'.")

summary_format = ask_summary_format()

In [None]:
!pip install openai fpdf
from google.colab import files

import openai
from fpdf import FPDF

# Prompt user for OpenAI API key
api_key = getpass("Enter your OpenAI API key: ")
print("API key received. Continuing...")

# List of available OpenAI language models
models = [
    'gpt-3.5-turbo',
    'gpt-4'
]

# Prompt user to select a language model
print("Select an OpenAI language model:")
for i, model in enumerate(models):
    print(f"{i+1}. {model}")
model_choice = int(input("Enter the number corresponding to your language model choice: "))
model = models[model_choice - 1]

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    openai.api_key = api_key

    summaries = []
    chunk_size = 6000

    for i in range(0, len(subs), chunk_size):
        chunk = subs[i:i+chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"

        if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.7
            )
            summaries.append(response['choices'][0]['message']['content'].strip())
        else:
            return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

summary = generate_summary(api_key, subs, model, language)

if summary_format == 'text':
    print("\nSummary:")
    print(summary)
else:
    pdf_path = save_summary_to_pdf(title, summary, language)
    print(f"\nSummary PDF: {pdf_path}")
    files.download(pdf_path)

In [21]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def save_subs_to_txt(title, subs):
    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

def ask_generate_summary():
    while True:
        choice = input("Do you want to generate an AI summary? (Y/N): ")
        if choice.upper() == 'Y':
            return True
        elif choice.upper() == 'N':
            return False
        else:
            print("Invalid choice. Please enter 'Y' or 'N'.")

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        continue

    subs = download_subs(video_id)
    if not subs:
        print("Subtitles are not available. Please enter another YouTube video URL or ID.")
        continue

    save_subs_to_txt(title, subs)

    if ask_generate_summary():
        # Prompt user to select the language for AI-generated summary
        print("Select the language for the AI-generated summary:")
        for i, lang in enumerate(languages):
            print(f"{i+1}. {lang}")
        lang_choice = int(input("Enter the number corresponding to your language choice: "))
        language = languages[lang_choice - 1]
    else:
        print("AI summary generation skipped.")
        continue


Enter the YouTube video URL or ID (or type 'the end' to quit): lBUrdGWoUXE
[youtube] Extracting URL: lBUrdGWoUXE
[youtube] lBUrdGWoUXE: Downloading webpage
[youtube] lBUrdGWoUXE: Downloading ios player API JSON
[youtube] lBUrdGWoUXE: Downloading android player API JSON




[youtube] lBUrdGWoUXE: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Here's How ChatGPT 5 Will Change the World Forever_subtitles.txt
Do you want to generate an AI summary? (Y/N): y
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3


KeyboardInterrupt: Interrupted by user

In [24]:
!pip install youtube_transcript_api yt_dlp openai fpdf
from google.colab import drive, files
from youtube_transcript_api import YouTubeTranscriptApi
from yt_dlp import YoutubeDL
from getpass import getpass
import os
import re
import openai
from fpdf import FPDF

# Mount Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: https://www.youtube.com/watch?v=vGxR98gI930


ValueError: invalid literal for int() with base 10: 'https://www.youtube.com/watch?v=vGxR98gI930'

In [31]:
def download_subs(video_id):
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = None

        try:
            transcript = transcript_list.find_transcript(['en'])
        except:
            print("English subtitles are not available.")
            return None

        transcript.fetch()
        subs = []
        for line in transcript.fetch():
            subs.append(line['text'])
        return '\n'.join(subs)
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    ydl_opts = {}
    with YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(video_id, download=False)
        return info_dict.get('title', None)

def save_subs_to_txt(title, subs):
    save_path_subs = f"/content/drive/My Drive/youtube_subtitles/{title}_subtitles.txt"
    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

def ask_generate_summary():
    while True:
        choice = input("Do you want to generate an AI summary? (Y/N): ")
        if choice.upper() == 'Y':
            return True
        elif choice.upper() == 'N':
            return False
        else:
            print("Invalid choice. Please enter 'Y' or 'N'.")

def ask_summary_format():
    while True:
        choice = input("Do you want the summary as text or PDF? (text/pdf): ")
        if choice.lower() == 'text':
            return 'text'
        elif choice.lower() == 'pdf':
            return 'pdf'
        else:
            print("Invalid choice. Please enter 'text' or 'pdf'.")

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    openai.api_key = api_key

    summaries = []
    chunk_size = 6000

    for i in range(0, len(subs), chunk_size):
        chunk = subs[i:i+chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"

        if model.startswith('gpt-3.5-turbo') or model.startswith('gpt-4'):
            response = openai.ChatCompletion.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                n=1,
                stop=None,
                temperature=0.7
            )
            summaries.append(response['choices'][0]['message']['content'].strip())
        else:
            return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language):
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    save_path_summary = f"/content/drive/My Drive/youtube_subtitles/{title}_summary_{language}.pdf"
    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

# List of available languages
languages = [
    "English",
    "Chinese (Simplified)",
    "Chinese (Traditional)",
    "Spanish",
    "French",
    "German",
    "Italian",
    "Japanese",
    "Korean",
    "Portuguese",
    "Russian",
    "Arabic",
    "Hindi",
    "Bengali",
    "Indonesian",
    "Urdu",
    "Vietnamese",
    "Turkish",
    "Persian",
    "Thai"
]

# Prompt user for OpenAI API key
api_key = getpass("Enter your OpenAI API key: ")
print("API key received. Continuing...")

# List of available OpenAI language models
models = [
    'gpt-3.5-turbo',
    'gpt-4'
]

# Prompt user to select a language model
print("Select an OpenAI language model:")
for i, model in enumerate(models):
    print(f"{i+1}. {model}")
model_choice = int(input("Enter the number corresponding to your language model choice: "))
model = models[model_choice - 1]

while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ")
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break

    video_id = re.search(r'(?<=v=)[^&#]+', video_url_or_id)
    video_id = video_id.group(0) if video_id else video_url_or_id

    title = get_video_title(video_id)
    if not title:
        print("Unable to retrieve video title.")
        continue

    subs = download_subs(video_id)
    if not subs:
        print("Subtitles are not available. Please enter another YouTube video URL or ID.")
        continue

    save_subs_to_txt(title, subs)

    if ask_generate_summary():
        # Prompt user to select the language for AI-generated summary
        print("Select the language for the AI-generated summary:")
        for i, lang in enumerate(languages):
            print(f"{i+1}. {lang}")
        lang_choice = int(input("Enter the number corresponding to your language choice: "))
        language = languages[lang_choice - 1]

        summary = generate_summary(api_key, subs, model, language)

        summary_format = ask_summary_format()

        if summary_format == 'text':
            print("\nSummary:")
            print(summary)
        else:
            pdf_path = save_summary_to_pdf(title, summary, language)
            print(f"\nSummary PDF: {pdf_path}")
            files.download(pdf_path)
    else:
        print("AI summary generation skipped.")
        continue


Enter your OpenAI API key: ··········
API key received. Continuing...
Select an OpenAI language model:
1. gpt-3.5-turbo
2. gpt-4
Enter the number corresponding to your language model choice: 2
Enter the YouTube video URL or ID (or type 'the end' to quit): z8RVnPRNQvo
[youtube] Extracting URL: z8RVnPRNQvo
[youtube] z8RVnPRNQvo: Downloading webpage
[youtube] z8RVnPRNQvo: Downloading ios player API JSON
[youtube] z8RVnPRNQvo: Downloading android player API JSON




[youtube] z8RVnPRNQvo: Downloading m3u8 information
Subtitles saved to: /content/drive/My Drive/youtube_subtitles/Best AI Side Hustles You Can Start With $0 In 2024_subtitles.txt
Do you want to generate an AI summary? (Y/N): y
Select the language for the AI-generated summary:
1. English
2. Chinese (Simplified)
3. Chinese (Traditional)
4. Spanish
5. French
6. German
7. Italian
8. Japanese
9. Korean
10. Portuguese
11. Russian
12. Arabic
13. Hindi
14. Bengali
15. Indonesian
16. Urdu
17. Vietnamese
18. Turkish
19. Persian
20. Thai
Enter the number corresponding to your language choice: 3
Do you want the summary as text or PDF? (text/pdf): text

Summary:
在這部影片中，主講者分享了九個無需投資就能透過AI賺錢的副業機會。首先，他談到利用AI軟體為黑白照片上色，這項服務在Fiverr等網站上約能賺取5到10美元。其次，他提到利用AI模型拍攝服裝廣告，這種服務能夠節省業主的模特和拍攝成本。第三，他談到利用AI為視頻添加字幕，這項服務的收費根據視頻長度而定。最後，他提到利用AI為產品照片添加或移除背景，這項服務在Fiverr等網站上約能賺取5美元。他強調，雖然這些副業看似簡單，但要賺取大量利潤還需要創新和獨特的營銷策略。
這段字幕主要討論了利用AI和CapCut工具進行各種創作與賺錢的方法。首先，他提到利用AI進行照片編輯，儘管需要一些攝影知識，但有賺錢的潛力。然後他提到社交媒體圖形設計，透過長期合作夥伴關係提供圖形設計服務，並利用