<a href="https://colab.research.google.com/github/pakmingc/YouTube-AI-Summarizer_withPDF/blob/main/YouTube_AI_Summarizer_TextPDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install youtube_transcript_api yt_dlp openai fpdf
from google.colab import drive, files
from youtube_transcript_api import YouTubeTranscriptApi
from yt_dlp import YoutubeDL
from getpass import getpass
import os
import re
import openai
from fpdf import FPDF

# Mount Google Drive at /content/drive; the save helpers in this notebook
# write their output files under "/content/drive/My Drive/youtube_subtitles".
drive.mount('/content/drive')

In [None]:
def download_subs(video_id):
    """Download the English subtitles of a YouTube video as plain text.

    Args:
        video_id: ID of the YouTube video whose transcript is requested.

    Returns:
        The subtitle lines joined with newlines, or None when no English
        transcript is found or the download fails for any other reason.
        A message is printed in both failure cases.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        try:
            transcript = transcript_list.find_transcript(['en'])
        except Exception:
            # ``except Exception`` instead of a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            print("English subtitles are not available.")
            return None

        # Fetch exactly once; the original fetched twice and discarded the
        # first result.
        return '\n'.join(line['text'] for line in transcript.fetch())
    except Exception as e:
        print(f"Failed to download subtitles: {e}")
        return None

def get_video_title(video_id):
    """Return the title yt-dlp reports for a video, without downloading it.

    Args:
        video_id: Video ID (or URL) handed to ``YoutubeDL.extract_info``.

    Returns:
        The 'title' entry of the extracted metadata, or None if the metadata
        has no such entry.
    """
    with YoutubeDL({}) as downloader:
        # download=False: only the metadata is extracted, no media is saved.
        metadata = downloader.extract_info(video_id, download=False)
        return metadata.get('title')

def save_subs_to_txt(title, subs, save_dir="/content/drive/My Drive/youtube_subtitles"):
    """Write the subtitles to ``<save_dir>/<title>_subtitles.txt`` as UTF-8.

    Args:
        title: Video title used to build the file name. '/' and NUL cannot
            appear in a file name and are replaced by '_'.
        subs: Subtitle text to write.
        save_dir: Directory that receives the file; it is created (including
            parents) when it does not exist yet.
    """
    # Without this, the first run on a fresh Drive fails with
    # FileNotFoundError because the folder has never been created.
    os.makedirs(save_dir, exist_ok=True)

    # A '/' in the title would otherwise be interpreted as a sub-directory.
    safe_title = re.sub(r'[/\x00]', '_', title)
    save_path_subs = os.path.join(save_dir, f"{safe_title}_subtitles.txt")

    with open(save_path_subs, 'w', encoding='utf-8') as f:
        f.write(subs)
    print(f"Subtitles saved to: {save_path_subs}")

def ask_generate_summary():
    """Ask whether an AI summary should be generated.

    Keeps prompting until the answer is 'Y' or 'N' (case-insensitive).

    Returns:
        True for 'Y', False for 'N'.
    """
    decisions = {'Y': True, 'N': False}
    while True:
        reply = input("Do you want to generate an AI summary? (Y/N): ").upper()
        if reply in decisions:
            return decisions[reply]
        print("Invalid choice. Please enter 'Y' or 'N'.")

def ask_summary_format():
    """Ask in which format the summary should be delivered.

    Keeps prompting until the answer is 'text' or 'pdf' (case-insensitive).

    Returns:
        The chosen format in lower case: 'text' or 'pdf'.
    """
    accepted = ('text', 'pdf')
    while True:
        reply = input("Do you want the summary as text or PDF? (text/pdf): ").lower()
        if reply in accepted:
            return reply
        print("Invalid choice. Please enter 'text' or 'pdf'.")

def generate_summary(api_key, subs, model, language, max_tokens=3000):
    """Summarize subtitles with an OpenAI chat model, one chunk at a time.

    The subtitles are cut into pieces of 6000 characters. Each piece is sent
    in its own chat-completion request and the partial summaries are joined
    with newlines.

    Args:
        api_key: OpenAI API key used for the requests.
        subs: Full subtitle text.
        model: Model name; must start with 'gpt-3.5-turbo' or 'gpt-4'.
        language: Language the model is asked to respond in.
        max_tokens: Completion token limit passed to every request.

    Returns:
        The combined summary text, or an error message string when ``model``
        is not supported (callers receive a string in both cases).
    """
    # Validate once up front: the original checked inside the loop, so the
    # check was skipped entirely for empty subtitles and repeated per chunk.
    if not model.startswith(('gpt-3.5-turbo', 'gpt-4')):
        return "Error: Selected model is not supported. Please choose either 'gpt-3.5-turbo' or 'gpt-4'."

    openai.api_key = api_key

    # openai>=1.0 removed ``openai.ChatCompletion`` in favour of client
    # objects; use the client when the installed package provides it and
    # fall back to the legacy module-level call otherwise.
    client = openai.OpenAI(api_key=api_key) if hasattr(openai, 'OpenAI') else None

    summaries = []
    chunk_size = 6000

    for start in range(0, len(subs), chunk_size):
        chunk = subs[start:start + chunk_size]
        prompt = f"Please generate a summary of the following YouTube video subtitles. Respond in {language}.\n\n{chunk}"
        request = {
            'model': model,
            'messages': [{"role": "user", "content": prompt}],
            'max_tokens': max_tokens,
            'n': 1,
            'stop': None,
            'temperature': 0.7,
        }

        if client is not None:
            response = client.chat.completions.create(**request)
            content = response.choices[0].message.content
        else:
            response = openai.ChatCompletion.create(**request)
            content = response['choices'][0]['message']['content']

        # The message content can be None; treat that as an empty summary
        # instead of crashing on ``None.strip()``.
        summaries.append((content or '').strip())

    return '\n'.join(summaries)

def save_summary_to_pdf(title, summary, language,
                        save_dir="/content/drive/My Drive/youtube_subtitles"):
    """Render the summary into a one-font PDF and save it.

    Args:
        title: Video title used in the file name.
        summary: Summary text to render.
        language: Summary language, appended to the file name.
        save_dir: Directory that receives the PDF; created when missing.

    Returns:
        The path of the written PDF file.
    """
    # The built-in "Arial" core font only covers Latin-1. Any other character
    # (CJK, Cyrillic, Arabic, but also curly quotes or long dashes) makes the
    # PDF export raise, so substitute such characters and tell the user.
    try:
        summary.encode('latin-1')
    except UnicodeEncodeError:
        print("Warning: the summary contains characters the PDF font cannot "
              "display; they are replaced with '?'. Choose the text format "
              "to see the full summary.")
        summary = summary.encode('latin-1', 'replace').decode('latin-1')

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, summary)

    # Make sure the target folder exists and that a '/' in the title is not
    # taken for a sub-directory.
    os.makedirs(save_dir, exist_ok=True)
    file_stem = re.sub(r'[/\x00]', '_', f"{title}_summary_{language}")
    save_path_summary = os.path.join(save_dir, f"{file_stem}.pdf")

    pdf.output(save_path_summary)
    print(f"Summary saved to: {save_path_summary}")

    return save_path_summary

# Languages offered for the AI-generated summary. The menu shows them in this
# order, numbered from 1.
languages = [
    "English", "Chinese (Simplified)", "Chinese (Traditional)", "Spanish",
    "French", "German", "Italian", "Japanese",
    "Korean", "Portuguese", "Russian", "Arabic",
    "Hindi", "Bengali", "Indonesian", "Urdu",
    "Vietnamese", "Turkish", "Persian", "Thai",
]

# Prompt user for OpenAI API key (read with getpass, so it is not echoed)
api_key = getpass("Enter your OpenAI API key: ")
print("API key received. Continuing...")

# List of available OpenAI language models
models = [
    'gpt-3.5-turbo',
    'gpt-4'
]

# Prompt user to select a language model
print("Select an OpenAI language model:")
for number, model in enumerate(models, start=1):
    print(f"{number}. {model}")

# Re-prompt until a listed number is entered. A plain int(input()) crashes on
# non-numeric input, and 0 would silently select the last model via index -1.
while True:
    raw_choice = input("Enter the number corresponding to your language model choice: ")
    try:
        model_choice = int(raw_choice)
    except ValueError:
        model_choice = 0
    if 1 <= model_choice <= len(models):
        break
    print(f"Invalid choice. Please enter a number between 1 and {len(models)}.")
model = models[model_choice - 1]

# Main interactive loop: one video per iteration until the user types 'the end'.
while True:
    # Prompt user for YouTube video URL or ID
    video_url_or_id = input("Enter the YouTube video URL or ID (or type 'the end' to quit): ").strip()
    if video_url_or_id.lower() == 'the end':
        print("Program ended.")
        break
    if not video_url_or_id:
        # Nothing entered; ask again instead of querying with an empty ID.
        continue

    # Extract the ID from watch URLs (v=ID) as well as youtu.be/ID,
    # /shorts/ID and /embed/ID links; anything else is used as a bare ID.
    id_match = re.search(r'(?:v=|youtu\.be/|/shorts/|/embed/)([^?&#/]+)', video_url_or_id)
    video_id = id_match.group(1) if id_match else video_url_or_id

    # A failed lookup (bad ID, private video, network error) raises; report
    # it and keep the session alive rather than aborting the whole loop.
    try:
        title = get_video_title(video_id)
    except Exception as e:
        print(f"Unable to retrieve video title: {e}")
        continue
    if not title:
        print("Unable to retrieve video title.")
        continue

    subs = download_subs(video_id)
    if not subs:
        print("Subtitles are not available. Please enter another YouTube video URL or ID.")
        continue

    save_subs_to_txt(title, subs)

    if not ask_generate_summary():
        print("AI summary generation skipped.")
        continue

    # Prompt user to select the language for AI-generated summary
    print("Select the language for the AI-generated summary:")
    for number, lang in enumerate(languages, start=1):
        print(f"{number}. {lang}")

    # Re-prompt until a listed number is entered (non-numeric input, 0 and
    # out-of-range numbers are all rejected).
    while True:
        raw_choice = input("Enter the number corresponding to your language choice: ")
        try:
            lang_choice = int(raw_choice)
        except ValueError:
            lang_choice = 0
        if 1 <= lang_choice <= len(languages):
            break
        print(f"Invalid choice. Please enter a number between 1 and {len(languages)}.")
    language = languages[lang_choice - 1]

    # API failures (bad key, rate limit, network) should not end the session.
    try:
        summary = generate_summary(api_key, subs, model, language)
    except Exception as e:
        print(f"Failed to generate summary: {e}")
        continue

    summary_format = ask_summary_format()

    if summary_format == 'text':
        print("\nSummary:")
        print(summary)
    else:
        pdf_path = save_summary_to_pdf(title, summary, language)
        print(f"\nSummary PDF: {pdf_path}")
        files.download(pdf_path)
