In [1]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.probability import FreqDist
from nltk.stem import WordNetLemmatizer
from tkinter import messagebox, scrolledtext
import tkinter as tk
from youtube_transcript_api import YouTubeTranscriptApi

In [3]:
# Fetch the NLTK resources used below: Punkt tokenizer models (word/sentence
# tokenization), the English stop-word list, and WordNet (lemmatization).
nltk.download('punkt')
# Recent NLTK releases (3.9 and later) load the Punkt models from the
# 'punkt_tab' package instead of the pickled 'punkt' one; without it
# word_tokenize/sent_tokenize raise LookupError. Download both so the
# notebook runs on old and new NLTK versions alike.
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /Users/anand/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/anand/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/anand/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [5]:
#preprocessing
def preprocess_text(text):
    """Reduce raw text to a space-separated string of lemmatized keywords.

    The text is lower-cased and tokenized. Tokens that are not purely
    alphanumeric (which includes punctuation tokens) or that are English
    stop words are dropped; every remaining token is lemmatized.

    Args:
        text: The raw input string.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    english_stops = set(stopwords.words('english'))
    wordnet = WordNetLemmatizer()

    kept = []
    for token in word_tokenize(text.lower()):
        # Skip punctuation / mixed-symbol tokens.
        if not token.isalnum():
            continue
        # Skip common function words.
        if token in english_stops:
            continue
        kept.append(wordnet.lemmatize(token))

    return " ".join(kept)

In [7]:
def summarize_text(text, num_sentences=15):
    """Build an extractive summary from the highest-scoring sentences of `text`.

    Each sentence is scored as the sum of the corpus-wide frequencies of its
    content words (alphanumeric tokens that are not English stop words), so
    punctuation and function words such as "the" do not dominate the ranking.
    The selected sentences are returned in the order they appear in `text`
    so the summary reads coherently.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to keep (default 15).

    Returns:
        The selected sentences joined by single spaces, or an empty string
        when `text` contains no sentences or `num_sentences` is not positive.
    """
    sentences = sent_tokenize(text)
    if not sentences or num_sentences <= 0:
        return ""

    stop_words = set(stopwords.words('english'))

    def content_words(fragment):
        # Lower-cased tokens that carry meaning: no punctuation, no stop words.
        return [
            token
            for token in word_tokenize(fragment.lower())
            if token.isalnum() and token not in stop_words
        ]

    word_frequencies = FreqDist(content_words(text))

    scored = []
    seen = set()
    for position, sentence in enumerate(sentences):
        # A repeated sentence is scored once (first occurrence) instead of
        # having its score accumulated for every repetition.
        if sentence in seen:
            continue
        seen.add(sentence)
        score = sum(word_frequencies[token] for token in content_words(sentence))
        scored.append((score, position, sentence))

    # Highest score first; ties are broken by earliest position.
    best = sorted(scored, key=lambda item: (-item[0], item[1]))[:num_sentences]
    # Restore document order for readability.
    best.sort(key=lambda item: item[1])
    return " ".join(sentence for _, _, sentence in best)

In [9]:
#extracting video_id
def extract_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognized forms:
      * ``...watch?v=<id>`` (the ``v`` query parameter in any position)
      * ``youtu.be/<id>`` short links
      * ``.../embed/<id>``, ``.../shorts/<id>``, ``.../live/<id>``, ``.../v/<id>``

    Trailing query parameters, fragments (``#t=...``) and surrounding
    whitespace are not included in the returned ID.

    Args:
        youtube_url: The URL string entered by the user.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If no video ID can be found in `youtube_url`.
    """
    url = youtube_url.strip() if isinstance(youtube_url, str) else ""

    patterns = (
        # `v` must be a whole parameter name, so e.g. `rev=` does not match.
        r"(?:^|[?&])v=([^&#\s]+)",
        r"youtu\.be/([^?&#/\s]+)",
        r"/(?:embed|shorts|live|v)/([^?&#/\s]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)

    raise ValueError("Invalid YouTube URL. Please enter a valid URL.")

In [13]:
#getting the transcript
def get_transcript(video_id):
    """Download a video's transcript and return it as one string.

    Works with both generations of the youtube-transcript-api package:
    newer releases expose an instance method ``fetch`` that yields snippet
    objects with a ``text`` attribute, while older releases expose the
    static ``get_transcript`` helper that returns a list of dicts.

    Args:
        video_id: The YouTube video ID.

    Returns:
        All transcript segments joined by single spaces.

    Raises:
        ValueError: If the transcript cannot be fetched for any reason; the
            original exception is attached as ``__cause__``.
    """
    try:
        if hasattr(YouTubeTranscriptApi, "fetch"):
            # Newer API: instantiate, then iterate over snippet objects.
            snippets = YouTubeTranscriptApi().fetch(video_id)
            pieces = [snippet.text for snippet in snippets]
        else:
            # Legacy API: static helper returning [{'text': ..., ...}, ...].
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            pieces = [entry['text'] for entry in entries]
        return " ".join(pieces)
    except Exception as e:
        raise ValueError("Failed to fetch transcript: " + str(e)) from e

In [15]:
def generate_summary():
    """Button callback: summarize the video whose URL is in the entry field.

    Reads the URL from `url_entry`, fetches the transcript, summarizes it and
    replaces the contents of `result_text` with the summary. Any failure is
    reported to the user in an error dialog instead of propagating.
    """
    # Strip stray whitespace/newlines picked up when pasting the URL.
    youtube_url = url_entry.get().strip()
    try:
        video_id = extract_video_id(youtube_url)
        transcript = get_transcript(video_id)

        # Summarize the raw transcript, not the output of preprocess_text():
        # preprocessing drops every punctuation token, so the sentence
        # tokenizer would see a single "sentence" and the summary would be
        # the entire keyword dump rather than a selection of sentences.
        summary = summarize_text(transcript)

        result_text.delete("1.0", tk.END)
        result_text.insert(tk.END, f"Generated Summary:\n{summary}")
    except Exception as e:
        messagebox.showerror("Error", str(e))

In [17]:
def clear_fields():
    """Button callback: reset the form by emptying the URL entry and the result box."""
    # The two widgets are independent, so the order of clearing does not matter.
    result_text.delete(1.0, tk.END)
    url_entry.delete(0, tk.END)

In [19]:
# Shared look-and-feel values used by the widgets below.
WINDOW_BG = "#e6f2ff"
FIELD_BG = "#ffffff"
TEXT_FG = "#004d80"
UI_FONT = ("Arial", 12)
OUTLINE = {"borderwidth": 2, "relief": "solid"}

# Main window.
root = tk.Tk()
root.title("YouTube Transcript Summarizer")
root.geometry("1200x900")
root.config(bg=WINDOW_BG)

# Top area: URL prompt, entry field and the two action buttons.
input_frame = tk.Frame(root, bg=WINDOW_BG)
input_frame.pack(pady=20)

url_label = tk.Label(
    input_frame,
    text="Enter YouTube Video URL:",
    font=UI_FONT,
    bg=WINDOW_BG,
    fg=TEXT_FG,
)
url_label.grid(row=0, column=0, padx=5)

url_entry = tk.Entry(
    input_frame,
    width=80,
    font=UI_FONT,
    bg=FIELD_BG,
    fg=TEXT_FG,
    **OUTLINE,
)
url_entry.grid(row=0, column=1, padx=5)

generate_button = tk.Button(
    input_frame,
    text="Generate Summary",
    font=UI_FONT,
    bg="#4da6ff",
    fg="red",
    command=generate_summary,
    **OUTLINE,
)
generate_button.grid(row=1, column=0, columnspan=2, pady=10)

clear_button = tk.Button(
    input_frame,
    text="Clear",
    font=UI_FONT,
    bg="#ff6666",
    fg="red",
    command=clear_fields,
    **OUTLINE,
)
clear_button.grid(row=1, column=2, padx=5)

# Bottom area: scrollable text box that displays the generated summary.
result_frame = tk.Frame(root, bg=WINDOW_BG)
result_frame.pack(pady=10)

result_text = scrolledtext.ScrolledText(
    result_frame,
    wrap=tk.WORD,
    height=30,
    width=100,
    font=UI_FONT,
    bg=FIELD_BG,
    fg=TEXT_FG,
    **OUTLINE,
)
result_text.pack()

# Hand control to Tk; this call blocks until the window is closed.
root.mainloop()


2024-12-18 22:04:48.209 python[10390:271583] +[IMKClient subclass]: chose IMKClient_Modern
2024-12-18 22:04:48.209 python[10390:271583] +[IMKInputSession subclass]: chose IMKInputSession_Modern
