In [None]:
# Install the notebook's third-party dependencies (-q keeps pip's output quiet).
# Whisper is installed directly from its GitHub repository; every other package
# is installed by name with no version pin, so the newest release is used.
# NOTE(review): the audio-download step in this notebook configures yt-dlp's
# FFmpegExtractAudio post-processor, so an ffmpeg binary is presumably required
# in the environment as well — confirm before running.
!pip install -q ipywidgets
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q yt-dlp
!pip install -q transformers
!pip install -q torch
!pip install -q youtube-transcript-api

In [None]:
import os
import re
import json
import random
from urllib.parse import urlparse, parse_qs

import torch
import whisper
from transformers import pipeline

import yt_dlp
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import NoTranscriptFound, TranscriptsDisabled

from IPython.display import display, HTML
from ipywidgets import widgets

In [None]:
class VideoSummarizer:
    """Turn a video URL into a transcript and an abstractive summary.

    The pipeline is: try to fetch an English YouTube transcript; if none is
    available, download the audio with yt-dlp and transcribe it with Whisper;
    then split the transcript into chunks and summarize them with a
    Hugging Face summarization pipeline.
    """

    # Hosts (with any leading "www." removed) that serve regular YouTube pages.
    _YOUTUBE_HOSTS = frozenset({
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "youtube-nocookie.com",
    })
    # First path segment of URLs shaped like /<prefix>/<video id>.
    _YOUTUBE_PATH_PREFIXES = ("embed", "shorts", "live", "v")
    # The ID alphabet and length are the ones the original pattern accepted.
    _YOUTUBE_ID_PATTERN = re.compile(r"[0-9A-Za-z_-]{11}")
    _SCHEME_PATTERN = re.compile(r"[A-Za-z][A-Za-z0-9+.\-]*://")

    # Generation bounds (in tokens) applied to each chunk when the text is
    # split into more than one chunk.
    _CHUNK_MAX_LENGTH = 150
    _CHUNK_MIN_LENGTH = 40

    def __init__(self, whisper_model="base", summary_model="facebook/bart-large-cnn"):
        """Load the Whisper and summarization models.

        Args:
            whisper_model: Name passed to ``whisper.load_model``.
            summary_model: Model identifier passed to the ``transformers``
                summarization pipeline.
        """
        print("Initializing VideoSummarizer...")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        print(f"Loading Whisper model: {whisper_model}")
        self.whisper_model = whisper.load_model(whisper_model, device=self.device)

        print(f"Loading summarization model: {summary_model}")
        # The pipeline takes a device index: 0 for the first GPU, -1 for CPU.
        self.summarizer = pipeline(
            "summarization",
            model=summary_model,
            device=0 if self.device == "cuda" else -1,
        )

    def extract_youtube_id(self, url):
        """Return the 11-character YouTube video ID in ``url``, or None.

        Recognizes ``watch?v=<id>``, ``youtu.be/<id>`` and
        ``/embed|shorts|live|v/<id>`` links, with or without a scheme.
        URLs on any other host return None, so an arbitrary 11-character
        path segment is never mistaken for a video ID.
        """
        if not url:
            return None

        candidate = url.strip()
        # urlparse only populates the host when the URL carries a scheme.
        if not self._SCHEME_PATTERN.match(candidate):
            candidate = "https://" + candidate.lstrip("/")

        try:
            parsed = urlparse(candidate)
            host = (parsed.hostname or "").lower()
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
            return None

        if host.startswith("www."):
            host = host[len("www."):]

        if host == "youtu.be":
            video_id = parsed.path.lstrip("/").split("/", 1)[0]
        elif host in self._YOUTUBE_HOSTS:
            video_id = (parse_qs(parsed.query).get("v") or [""])[0]
            if not video_id:
                segments = [part for part in parsed.path.split("/") if part]
                if len(segments) >= 2 and segments[0] in self._YOUTUBE_PATH_PREFIXES:
                    video_id = segments[1]
        else:
            return None

        return video_id if self._YOUTUBE_ID_PATTERN.fullmatch(video_id) else None

    @staticmethod
    def _snippet_text(piece):
        """Return the text of one transcript piece (a dict or an object with ``.text``)."""
        return piece["text"] if isinstance(piece, dict) else piece.text

    def get_youtube_transcript(self, video_id):
        """Try to fetch an English transcript for ``video_id`` from YouTube.

        Returns:
            The transcript pieces joined with spaces, or None when no English
            transcript exists, transcripts are disabled, or the lookup fails
            for any other reason (that error is printed).
        """
        try:
            # NOTE(review): youtube-transcript-api moved from the static
            # ``list_transcripts`` call to an instance ``list`` method in its
            # newer releases; support whichever the installed version offers.
            if hasattr(YouTubeTranscriptApi, "list_transcripts"):
                transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
            else:
                transcript_list = YouTubeTranscriptApi().list(video_id)
            transcript = transcript_list.find_transcript(['en', 'en-US', 'en-GB'])
            transcript_pieces = transcript.fetch()
            # Pieces may be plain dicts or snippet objects depending on the
            # library version, so read the text through a tolerant helper.
            text = " ".join(self._snippet_text(piece) for piece in transcript_pieces)
            return text or None
        except (NoTranscriptFound, TranscriptsDisabled):
            # Expected outcomes; the caller falls back to audio transcription.
            return None
        except Exception as e:
            print(f"An error occurred getting the YouTube transcript: {e}")
            return None

    def download_audio(self, url, output_file="temp_audio.mp3"):
        """Download the audio track of ``url`` and convert it to MP3.

        Args:
            url: Video URL handed to yt-dlp.
            output_file: Desired output path. Its extension is dropped to form
                yt-dlp's output template; the post-processor is configured
                for MP3.

        Returns:
            ``<output_file without extension>.mp3`` — the name the MP3
            post-processor is expected to write, returned even when
            ``output_file`` was given with a different extension.
        """
        print("Downloading audio...")
        base_path = os.path.splitext(output_file)[0]
        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'}],
            'outtmpl': base_path,
            'quiet': True,
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return f"{base_path}.mp3"

    def transcribe_audio(self, audio_file):
        """Transcribe ``audio_file`` with the loaded Whisper model and return the text."""
        print("Transcribing audio with Whisper...")
        # Request fp16 only when the model was loaded onto the GPU.
        result = self.whisper_model.transcribe(audio_file, fp16=self.device == "cuda")
        return result["text"]

    def chunk_text(self, text: str, max_chunk_length: int):
        """Split ``text`` into space-joined chunks without cutting words.

        Each chunk is at most ``max_chunk_length`` characters long, except
        that a single word longer than the limit becomes its own chunk.
        Empty chunks are never produced; empty or None text yields ``[]``.
        """
        chunks = []
        current_words = []
        current_length = 0  # exact length of " ".join(current_words)
        for word in (text or "").split():
            if current_words:
                projected = current_length + 1 + len(word)  # +1 for the joining space
            else:
                projected = len(word)
            # Only flush a non-empty chunk, so an oversized first word cannot
            # create an empty leading chunk.
            if current_words and projected > max_chunk_length:
                chunks.append(" ".join(current_words))
                current_words = [word]
                current_length = len(word)
            else:
                current_words.append(word)
                current_length = projected
        if current_words:
            chunks.append(" ".join(current_words))
        return chunks

    def summarize_text(self, text, max_length=350, min_length=200, chunk_size=800):
        """Summarize ``text`` by summarizing its chunks in one batch.

        Args:
            text: Text to summarize.
            max_length: Upper bound on generated tokens when the text fits in
                a single chunk.
            min_length: Lower bound on generated tokens when the text fits in
                a single chunk.
            chunk_size: Maximum chunk length in characters.

        Returns:
            The chunk summaries joined with spaces and terminated with
            punctuation, or an explanatory message when nothing could be
            summarized.

        When the text is split into several chunks, the per-chunk bounds
        ``_CHUNK_MAX_LENGTH`` / ``_CHUNK_MIN_LENGTH`` are used instead of the
        arguments. In every case the minimum is capped at half the word count
        of the shortest chunk, so the model is never forced to generate more
        than the input contains.
        """
        print("Generating summary...")

        # 1. Prepare chunks
        raw_chunks = self.chunk_text(text or "", max_chunk_length=chunk_size)
        chunks = [chunk for chunk in raw_chunks if chunk.strip()]

        if not chunks:
            return "Could not generate a summary because the text was empty or too short."

        print(f"Text divided into {len(chunks)} chunk(s) for summarization.")

        # 2. Pick generation bounds
        if len(chunks) > 1:
            max_length, min_length = self._CHUNK_MAX_LENGTH, self._CHUNK_MIN_LENGTH
        # Word count is used as a cheap stand-in for the token count.
        shortest_words = min(len(chunk.split()) for chunk in chunks)
        min_length = max(1, min(min_length, shortest_words // 2, max_length - 1))

        # 3. Batch summarization
        summary_results = self.summarizer(
            chunks,
            max_length=max_length,
            min_length=min_length,
            do_sample=False,
            truncation=True,  # guard against a chunk exceeding the model's input limit
        )

        # 4. Combine summaries safely
        combined_summary = " ".join(
            res['summary_text'].strip()
            for res in summary_results
            if res and res.get('summary_text')
        ).strip()

        if not combined_summary:
            return "The model could not produce a summary for the given text."

        return self.humanize_summary(combined_summary)

    def humanize_summary(self, summary):
        """Trim ``summary`` and make sure it ends with sentence punctuation.

        An empty (or whitespace-only) summary is returned as an empty string
        rather than a lone period.
        """
        summary = summary.strip()
        if not summary:
            return summary
        if not summary.endswith(('.', '!', '?')):
            summary += '.'
        return summary

    def process_video(self, video_url, output_file=None, cleanup=True):
        """Produce a transcript and summary for ``video_url``.

        Args:
            video_url: URL of the video.
            output_file: Optional path; when given, the result is also written
                there as UTF-8 JSON.
            cleanup: Delete the downloaded audio file after transcription.

        Returns:
            A dict with the keys ``video_url``, ``transcript`` and ``summary``.
        """
        youtube_id = self.extract_youtube_id(video_url)
        transcript = None

        if youtube_id:
            print(f"Detected YouTube ID: {youtube_id}")
            transcript = self.get_youtube_transcript(youtube_id)
            if transcript:
                print("Found official YouTube transcript!")

        if not transcript:
            print("No official transcript found. Transcribing from audio.")
            audio_file = None
            try:
                audio_file = self.download_audio(video_url)
                transcript = self.transcribe_audio(audio_file)
            finally:
                # Remove the temporary audio even when transcription fails.
                if cleanup and audio_file and os.path.exists(audio_file):
                    os.remove(audio_file)

        summary = self.summarize_text(transcript)
        result = {"video_url": video_url, "transcript": transcript, "summary": summary}

        if output_file:
            with open(output_file, 'w', encoding='utf-8') as f:
                json.dump(result, f, indent=2, ensure_ascii=False)
            print(f"Results saved to {output_file}")

        return result

In [None]:
def get_video_metadata(url):
    """Look up display metadata for a video URL with yt-dlp (no download).

    Args:
        url: Video URL passed to ``YoutubeDL.extract_info``.

    Returns:
        A dict with the keys ``title``, ``thumbnail``, ``duration`` (whole
        seconds as an int) and ``channel``, or None when the lookup fails.
        Fields that are missing *or* reported as None fall back to
        ``'Unknown Title'``, ``''``, ``0`` and ``'Unknown Channel'``.
    """
    ydl_opts = {'quiet': True, 'no_warnings': True, 'skip_download': True}
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        try:
            info = ydl.extract_info(url, download=False)
            if not info:
                return None
            # `or` (rather than a .get default) also covers keys that are
            # present but set to None; int() drops a fractional duration so
            # callers can format it with integer format codes.
            return {
                'title': info.get('title') or 'Unknown Title',
                'thumbnail': info.get('thumbnail') or '',
                'duration': int(info.get('duration') or 0),
                'channel': info.get('uploader') or 'Unknown Channel'
            }
        except Exception:
            # Metadata is cosmetic: any failure degrades to "no metadata".
            return None

def format_summary_display(url, summary, transcript=None):
    """Build the HTML card shown for a summarized video.

    Args:
        url: Video URL, used to look up title, thumbnail, channel and duration.
        summary: Summary text to show.
        transcript: Optional transcript; when given, its first 500 characters
            are added in a collapsible preview.

    Returns:
        An HTML string. When metadata cannot be fetched, only a minimal
        heading and the summary are returned.

    All interpolated values (video metadata, summary, transcript) are
    HTML-escaped, because they originate from the video page or from model
    output and must not be able to inject markup into the notebook.
    """
    # Imported locally under its own name so this function does not depend on
    # a module-level `html` name.
    from html import escape

    safe_summary = escape(str(summary))

    metadata = get_video_metadata(url)
    if not metadata:
        return f"<h2>Summary Result</h2><p>{safe_summary}</p>"

    # Duration may be missing, None or fractional; normalize to whole seconds.
    total_seconds = int(metadata.get('duration') or 0)
    duration_str = f"{total_seconds // 60}:{total_seconds % 60:02d}"

    safe_thumbnail = escape(str(metadata.get('thumbnail') or ''), quote=True)
    safe_title = escape(str(metadata.get('title') or 'Unknown Title'))
    safe_channel = escape(str(metadata.get('channel') or 'Unknown Channel'))

    page = f"""
    <div style="font-family: sans-serif; display: flex; align-items: flex-start; margin-bottom: 20px; border: 1px solid #ddd; padding: 15px; border-radius: 10px;">
        <img src="{safe_thumbnail}" style="max-width: 200px; border-radius: 8px; margin-right: 20px;" />
        <div style="flex-grow: 1;">
            <h2 style="margin-top: 0;">{safe_title}</h2>
            <p><strong>Channel:</strong> {safe_channel}<br><strong>Duration:</strong> {duration_str}</p>
        </div>
    </div>
    <div style="font-family: sans-serif; background-color: #f9f9f9; padding: 15px; border-radius: 8px; margin-bottom: 20px;">
        <h3 style="margin-top: 0;">📝 Summary</h3>
        <p style="font-size: 16px; line-height: 1.6;">{safe_summary}</p>
    </div>
    """
    if transcript:
        # Truncate first, then escape, so an entity is never cut in half.
        transcript_preview = transcript[:500] + "..." if len(transcript) > 500 else transcript
        safe_preview = escape(transcript_preview)
        page += f"""
        <details>
            <summary style="cursor: pointer; color: #007bff; font-weight: bold;">Show Transcript Preview</summary>
            <div style="background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin-top: 10px; max-height: 200px; overflow-y: auto;">
                <pre style="white-space: pre-wrap; font-size: 14px;">{safe_preview}</pre>
            </div>
        </details>
        """
    return page

In [None]:
def run_video_summarizer():
    """Build and display the interactive summarizer UI.

    Renders a URL box, a Whisper-model dropdown and a button. Clicking the
    button summarizes the given video and shows the formatted result in the
    output area below the controls.

    The loaded ``VideoSummarizer`` is kept between clicks and only rebuilt
    when a different Whisper model is selected, so repeated runs do not
    reload the models. The button is disabled while a run is in progress so
    extra clicks cannot queue duplicate jobs.
    """
    url_input = widgets.Text(placeholder='Enter YouTube URL', description='Video URL:', layout=widgets.Layout(width='80%'))
    model_dropdown = widgets.Dropdown(options=['tiny', 'base', 'small', 'medium'], value='base', description='Whisper Model:')
    run_button = widgets.Button(description='Summarize Video', button_style='primary', icon='play')
    output_area = widgets.Output()

    # Most recently built summarizer and the Whisper model name it was built with.
    loaded = {"model_name": None, "summarizer": None}

    def get_summarizer(model_name):
        """Return a summarizer for ``model_name``, reusing the loaded one when possible."""
        if loaded["summarizer"] is None or loaded["model_name"] != model_name:
            # Drop the previous instance first so its models can be released
            # before the replacement is loaded.
            loaded["summarizer"] = None
            loaded["model_name"] = None
            loaded["summarizer"] = VideoSummarizer(whisper_model=model_name)
            loaded["model_name"] = model_name
        return loaded["summarizer"]

    def on_button_clicked(b):
        with output_area:
            output_area.clear_output()
            url = url_input.value.strip()
            if not url:
                print("❌ Please enter a valid video URL.")
                return

            run_button.disabled = True
            try:
                summarizer = get_summarizer(model_dropdown.value)
                result = summarizer.process_video(url)
                display(HTML(format_summary_display(url, result["summary"], result["transcript"])))
            except Exception as e:
                # Surface the failure in the output area instead of losing it
                # inside the widget callback.
                print(f"❌ An error occurred: {e}")
            finally:
                run_button.disabled = False

    run_button.on_click(on_button_clicked)

    display(widgets.HTML("<h1>🎬 Video Summarizer</h1>"))
    display(widgets.VBox([url_input, model_dropdown, run_button, output_area]))

# Build the widget UI and render it as this cell's output.
run_video_summarizer()