In [None]:
from rich import print

from youtube_summarizer.youtube_scrapper import youtube_scrap

# Fetch the data for one video; the attributes of `result` (title, thumbnail,
# channel, chapters, transcript, ...) are inspected one by one in the cells below.
result = youtube_scrap("https://www.youtube.com/watch?v=iYT2haVIgSM")
print(result)

In [None]:
# Video title (a plain string).
result.title

'Getting Started with the NVIDIA Jetson AGX Thor Developer Kit for Physical AI'

In [None]:
# Thumbnail image URL.
result.thumbnail

'https://img.youtube.com/vi/iYT2haVIgSM/maxresdefault.jpg'

In [None]:
# Name of the channel that published the video (nested under `channel`).
result.channel.title

'NVIDIA Developer'

In [None]:
# Video length as an 'HH:MM:SS' string.
result.durationFormatted

'00:06:32'

In [None]:
# Like count as an integer.
result.likeCountInt

585

In [None]:
# Publish date as display text (e.g. 'Aug 25, 2025'), not a date object.
result.publishDateText

'Aug 25, 2025'

In [None]:
# Chapter markers: each has a title, a display timestamp and `startSeconds` (int).
result.chapters

[Chapter(title='Introduction', timeDescription='0:05', startSeconds=5),
 Chapter(title='What’s in the Box', timeDescription='1:01', startSeconds=61),
 Chapter(title='Hardware Overview', timeDescription='1:25', startSeconds=85),
 Chapter(title='First Power and Boot', timeDescription='2:15', startSeconds=135),
 Chapter(title='The NVIDIA Software Stack', timeDescription='2:54', startSeconds=174),
 Chapter(title='Isaac GR00T N1', timeDescription='3:54', startSeconds=234),
 Chapter(title='Video Search and Summarization', timeDescription='4:16', startSeconds=256),
 Chapter(title='NVIDIA Holoscan', timeDescription='5:04', startSeconds=304),
 Chapter(title='Conclusion', timeDescription='6:05', startSeconds=365)]

In [None]:
# Transcript segments; note `startMs` / `endMs` are numeric *strings* and the text
# still contains non-breaking spaces and newlines.
# NOTE(review): this displays the entire transcript; consider slicing before sharing.
result.transcript

[TranscriptSegment(text='[Music]', startMs='5303', endMs='5840', startTimeText='0:05'),
 TranscriptSegment(text="Hi, I'm Leela with NVIDIA, and this is the Jetson\xa0\nAGX Thor Developer Kit. Jetson Thor is the\xa0\xa0", startMs='5840', endMs='13200', startTimeText='0:05'),
 TranscriptSegment(text="ultimate platform for humanoid robotics. It's\xa0\npart of NVIDIA's three-computer solution for\xa0\xa0", startMs='13200', endMs='19279', startTimeText='0:13'),
 TranscriptSegment(text='accelerating physical AI: NVIDIA DGX for\xa0\ntraining, NVIDIA Omniverse for synthetic\xa0\xa0', startMs='19280', endMs='26400', startTimeText='0:19'),
 TranscriptSegment(text='data generation and physical AI simulation, and\xa0\nNVIDIA Jetson Thor for runtime robotics. Jetson\xa0\xa0', startMs='26400', endMs='34079', startTimeText='0:26'),
 TranscriptSegment(text='AGX Thor is the ideal runtime computer for any\xa0\nkind of physical AI application, from humanoids\xa0\xa0', startMs='34080', endMs='40640', star

In [74]:
import re
from typing import Any, List


def transcript_by_chapter_string(chapters: List[Any], transcript: List[Any]) -> str:
    """Render a transcript as text grouped under '## <chapter title>' headings.

    Each chapter owns the half-open time window that runs from its own start to
    the start of the next entry in ``chapters``; the last chapter's window is
    effectively unbounded. A segment is placed in the first window that contains
    its start time. Segments that begin before the first chapter are attached to
    the first chapter instead of being dropped.

    Args:
        chapters: Chapter records, expected in chronological order. Each is an
            object or dict exposing ``title`` and ``startSeconds``. A missing or
            ``None`` title becomes ``"Chapter <n>"``; a missing or ``None``
            start is treated as 0.
        transcript: Segment records (objects or dicts) exposing ``text`` and
            ``startMs``. ``startMs`` may be an int or a numeric string; missing
            or ``None`` is treated as 0.

    Returns:
        One block per chapter, in ``chapters`` order, joined by blank lines.
        A block is ``"## <title>\\n<text>"``; a chapter without segments yields
        ``"## <title>\\n"``. When ``chapters`` is empty the cleaned transcript
        text is returned on its own, without any heading.

    Raises:
        ValueError, TypeError: If a start time cannot be converted by ``int()``.
    """
    # Characters normalised to a plain space (``\xa0`` and ``\u00a0`` are the same
    # code point), followed by collapsing of the resulting space runs.
    whitespace_pattern = re.compile(r"[\xa0\u00a0\u2000-\u200b\u2028\u2029\ufeff\n]")
    multi_space_pattern = re.compile(r" {2,}")

    def _field(item, key, default=None):
        """Read ``key`` from a dict or an object; absent and None both give ``default``."""
        value = item.get(key) if isinstance(item, dict) else getattr(item, key, None)
        return default if value is None else value

    def _segment_start_ms(segment) -> int:
        return int(_field(segment, "startMs", "0"))

    def _clean_text(segments) -> str:
        """Join the segments' text and normalise whitespace to single spaces."""
        pieces = ((_field(seg, "text", "") or "").strip() for seg in segments)
        joined = " ".join(filter(None, pieces))
        joined = whitespace_pattern.sub(" ", joined)
        return multi_space_pattern.sub(" ", joined).strip()

    # The sort is stable and fixes the order in which text appears in each chapter.
    ordered_segments = sorted(transcript or [], key=_segment_start_ms)

    # Videos without chapter markers still have a transcript worth returning.
    if not chapters:
        return _clean_text(ordered_segments)

    # One window per chapter: [own start, next chapter's start).
    windows = []
    last_index = len(chapters) - 1
    for i, ch in enumerate(chapters):
        start_ms = int(_field(ch, "startSeconds", 0)) * 1000
        next_start = _field(chapters[i + 1], "startSeconds", 10**9) if i < last_index else 10**9
        end_ms = int(next_start) * 1000

        windows.append({"title": _field(ch, "title", f"Chapter {i+1}"), "start_ms": start_ms, "end_ms": end_ms, "segments": []})

    # Chapter counts are small, so a linear scan per segment is sufficient.
    for seg in ordered_segments:
        seg_ms = _segment_start_ms(seg)
        for window in windows:
            if window["start_ms"] <= seg_ms < window["end_ms"]:
                window["segments"].append(seg)
                break
        else:
            # No window matched: keep pre-roll text by filing it under the first chapter.
            if seg_ms < windows[0]["start_ms"]:
                windows[0]["segments"].append(seg)

    blocks = []
    for window in windows:
        if not window["segments"]:
            blocks.append(f"## {window['title']}\n")
            continue
        blocks.append(f"## {window['title']}\n{_clean_text(window['segments'])}")

    return "\n\n".join(blocks)


# Chapter-grouped transcript text for the scraped video.
chapter_groups = transcript_by_chapter_string(
    chapters=result.chapters,
    transcript=result.transcript,
)

In [None]:
import re
from typing import Any, List


# NOTE(review): this cell re-defines transcript_by_chapter_string, which an earlier
# cell also defines; whichever cell ran last wins. Keep a single definition.
def transcript_by_chapter_string(chapters: List[Any], transcript: List[Any]) -> str:
    """Render a transcript as text grouped under '## <chapter title>' headings.

    Each chapter owns the half-open time window that runs from its own start to
    the start of the next entry in ``chapters``; the last chapter's window is
    effectively unbounded. A segment is placed in the first window that contains
    its start time. Segments that begin before the first chapter are attached to
    the first chapter instead of being dropped.

    Args:
        chapters: Chapter records, expected in chronological order. Each is an
            object or dict exposing ``title`` and ``startSeconds``. A missing or
            ``None`` title becomes ``"Chapter <n>"``; a missing or ``None``
            start is treated as 0.
        transcript: Segment records (objects or dicts) exposing ``text`` and
            ``startMs``. ``startMs`` may be an int or a numeric string; missing
            or ``None`` is treated as 0.

    Returns:
        One block per chapter, in ``chapters`` order, joined by blank lines.
        A block is ``"## <title>\\n<text>"``; a chapter without segments yields
        ``"## <title>\\n"``. When ``chapters`` is empty the cleaned transcript
        text is returned on its own, without any heading.

    Raises:
        ValueError, TypeError: If a start time cannot be converted by ``int()``.
    """
    # Characters normalised to a plain space (``\xa0`` and ``\u00a0`` are the same
    # code point), followed by collapsing of the resulting space runs.
    whitespace_pattern = re.compile(r"[\xa0\u00a0\u2000-\u200b\u2028\u2029\ufeff\n]")
    multi_space_pattern = re.compile(r" {2,}")

    def _field(item, key, default=None):
        """Read ``key`` from a dict or an object; absent and None both give ``default``."""
        value = item.get(key) if isinstance(item, dict) else getattr(item, key, None)
        return default if value is None else value

    def _segment_start_ms(segment) -> int:
        return int(_field(segment, "startMs", "0"))

    def _clean_text(segments) -> str:
        """Join the segments' text and normalise whitespace to single spaces."""
        pieces = ((_field(seg, "text", "") or "").strip() for seg in segments)
        joined = " ".join(filter(None, pieces))
        joined = whitespace_pattern.sub(" ", joined)
        return multi_space_pattern.sub(" ", joined).strip()

    # The sort is stable and fixes the order in which text appears in each chapter.
    ordered_segments = sorted(transcript or [], key=_segment_start_ms)

    # Videos without chapter markers still have a transcript worth returning.
    if not chapters:
        return _clean_text(ordered_segments)

    # One window per chapter: [own start, next chapter's start).
    windows = []
    last_index = len(chapters) - 1
    for i, ch in enumerate(chapters):
        start_ms = int(_field(ch, "startSeconds", 0)) * 1000
        next_start = _field(chapters[i + 1], "startSeconds", 10**9) if i < last_index else 10**9
        end_ms = int(next_start) * 1000

        windows.append({"title": _field(ch, "title", f"Chapter {i+1}"), "start_ms": start_ms, "end_ms": end_ms, "segments": []})

    # Chapter counts are small, so a linear scan per segment is sufficient.
    for seg in ordered_segments:
        seg_ms = _segment_start_ms(seg)
        for window in windows:
            if window["start_ms"] <= seg_ms < window["end_ms"]:
                window["segments"].append(seg)
                break
        else:
            # No window matched: keep pre-roll text by filing it under the first chapter.
            if seg_ms < windows[0]["start_ms"]:
                windows[0]["segments"].append(seg)

    blocks = []
    for window in windows:
        if not window["segments"]:
            blocks.append(f"## {window['title']}\n")
            continue
        blocks.append(f"## {window['title']}\n{_clean_text(window['segments'])}")

    return "\n\n".join(blocks)


# Chapter-grouped transcript text for the scraped video.
chapter_groups = transcript_by_chapter_string(
    chapters=result.chapters,
    transcript=result.transcript,
)

In [None]:
# Raw repr of the grouped transcript: one '## <title>' block per chapter, separated by blank lines.
chapter_groups

"## Introduction\n[Music] Hi, I'm Leela with NVIDIA, and this is the Jetson AGX Thor Developer Kit. Jetson Thor is the ultimate platform for humanoid robotics. It's part of NVIDIA's three-computer solution for accelerating physical AI: NVIDIA DGX for training, NVIDIA Omniverse for synthetic data generation and physical AI simulation, and NVIDIA Jetson Thor for runtime robotics. Jetson AGX Thor is the ideal runtime computer for any kind of physical AI application, from humanoids to Edge AI agents. It gives you unmatched performance and scalability in a compact, power-efficient form factor. Plus, it's powered by the advanced Blackwell GPU and 128 GB of memory, delivering up to 2070 FP4 TFLOPS of AI compute to effortlessly run the latest generative AI models.\n\n## What’s in the Box\nLet's take a closer look at the Jetson AGX Thor Developer Kit. It includes a Jetson T5000 module, a reference carrier board, an active heat sink with a fan, and a power supply. If you want to set up with a mo

In [None]:
# NOTE(review): `print` here is rich's print (imported in the first cell). rich treats
# [bracketed] text as console markup, so transcript text such as "[Music]" may not be
# shown verbatim — confirm, and escape the text if exact output matters.
print(chapter_groups)

In [None]:
import os

from dotenv import load_dotenv
from google.genai import Client, types
from pydantic import BaseModel, Field
from rich import print

load_dotenv()


# Structured-output model for one chapter of the analysis; used as the element
# type of `Analysis.chapters`. The `description` strings are part of the schema.
class Chapter(BaseModel):
    header: str = Field(description="A descriptive title for the chapter")
    key_points: list[str] = Field(description="Important takeaways and insights from this chapter")
    summary: str = Field(description="A comprehensive summary of the chapter content")


# Top-level structured-output model: passed as `response_schema` to the Gemini
# request below and used afterwards to validate the returned JSON text.
class Analysis(BaseModel):
    title: str = Field(description="The main title or topic of the video content")
    chapters: list[Chapter] = Field(description="Structured breakdown of content into logical chapters")
    key_facts: list[str] = Field(description="Important facts, statistics, or data points mentioned")
    takeaways: list[str] = Field(description="Key insights and actionable takeaways for the audience")
    overall_summary: str = Field(description="A comprehensive summary synthesizing all chapters, facts, and themes")


# Gemini client. The key comes from the environment; the timeout is presumably in
# milliseconds (i.e. ten minutes) — confirm against the SDK's HttpOptions docs.
client = Client(api_key=os.getenv("GEMINI_API_KEY"), http_options={"timeout": 600000})

# The request body is a single part that points at the YouTube video by URI.
video_part = types.Part(file_data=types.FileData(file_uri="https://youtu.be/TXjbT6BF6Hc"))

# JSON output constrained to the Analysis schema, deterministic sampling, bounded thinking.
generation_config = types.GenerateContentConfig(
    system_instruction="Analyze the video/transcript according to the schema and follow the original language.",
    temperature=0,
    response_mime_type="application/json",
    response_schema=Analysis,
    thinking_config=types.ThinkingConfig(thinking_budget=2048),
)

# Streaming call: `response` is consumed chunk by chunk further down in this cell.
response = client.models.generate_content_stream(
    model="models/gemini-2.5-pro",
    contents=types.Content(parts=[video_part]),
    config=generation_config,
)


# Consume the stream: echo every text chunk as it arrives and keep it for parsing.
# Chunks whose `.text` is None are skipped.
result_parts = []
for chunk in response:
    chunk_text = chunk.text
    if chunk_text is not None:
        print(chunk_text, end="")
        result_parts.append(chunk_text)

# Keep the raw JSON under its own name so `final_result` only ever holds an Analysis.
final_json = "".join(result_parts)
final_result = Analysis.model_validate_json(final_json)

print(final_result)

In [None]:
# Validated Analysis instance produced by the streaming cell above.
final_result

Analysis(title='Trump Holds Meeting with Zelensky in the Oval Office', chapters=[Chapter(header='Introduction and Welcome', key_points=['Donald Trump welcomes Ukrainian President Volodymyr Zelenskyy to the Oval Office.', 'Trump states that substantial progress is being made in their discussions.', 'He mentions a recent good meeting with the President of Russia and an upcoming meeting with seven powerful European leaders.'], summary="The video begins with a live news report from the White House, where the press is being led into the Oval Office. President Donald Trump is meeting with Ukrainian President Volodymyr Zelenskyy. Trump starts by welcoming Zelenskyy, stating it's an honor to have him. He mentions they've had good discussions and that substantial progress is being made. He also refers to a recent meeting with the President of Russia and a forthcoming meeting with seven European leaders, highlighting the importance of the current discussions."), Chapter(header="Zelenskyy's Remar

In [None]:
# Parse only when `final_result` still holds raw JSON. The streaming cell above
# already leaves an Analysis instance in this name, and model_validate_json accepts
# JSON text only, so an unconditional call fails on Restart & Run All.
if isinstance(final_result, (str, bytes, bytearray)):
    final_result = Analysis.model_validate_json(final_result)

In [None]:
# Pretty-print the analysis (`print` is rich's print, imported in the Gemini cell above).
print(final_result)

In [None]:
from youtube_summarizer.youtube_loader import youtube_loader

# The return value is only displayed as the cell output; it is not bound to a name.
youtube_loader("https://www.youtube.com/watch?v=S07XJKq-H6Q")

In [None]:
import os

from dotenv import load_dotenv
from google.genai import Client, types
from pydantic import BaseModel, Field
from rich import print

load_dotenv()


# Structured-output model for one chapter of the analysis; used as the element
# type of `Analysis.chapters`. The `description` strings are part of the schema.
# NOTE(review): an earlier cell defines a model with the same name and fields.
class Chapter(BaseModel):
    header: str = Field(description="A descriptive title for the chapter")
    key_points: list[str] = Field(description="Important takeaways and insights from this chapter")
    summary: str = Field(description="A comprehensive summary of the chapter content")


# Top-level structured-output model, passed as `response_schema` to the Gemini
# request below.
# NOTE(review): an earlier cell defines a model with the same name and fields.
class Analysis(BaseModel):
    title: str = Field(description="The main title or topic of the video content")
    chapters: list[Chapter] = Field(description="Structured breakdown of content into logical chapters")
    key_facts: list[str] = Field(description="Important facts, statistics, or data points mentioned")
    takeaways: list[str] = Field(description="Key insights and actionable takeaways for the audience")
    overall_summary: str = Field(description="A comprehensive summary synthesizing all chapters, facts, and themes")


# Gemini client with default HTTP options; the key is read from the environment.
client = Client(api_key=os.getenv("GEMINI_API_KEY"))

# Non-streaming, text-only request constrained to the Analysis JSON schema.
# NOTE(review): `caption` is not assigned in any visible cell, so this cell raises
# NameError on a fresh kernel. Presumably it should hold transcript text (e.g. the
# value returned by youtube_loader(...) or `chapter_groups`) — confirm and bind it.
response = client.models.generate_content(
    model="models/gemini-2.5-pro",
    contents=types.Content(
        parts=[types.Part(text=caption)],
    ),
    config=types.GenerateContentConfig(
        temperature=0,
        response_mime_type="application/json",
        response_schema=Analysis,
        thinking_config=types.ThinkingConfig(thinking_budget=2048),
    ),
)

# Raw JSON text of the response; it is not validated against Analysis in this cell.
print(response.text)