# core

In [None]:
#| default_exp core

In [None]:
#| export
from fastcore.all import *
from yt_dlp import YoutubeDL
import json, datetime, httpx

In [None]:
#| hide
from nbdev.showdoc import *

## Videos

In [None]:
video_url = 'https://www.youtube.com/watch?v=8SF_h3xF3cE'

In [None]:
#| export
class YTVideo:
    "Wrapper around the info dict that yt-dlp extracts for a single video."
    def __init__(self, data:dict):
        store_attr()
        # Both start empty; `subtitles` is a cache slot for parsed subtitles.
        self.subtitles,self.chapters = None,None

    def __repr__(self):
        "Summarise the main metadata fields and which optional sections carry data."
        shown = ['webpage_url','title','language','uploader','categories','tags']
        parts = [f'{k}={self.data[k]!r}' for k in shown if k in self.data]
        # `duration` may be present but `None`; `timedelta(seconds=None)` would raise.
        if self.data.get('duration') is not None:
            parts.append(f'duration={datetime.timedelta(seconds=self.data["duration"])}')
        # A key can exist with a `None`/empty value, so test the value instead of key membership.
        parts += [f'have_{k}={bool(self.data.get(k))}' for k in ['automatic_captions','subtitles','chapters','heatmap']]
        sig = ', '.join(parts)
        return f'YTVideo({sig})'

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:YTVideo, url:str, quiet:bool=True)->YTVideo:
    "Create a video from `url` using the info dict yt-dlp extracts with `download=False`."
    opts = {
        'writedescription':True, 'writesubtitles':True, 'writeautomaticsub':True, 'no_warnings':True, 'skip_download':True,
        'subtitlesformat':'srt', 'quiet':quiet,
    }
    with YoutubeDL(opts) as ydl: data = ydl.extract_info(url, download=False)
    # Instantiate through `cls` so a subclass gets an instance of its own type.
    return cls(data)

In [None]:
video = YTVideo.from_url(video_url)
video

YTVideo(webpage_url='https://www.youtube.com/watch?v=8SF_h3xF3cE', title='Practical Deep Learning for Coders: Lesson 1', language='en', uploader='Jeremy Howard', categories=['Education'], tags=['deep learning', 'fastai'], duration=1:22:55, have_automatic_captions=True, have_subtitles=True, have_chapters=True, have_heatmap=True)

### Subtitles

In [None]:
#| export
@patch
def subtitles_url(self:YTVideo, language:str=None):
    "URL of the `srt` subtitle track for `language` (default: the video's `language`), or `None` if there is none."
    lang = ifnone(language, self.data.get('language'))
    # Prefer `subtitles`, then fall back to `automatic_captions`. The fallback also applies
    # when `subtitles` exists but has no `srt` track for `lang`, not only when the key is absent.
    for key in ('subtitles', 'automatic_captions'):
        tracks = (self.data.get(key) or {}).get(lang) or []
        url = next((o['url'] for o in tracks if o.get('ext')=='srt'), None)
        if url is not None: return url
    return None

In [None]:
subtitle_url = video.subtitles_url()
subtitle_url

'https://www.youtube.com/api/timedtext?v=8SF_h3xF3cE&ei=e7dSabvuMtuu-coPr5ns0QY&caps=asr&opi=112496729&xoaf=5&xowf=1&hl=en&ip=0.0.0.0&ipbits=0&expire=1767053803&sparams=ip%2Cipbits%2Cexpire%2Cv%2Cei%2Ccaps%2Copi%2Cxoaf&signature=813D5D94C9A5FD449DDE82B79DB9C7B05E144D67.ED963DB928EB963217AE67BFB9CF70B84BFFDBD5&key=yt8&lang=en&fmt=srt'

In [None]:
subs = httpx.get(subtitle_url).text

In [None]:
block = subs.split('\n\n')[0]
block

'1\n00:00:02,000 --> 00:00:10,000\nWelcome to Practical Deep Learning for coders,\xa0\nlesson one. This is version five of this course,\xa0\xa0'

In [None]:
#| export
# One SRT block: an index line, a `start --> end` line, then the text (which may span lines).
# The digits after each comma (milliseconds) are matched but not captured.
_subtitle_entry_pat = re.compile(r'(\d+)\n(\d+:\d+:\d+),\d+ --> (\d+:\d+:\d+),\d+\n(.+)', re.DOTALL)

class SubtitleEntry:
    "A single subtitle cue; `start` and `end` are kept as the timestamp strings captured from the SRT block."
    __repr__ = basic_repr()
    def __init__(self, index:int, start:str, end:str, text:str): store_attr()

    @classmethod
    def from_str(cls, s:str)->'Self':
        "Parse one SRT block; returns `None` when `s` is too short or does not match the SRT layout."
        if s.count('\n')<2: return None
        match = _subtitle_entry_pat.match(s.strip())
        # Skip malformed blocks instead of failing with `AttributeError` on `None.group`.
        if match is None: return None
        # Collapse runs of newlines / non-breaking spaces into a single newline.
        text = re.sub(r'[\n\xa0]+', '\n', match.group(4))
        return cls(int(match.group(1)), match.group(2), match.group(3), text)

In [None]:
SubtitleEntry.from_str(block)

SubtitleEntry(index=1, start='00:00:02', end='00:00:10', text='Welcome to Practical Deep Learning for coders,\nlesson one. This is version five of this course,')

In [None]:
#| export
class Subtitles:
    "A collection of `SubtitleEntry` items."
    def __init__(self, entries:L): store_attr()

    def __repr__(self):
        "Show the number of entries and the end timestamp of the last one (`None` when empty)."
        # With no entries there is no last timestamp; avoid `IndexError` on `entries[-1]`.
        last_ts = self.entries[-1].end if len(self.entries) else None
        sig = ', '.join([f'entries={len(self.entries)}', f'last_ts={last_ts!r}'])
        return f'Subtitles({sig})'

    @classmethod
    def from_str(cls, s:str)->'Self':
        "Split `s` on blank lines, parse each block with `SubtitleEntry.from_str`, and drop blocks that did not parse."
        entries = L.split(s.strip(), '\n\n').map(SubtitleEntry.from_str).filter()
        return cls(entries)

In [None]:
Subtitles.from_str(subs)

Subtitles(entries=769, last_ts='01:22:55')

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:Subtitles, url:str)->Subtitles:
    "Download subtitle text from `url` and parse it with `from_str`; raises `httpx.HTTPStatusError` on a 4xx/5xx response."
    resp = httpx.get(url)
    # Fail loudly on an error response instead of parsing its body as subtitle text.
    resp.raise_for_status()
    return cls.from_str(resp.text)

In [None]:
subtitles = Subtitles.from_url(subtitle_url)
subtitles

Subtitles(entries=769, last_ts='01:22:55')

In [None]:
#| export
@patch
def format_subs(self:Subtitles)->str:
    "Render each entry as `[start] text` and join the entries with newlines, for use in LLM prompts."
    rendered = [f'[{entry.start}] {entry.text}' for entry in self.entries]
    return '\n'.join(rendered)

In [None]:
subtitles.format_subs()[:100]

'[00:00:02] Welcome to Practical Deep Learning for coders,\nlesson one. This is version five of this c'

In [None]:
#| export
@patch
@delegates(YTVideo.subtitles_url)
def fetch_subtitles(self:YTVideo, force:bool=False, **kwargs)->YTVideo:
    "Download subtitles into `self.subtitles` unless already cached (`force` re-downloads); returns `self`."
    if force or (self.subtitles is None):
        url = self.subtitles_url(**kwargs)
        # `subtitles_url` can return `None`; leave `self.subtitles` untouched rather than requesting `None`.
        if url is not None: self.subtitles = Subtitles.from_url(url)
    return self

In [None]:
video.fetch_subtitles()
video.subtitles

Subtitles(entries=769, last_ts='01:22:55')

In [None]:
@patch
@delegates(YTVideo.fetch_subtitles)
def format_subs(self:YTVideo, **kwargs)->str:
    "Fetch subtitles if needed and return them formatted; returns `None` when no subtitles are set."
    # `fetch_subtitles` returns `self`, so the cached subtitles can be read off its result.
    loaded = self.fetch_subtitles(**kwargs).subtitles
    return None if loaded is None else loaded.format_subs()

In [None]:
video.format_subs()[:100]

'[00:00:02] Welcome to Practical Deep Learning for coders,\nlesson one. This is version five of this c'

### Chapters

In [None]:
#| export
def _format_chapter(s:dict)->str:
    "Render one chapter dict as `[start] title`, where `start_time` is read as a number of seconds."
    offset = datetime.timedelta(seconds=s['start_time'])
    return '[{}] {}'.format(offset, s['title'])

In [None]:
_format_chapter(video.data['chapters'][0])

'[0:00:00] Introduction'

In [None]:
#| export
@patch
def format_chapters(self:YTVideo)->str:
    if 'chapters' not in self.data: return
    return '\n'.join(map(_format_chapter, self.data['chapters']))

In [None]:
print(video.format_chapters()[:120])

[0:00:00] Introduction
[0:00:25] What has changed since 2015
[0:01:20] Is it a bird
[0:02:09] Images are made of numbers


### Summary prompt

In [None]:
video

YTVideo(webpage_url='https://www.youtube.com/watch?v=8SF_h3xF3cE', title='Practical Deep Learning for Coders: Lesson 1', language='en', uploader='Jeremy Howard', categories=['Education'], tags=['deep learning', 'fastai'], duration=1:22:55, have_automatic_captions=True, have_subtitles=True, have_chapters=True, have_heatmap=True)


    chapter_prompt="Generate a succinct video summary (1-2 sentences) followed by video chapter timestamps for this video. Format each line of the chapter summaries as 'MM:SS - Chapter Title' (e.g., '02:30 - Introduction'). Start with 00:00. Include all major topics and transitions and be thorough - do not miss any important topics.  For the summary, do not say 'In this video, we will cover the following topics', 'This video discusses..' or anything like that. Instead, reference the main speaker's name if you know it.  If there is a Q&A Section, enumerate individual questions as additional chapters."

In [None]:
@patch
@delegates(YTVideo.fetch_subtitles)
def create_summary_prompt(self:YTVideo, **kwargs)->str:
    "Build an LLM prompt asking for a chaptered Markdown summary of the video; returns `None` when no subtitles are available."
    self.fetch_subtitles(**kwargs)
    subs = self.format_subs()
    if subs is None: return
    # Bind every name the template interpolates. Previously `transcript` and `last_timestamp`
    # were undefined (NameError) and `video_url` resolved to an unrelated notebook global.
    transcript = subs
    video_url = self.data['webpage_url']
    entries = self.subtitles.entries
    last_timestamp = entries[-1].end if len(entries) else self.data.get('duration_string')
    # NOTE(review): the tail of the template (after </transcript>) looks like leftover draft text
    # with duplicated URL/duration lines; its wording is kept as-is and should be confirmed by the author.
    prompt = f'''In the <transcript> tag we have transcript of a video with the title: {self.data['title']!r}. Analyse the transcript to generate a detailed summary of the content of the video.

Your task:
<task>
1. Start with a 2-3 sentence overview of the entire video
2. Identify where major topics begin and end (chapters)
3. Summarise each chapter's key concepts, it should have very high information value.
4. Extract all resources mentioned, e.g. links, books, papers, videos, YouTube channels etc.
</task>

Format your response in Markdown:
<format>
- Start with "## Overview" followed by the 2-3 sentence summary
- For each chapter use: "## [Chapter Title](<video URL with timestamp>)" where the URL includes &t=SECONDS (e.g., [00:02:00] = &t=120)
- End with a "## Resources" section containing a list of the resources mentioned through the video, including a brief context.
</format>

Keep the following writing guidelines in mind:
<guidelines>
1. Do not add filler words. 
2. Make every sentence information-dense without repetition.
3. Get to the point while providing necessary context.
4. Use short words and fewer words.
5. Avoid multiple examples if one suffices.
6. Make questions neutral without telegraphing answers.
7. Remove sentences that restate the premise.
8. Cut transitional fluff like "This is important because..."
9. Combine related ideas into single statements.
10. Avoid overusing bullet points. Prefer flowing prose that combines related concepts. Use lists only for truly distinct items.
11. Trust the reader's intelligence.
12. Start sections with specific advice, not general statements.
13. Replace em dashes with periods, commas, or colons.
14. Cut qualifying phrases that add no concrete information.
15. Use direct statements. Avoid hedge words unless exceptions matter.
16. Remove setup phrases like "It's worth noting that" or "The key point is."
17. Avoid unnecessarily specific claims when general statements work.
18. Avoid explanatory asides and redundant clauses.
19. Each sentence should add new information.
20. Avoid "Remember... the goal is not X but Y" conclusions.
21. No emojis in professional writing.
22. Use simple language. Present information objectively. Avoid exaggeration.
23. No formulaic conclusions with labels and prescriptive wisdom.
</guidelines>

<video title>{self.data['title']}</video title>
Here is the transcript
<transcript>
{transcript}
</transcript>

    
    
    Analyze this video transcript and identify another chapter based on topic shifts.

    Video url: {self.data['webpage_url']}
    duration: {self.data['duration_string']}

Video URL: {video_url}
Video duration: {last_timestamp}





Transcript with timestamps:

Please go ahead and draft the post. Please also include front matter similar to the front matter in the examples and select the best slide from the talk as the cover image (which is not the title slide, but instead another interesting slide that is punchy).
    '''
    return prompt

video.create_summary_prompt()

'\n\n    Video url: https://www.youtube.com/watch?v=8SF_h3xF3cE\n    duration: 1:22:55\n    '

In [None]:
def create_chapter_prompt(subtitles:'Subtitles', video_url:str)->str:
    """Create a prompt for AI to identify chapter breaks and extract practical tips from a transcript.

    `subtitles` is a `Subtitles` object: the end timestamp of its last entry is reported as the
    video duration and `subtitles.format_subs()` supplies the transcript. `video_url` is
    interpolated verbatim. Raises `IndexError` when `subtitles` has no entries.
    """
    # The `Subtitle` type, `format_subtitles_for_ai` and `.end_time` used previously are not
    # defined in this file; `Subtitles` / `SubtitleEntry` provide the equivalents used here.
    last_timestamp = subtitles.entries[-1].end
    formatted_transcript = subtitles.format_subs()

    prompt = f"""Analyse this video transcript and identify natural chapter breaks based on topic shifts.

Video URL: {video_url}
Video duration: {last_timestamp}

Your task:
1. Identify where major topics begin and end
2. Summarise each chapter's key concepts in no more than 15 bullet points. It shouldn't take more than 10 minutes to read the summary. The summary should have very high information value.
3. Extract all practical "golden nuggets" mentioned throughout the video
4. Extract all resources mentioned in the lesson e.g. books, papers, videos, YouTube channels etc.
5. Start with a 2-3 sentence overview of the entire lecture

Format your response in Markdown:
- Start with "## Overview" followed by the 2-3 sentence summary
- For each chapter use: "## [Chapter Title](<video URL with timestamp>)" where the URL includes &t=SECONDS
  (e.g., [00:02:00] = &t=120)
- Follow each chapter heading with bullet points
- End with "## Key Tools & Tips" section containing:
  - **Libraries/Imports**: List each library/module mentioned with brief context of its use
  - **Code Patterns**: Specific coding techniques or patterns demonstrated (include short snippets)
  - **Tools/Services**: Software, platforms, or services recommended (with any comparisons or cost notes)
  - **Best Practices**: General tips, tricks, or insights shared

Transcript with timestamps:
{formatted_transcript}"""

    return prompt

## Playlists

In [None]:
playlist_url = 'https://www.youtube.com/playlist?list=PLfYUBJiXbdtSvpQjSnJJ_PmDQB_VyT5iU'

In [None]:
#| export
class YTPlaylist:
    "Wrapper around the info dict that yt-dlp extracts for a playlist."
    def __init__(self, data:dict): store_attr()
    def __repr__(self):
        "Show the playlist's URL, title, channel and item count, for whichever of those keys exist."
        flds = ['webpage_url', 'title', 'channel', 'playlist_count']
        # Skip absent keys so a sparse info dict cannot make the repr raise `KeyError`.
        sig = ', '.join(f'{o}={self.data[o]!r}' for o in flds if o in self.data)
        return f'YTPlaylist({sig})'

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:YTPlaylist, url:str, quiet:bool=True)->YTPlaylist:
    "Create a playlist from `url` using the info dict yt-dlp extracts with `download=False`."
    # NOTE(review): `extract_flat` is the documented YoutubeDL option; `flat_playlist` looks like
    # the CLI flag name and may be ignored here. Kept unchanged, confirm before removing.
    opts = {'flat_playlist':True, 'extract_flat':True, 'quiet':quiet}
    with YoutubeDL(opts) as ydl: data = ydl.extract_info(url, download=False)
    # Instantiate through `cls` so a subclass gets an instance of its own type.
    return cls(data)

In [None]:
playlist = YTPlaylist.from_url(playlist_url)
playlist

YTPlaylist(webpage_url='https://www.youtube.com/playlist?list=PLfYUBJiXbdtSvpQjSnJJ_PmDQB_VyT5iU', title='Practical Deep Learning for Coders', channel='Jeremy Howard', playlist_count=8)

In [None]:
playlist.data['description']

'This free course is designed for people with some coding experience who want to learn how to apply deep learning and machine learning to practical problems.\n\nThis course covers topics such as how to:\n- Build and train deep learning models for computer vision, natural language processing, tabular analysis, and collaborative filtering problems\n- Create random forests and regression models\n- Deploy models\n- Use PyTorch, the world’s fastest growing deep learning software, plus popular libraries like fastai and Hugging Face\n\nThere are 9 lessons, and each lesson is around 90 minutes long. The course is based on our 5-star rated book, which is freely available online.\n\nYou don’t need any special hardware or software — we’ll show you how to use free resources for both building and deploying models. You don’t need any university math either — we’ll teach you the calculus and linear algebra you need during the course.'

In [None]:
playlist.data['entries'][0]

{'_type': 'url',
 'ie_key': 'Youtube',
 'id': '8SF_h3xF3cE',
 'url': 'https://www.youtube.com/watch?v=8SF_h3xF3cE',
 'title': 'Practical Deep Learning for Coders: Lesson 1',
 'description': None,
 'duration': 4976,
 'channel_id': None,
 'channel': None,
 'channel_url': None,
 'uploader': None,
 'uploader_id': None,
 'uploader_url': None,
 'thumbnails': [{'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEbCKgBEF5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLDzGS4dCrE4P5ZXNZfLHs7PWgtB7g',
   'height': 94,
   'width': 168},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEbCMQBEG5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLCzscwuCifqJOpZaat71ITGGBbp6A',
   'height': 110,
   'width': 196},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEcCPYBEIoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBGEE8rwgRWK3J7GRU1tFnqjWysIg',
   'height': 138,
   'width': 246},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEcCNACELwBSFXyq4qp

# -

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()