# core

In [None]:
#| default_exp core

In [None]:
#| export
from fastcore.all import *
from yt_dlp import YoutubeDL
import json, datetime, httpx

In [None]:
#| hide
from nbdev.showdoc import *

## Videos

In [None]:
# Sample video URL used by the example cells that follow
video_url = 'https://www.youtube.com/watch?v=8SF_h3xF3cE'

In [None]:
#| export
class YTVideo:
    "Wrapper around the metadata dict extracted for a single video"
    def __init__(self, data:dict): store_attr()
    def __repr__(self):
        "Summarise the main metadata fields, the duration, and which optional sections actually hold data"
        d = self.data
        flds = ['webpage_url','title','language','uploader','categories','tags']
        flds = [f'{o}={d[o]!r}' for o in flds if o in d]
        # Guard against a `duration` key that is present but `None`: `timedelta(seconds=None)` raises `TypeError`
        dur = d.get('duration')
        if dur is not None: flds.append(f'duration={datetime.timedelta(seconds=dur)}')
        # Report whether each section is populated rather than whether the key merely exists:
        # a key mapped to `None` or to an empty container carries nothing usable.
        for o in ['automatic_captions','subtitles','chapters','heatmap']: flds.append(f'have_{o}={bool(d.get(o))}')
        sig = ', '.join(flds)
        return f'YTVideo({sig})'

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:YTVideo, url:str, quiet:bool=True)->YTVideo:
    "Create a video object from the metadata `YoutubeDL.extract_info` returns for `url` (called with `download=False`)"
    opts = {
        'writedescription':True, 'writesubtitles':True, 'writeautomaticsub':True, 'no_warnings':True, 'skip_download':True,
        'subtitlesformat':'srt', 'quiet':quiet,
    }
    with YoutubeDL(opts) as ydl: data = ydl.extract_info(url, download=False)
    # Construct through `cls` (not a hard-coded `YTVideo`) so a subclass calling
    # `from_url` gets an instance of its own type.
    return cls(data)

In [None]:
# Fetch metadata for the sample video; the bare name on the last line displays its repr
video = YTVideo.from_url(video_url)
video

YTVideo(webpage_url='https://www.youtube.com/watch?v=8SF_h3xF3cE', title='Practical Deep Learning for Coders: Lesson 1', language='en', uploader='Jeremy Howard', categories=['Education'], tags=['deep learning', 'fastai'], duration=1:22:55, have_automatic_captions=True, have_subtitles=True, have_chapters=True, have_heatmap=True)

In [None]:
#| export
@patch
def subtitles_url(self:YTVideo, language:str=None):
    "URL of the srt subtitle track for `language` (default: the video's own `language` field), or `None` if there is none"
    lang = ifnone(language, self.data.get('language'))
    # Try the `subtitles` section first, then `automatic_captions`. Each section is
    # checked for actual content: a section that is present but empty, lacks `lang`,
    # or has no srt track falls through to the next one instead of raising.
    for section in ('subtitles', 'automatic_captions'):
        tracks = (self.data.get(section) or {}).get(lang) or []
        url = next((o['url'] for o in tracks if o.get('ext')=='srt'), None)
        if url is not None: return url
    return None

In [None]:
# Look up the subtitle URL for the video's default language and display it
subtitle_url = video.subtitles_url()
subtitle_url

'https://www.youtube.com/api/timedtext?v=8SF_h3xF3cE&ei=BaVSaa5mxdjJuA-bva6ADQ&caps=asr&opi=112496729&xoaf=5&xowf=1&hl=en&ip=0.0.0.0&ipbits=0&expire=1767049077&sparams=ip%2Cipbits%2Cexpire%2Cv%2Cei%2Ccaps%2Copi%2Cxoaf&signature=9C07413569B4AA607681EF5C2BB0B1EE3AC7BD1B.0AB32937CF29E8512B47B5D175D0D948C9F82A13&key=yt8&lang=en&fmt=srt'

In [None]:
# Download the subtitle document and keep the response body as text
subs = httpx.get(subtitle_url).text

In [None]:
# Blocks in the downloaded text are separated by a blank line; take the first one as a parsing sample
block = subs.split('\n\n')[0]
block

'1\n00:00:02,000 --> 00:00:10,000\nWelcome to Practical Deep Learning for coders,\xa0\nlesson one. This is version five of this course,\xa0\xa0'

In [None]:
#| export
# One SRT block: index line, 'HH:MM:SS,mmm --> HH:MM:SS,mmm' line, then the text (which may span lines).
# Only the 'HH:MM:SS' part of each timestamp is captured; the milliseconds are matched but dropped.
_subtitle_entry_pat = re.compile(r'(\d+)\n(\d+:\d+:\d+),\d+ --> (\d+:\d+:\d+),\d+\n(.+)', re.DOTALL)

class SubtitleEntry:
    "A single subtitle cue: its index, start and end timestamps, and text"
    __repr__ = basic_repr()
    def __init__(self, index:int, start:datetime.timedelta, end:datetime.timedelta, text:str): store_attr()

    @classmethod
    def from_str(cls, s:str)->'Self':
        "Parse one SRT block from `s`; returns `None` when `s` is not a well-formed block"
        # NOTE(review): `start`/`end` are passed on as the 'HH:MM:SS' strings captured by the
        # pattern, not as the `timedelta` objects that `__init__` annotates.
        match = _subtitle_entry_pat.match(s.strip())
        # A non-matching block (too few lines, missing timestamps, ...) yields `None`
        # instead of an `AttributeError` on `match.group`.
        if match is None: return None
        idx, start, end, text = match.groups()
        return cls(int(idx), start, end, text)

In [None]:
# Parse the sample block into a SubtitleEntry and display it
SubtitleEntry.from_str(block)

SubtitleEntry(index=1, start='00:00:02', end='00:00:10', text='Welcome to Practical Deep Learning for coders,\xa0\nlesson one. This is version five of this course,')

In [None]:
#| export
class Subtitles:
    "A collection of `SubtitleEntry` objects parsed from a subtitle document"
    def __init__(self, entries:L): store_attr()

    def __repr__(self):
        "Show the number of entries and the end timestamp of the last one (`None` when there are no entries)"
        # Indexing `[-1]` on an empty collection would raise, so fall back to `None`
        last_ts = self.entries[-1].end if len(self.entries) else None
        sig = ', '.join([f'entries={len(self.entries)}', f'last_ts={last_ts!r}'])
        return f'Subtitles({sig})'

    @classmethod
    def from_str(cls, s:str)->'Self':
        "Split `s` into blank-line-separated blocks and parse each; blocks that parse to `None` are dropped"
        # Both the blank-line split and the entry pattern are written in terms of '\n',
        # so normalise CRLF / CR line endings first.
        s = s.replace('\r\n', '\n').replace('\r', '\n')
        entries = L.split(s.strip(), '\n\n').map(SubtitleEntry.from_str).filter()
        return cls(entries)

In [None]:
# Parse the whole downloaded subtitle document and display the summary repr
Subtitles.from_str(subs)

Subtitles(entries=769, last_ts='01:22:55')

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:Subtitles, url:str)->Subtitles:
    "Download the subtitle document at `url` and parse it with `from_str`"
    resp = httpx.get(url)
    # Fail loudly on a non-success status instead of parsing an error body as subtitles
    resp.raise_for_status()
    return cls.from_str(resp.text)

In [None]:
# Same result as above, but fetching and parsing in one call
Subtitles.from_url(subtitle_url)

Subtitles(entries=769, last_ts='01:22:55')

## Playlists

In [None]:
# Sample playlist URL used by the example cells that follow
playlist_url = 'https://www.youtube.com/playlist?list=PLfYUBJiXbdtSvpQjSnJJ_PmDQB_VyT5iU'

In [None]:
#| export
class YTPlaylist:
    "Wrapper around the metadata dict extracted for a playlist"
    def __init__(self, data:dict): store_attr()
    def __repr__(self):
        "Summarise the playlist's URL, title, channel and entry count, skipping any field that is absent"
        flds = ['webpage_url', 'title', 'channel', 'playlist_count']
        # Skip missing keys (as `YTVideo.__repr__` does) so that displaying an object
        # with incomplete metadata never raises `KeyError`.
        sig = ', '.join(f'{o}={self.data[o]!r}' for o in flds if o in self.data)
        return f'YTPlaylist({sig})'

In [None]:
#| export
@patch(cls_method=True)
def from_url(cls:YTPlaylist, url:str, quiet:bool=True)->YTPlaylist:
    "Create a playlist object from the metadata `YoutubeDL.extract_info` returns for `url` (called with `download=False`)"
    # NOTE(review): 'extract_flat' is presumably the option that keeps entries as shallow stubs;
    # 'flat_playlist' looks like the CLI flag name rather than a YoutubeDL param — confirm before dropping it.
    opts = {'flat_playlist':True, 'extract_flat':True, 'quiet':quiet}
    with YoutubeDL(opts) as ydl: data = ydl.extract_info(url, download=False)
    # Construct through `cls` (not a hard-coded `YTPlaylist`) so a subclass calling
    # `from_url` gets an instance of its own type.
    return cls(data)

In [None]:
# Fetch metadata for the sample playlist; the bare name on the last line displays its repr
playlist = YTPlaylist.from_url(playlist_url)
playlist

YTPlaylist(webpage_url='https://www.youtube.com/playlist?list=PLfYUBJiXbdtSvpQjSnJJ_PmDQB_VyT5iU', title='Practical Deep Learning for Coders', channel='Jeremy Howard', playlist_count=8)

In [None]:
# The raw metadata dict is available on `.data`; show the playlist description
playlist.data['description']

'This free course is designed for people with some coding experience who want to learn how to apply deep learning and machine learning to practical problems.\n\nThis course covers topics such as how to:\n- Build and train deep learning models for computer vision, natural language processing, tabular analysis, and collaborative filtering problems\n- Create random forests and regression models\n- Deploy models\n- Use PyTorch, the world’s fastest growing deep learning software, plus popular libraries like fastai and Hugging Face\n\nThere are 9 lessons, and each lesson is around 90 minutes long. The course is based on our 5-star rated book, which is freely available online.\n\nYou don’t need any special hardware or software — we’ll show you how to use free resources for both building and deploying models. You don’t need any university math either — we’ll teach you the calculus and linear algebra you need during the course.'

In [None]:
# Inspect the first item of the playlist's `entries` list
playlist.data['entries'][0]

{'_type': 'url',
 'ie_key': 'Youtube',
 'id': '8SF_h3xF3cE',
 'url': 'https://www.youtube.com/watch?v=8SF_h3xF3cE',
 'title': 'Practical Deep Learning for Coders: Lesson 1',
 'description': None,
 'duration': 4976,
 'channel_id': None,
 'channel': None,
 'channel_url': None,
 'uploader': None,
 'uploader_id': None,
 'uploader_url': None,
 'thumbnails': [{'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEbCKgBEF5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLDzGS4dCrE4P5ZXNZfLHs7PWgtB7g',
   'height': 94,
   'width': 168},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEbCMQBEG5IVfKriqkDDggBFQAAiEIYAXABwAEG&rs=AOn4CLCzscwuCifqJOpZaat71ITGGBbp6A',
   'height': 110,
   'width': 196},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEcCPYBEIoBSFXyq4qpAw4IARUAAIhCGAFwAcABBg==&rs=AOn4CLBGEE8rwgRWK3J7GRU1tFnqjWysIg',
   'height': 138,
   'width': 246},
  {'url': 'https://i.ytimg.com/vi/8SF_h3xF3cE/hqdefault.jpg?sqp=-oaymwEcCNACELwBSFXyq4qp

# -

In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the module named by `default_exp` — confirm)
import nbdev; nbdev.nbdev_export()