In [None]:
import json
import os
import subprocess
from pathlib import Path


import openai
import requests
from dotenv import load_dotenv
from pytube import YouTube
from tqdm import tqdm
from youtube_transcript_api import YouTubeTranscriptApi

# load_dotenv() is expected to merge variables from a local .env file into os.environ.
load_dotenv()
# Hand the key to the openai client explicitly; a missing OPENAI_API_KEY fails here with KeyError.
# NOTE(review): presumably required because `openai` was imported before load_dotenv() ran,
# so the library could not pick the key up from the environment by itself - confirm.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Root folder of the bootcamp docs, resolved against the working directory at the moment
# this cell runs; fail early if the notebook was started from somewhere unexpected.
dirname = Path('../docs/llm-bootcamp/spring-2023').absolute()
assert dirname.exists()

# (slug, YouTube video id) for every lecture; the slug is also used as the name of the
# lecture's sub-folder under `dirname`.
_LECTURE_TABLE = (
    ('askfsdl-walkthrough', 'pUKs4xM1r5U'),
    ('augmented-language-models', 'YdeuQhlHmCA'),
    ('launch-an-llm-app-in-one-hour', 'twHxmU9OxDU'),
    ('llm-foundations', 'MyFrMFab6bo'),
    ('llmops', 'Fquj2u7ay40'),
    ('prompt-engineering', 'JnBHR_yL2w8'),
    ('ux-for-luis', 'l5mG4z343qg'),
    ('whats-next', 'ax_R4yz1WwM'),
    ('shabani-train-your-own', 'roEKOzxilq4'),
    ('chase-agents', 'DWUdGhRrv2c'),
    ('welinder-fireside-chat', '54UThDl00qI'),
)
lectures = [{'slug': slug, 'yt_id': yt_id} for slug, yt_id in _LECTURE_TABLE]

# Convenience handle on the first lecture for trying the helpers on a single video.
lecture = lectures[0]

In [None]:
def get_chapters_with_transcripts(yt_id: str) -> list[dict]:
    """Fetch a video's chapters and attach to each the transcript text that falls inside it.

    Chapter metadata is requested from the yt.lemnoslife.com `videos?part=chapters`
    endpoint; the transcript comes from ``YouTubeTranscriptApi.get_transcript``.

    Args:
        yt_id: YouTube video id.

    Returns:
        One dict per chapter, in the order the endpoint returned them. Each dict keeps
        every key of the original chapter except 'thumbnails' and gains a 'transcript'
        key: the space-joined 'text' of all transcript segments whose 'start' lies in
        [this chapter's 'time', next chapter's 'time'). The last chapter runs to the end
        of the transcript.

    Raises:
        requests.HTTPError: if the chapters endpoint answers with an error status.
        AssertionError: if the video has no chapters.
    """
    url = f"https://yt.lemnoslife.com/videos?part=chapters&id={yt_id}"
    r = requests.get(url, timeout=30)
    r.raise_for_status()  # surface HTTP failures instead of a confusing JSON/Key error below
    chapters = r.json()['items'][0]['chapters']['chapters']
    # Must be a strict comparison: `>= 0` is always true and would never catch an empty list.
    assert len(chapters) > 0, "Video has no chapters"
    chapters = [{k: v for k, v in chap.items() if k != 'thumbnails'} for chap in chapters]  # Drop the 'thumbnails' key

    transcript = YouTubeTranscriptApi.get_transcript(yt_id)

    for ind, chapter in enumerate(chapters):
        # A chapter ends where the next one starts; the last chapter is open-ended.
        chapter_end = chapters[ind + 1]['time'] if ind + 1 < len(chapters) else float('inf')
        chapter['transcript'] = ' '.join(
            seg['text'] for seg in transcript if chapter['time'] <= seg['start'] < chapter_end
        )

    return chapters


def summarize_chapter(chapter_transcript: str) -> str:
    """Ask GPT-4 to condense one chapter's transcript into a few bullet points.

    Args:
        chapter_transcript: Transcript text of a single chapter; it is appended to a fixed
            instruction prompt and sent as one user message.

    Returns:
        The content of the first completion choice, or the placeholder string
        'Chapter is too long to summarize' when the API rejects the request or the
        response lacks the expected fields.

    Raises:
        Other openai errors (authentication, rate limiting, connectivity, ...) propagate
        unchanged.
    """
    instructions = f"""
Summarize the following excerpt of a lecture transcript into just a few informative bullet points.
Write from the perspective of the speaker (so don't use the phrase "The speaker".

{chapter_transcript}""".strip()

    try:
        # The API call itself has to be inside the `try`: an over-long prompt is rejected by
        # `create`, before there is any response to index into.
        response = openai.ChatCompletion.create(
            model='gpt-4',
            messages=[{'role': 'user', 'content': instructions}],
        )
        return response['choices'][0]['message']['content']
    except (openai.error.InvalidRequestError, KeyError, IndexError):
        # NOTE(review): InvalidRequestError is assumed to be what openai<1.0 raises when the
        # prompt exceeds the model's context window; it can also signal other invalid
        # requests, which will get the same placeholder.
        return 'Chapter is too long to summarize'
    

def summarize_chapters(chapters) -> None:
    """Add a 'summary' to every chapter that does not have one yet, in place.

    Args:
        chapters: Iterable of chapter dicts, each with a 'transcript' key. Chapters that
            already carry a 'summary' key are left untouched, so the function can be
            re-run on partially processed data without repeating API calls.

    Returns:
        None; the chapter dicts are mutated.
    """
    for chapter in tqdm(chapters):
        if 'summary' in chapter:
            # `continue` is required here: a bare `next` only names the builtin and does
            # not skip the iteration.
            continue
        chapter['summary'] = summarize_chapter(chapter['transcript'])


def extract_chapter_screens(yt_id: str, chapters: list[dict]) -> None:
    """Save one still frame per chapter as chapter_<index>.jpg in the working directory.

    The video is downloaded to a temporary 'video.mp4' in the working directory, ffmpeg
    grabs a single frame for each chapter, and the download is deleted afterwards even if
    a step fails.

    Args:
        yt_id: YouTube video id.
        chapters: Chapter dicts with a numeric 'time' key (the chapter's start offset).

    Raises:
        FileNotFoundError: if the ffmpeg executable cannot be found.
    """
    video_path = 'video.mp4'

    # Download the highest resolution video
    YouTube(f'https://youtu.be/{yt_id}').streams.get_highest_resolution().download(filename=video_path)

    try:
        for ind, chapter in enumerate(chapters):
            # Seek 5 past the chapter start (a bare -ss number is read by ffmpeg as seconds),
            # presumably to skip the transition into the chapter.
            cmd = [
                'ffmpeg', '-y',
                '-ss', str(chapter['time'] + 5),
                '-i', video_path,
                '-frames:v', '1',
                f'chapter_{ind}.jpg',
            ]
            # Argument list instead of a shell string: no quoting pitfalls, and the exit
            # status can be inspected rather than silently dropped.
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                print(f"ffmpeg failed for chapter {ind} (exit code {result.returncode}): {result.stderr.strip()[-500:]}")
    finally:
        # Never leave the (large) download behind, even when a frame grab raised.
        os.remove(video_path)


def write_chapter_summaries_markdown(chapters):
    """Write 'chapter_summaries.md' in the working directory from the chapter data.

    The file starts with a "## Chapter Summaries" heading, followed for each chapter by a
    "###" heading with its title, an image link to chapter_<index>.jpg and its summary.

    Args:
        chapters: Sequence of chapter dicts with 'title' and 'summary' keys.

    Raises:
        KeyError: if a chapter lacks 'title' or 'summary'.
    """
    sections = ["## Chapter Summaries\n\n"]
    for ind, chapter in enumerate(chapters):
        sections.append(
            f"### {chapter['title']}\n\n"
            f"![Chapter {ind} Cover Image](chapter_{ind}.jpg)\n\n"
            f"{chapter['summary']}\n\n"
        )
    # Explicit UTF-8: titles and generated summaries may contain non-ASCII characters, and
    # the platform default encoding is not guaranteed to handle them.
    with open('chapter_summaries.md', 'w', encoding='utf-8') as f:
        f.write(''.join(sections))

In [None]:
# Build chapter summaries and cover images for every lecture. The helpers write their
# outputs (chapter_<n>.jpg, chapter_summaries.md) relative to the working directory, so
# each lecture is processed from inside its own folder. The original working directory is
# restored afterwards - also on failure - so that cells which resolve relative paths can be
# re-run safely.
original_cwd = os.getcwd()
try:
    for lecture in lectures:
        print(lecture['slug'])
        os.chdir(dirname / lecture['slug'])
        chapters = get_chapters_with_transcripts(lecture['yt_id'])
        summarize_chapters(chapters)
        extract_chapter_screens(lecture['yt_id'], chapters)
        write_chapter_summaries_markdown(chapters)
finally:
    os.chdir(original_cwd)