In [None]:
import json

from youtube_transcript_api import YouTubeTranscriptApi

In [253]:
# ID of the YouTube video whose transcript is fetched in the following cells.
video_id = 'CeSAjK2CBEA'

In [254]:
# Inspect the raw transcript: per the output below, a list of dicts with
# 'text', 'start' and 'duration' keys.
YouTubeTranscriptApi.get_transcript(video_id)

[{'text': '♪ Gentle piano music playing ♪', 'start': 0.7, 'duration': 1.168},
 {'text': '♪', 'start': 1.901, 'duration': 3.403},
 {'text': 'Steve Jobs: When you grow up,\nyou tend to get told',
  'start': 5.337,
  'duration': 2.737},
 {'text': 'that the world is the way it is,',
  'start': 8.107,
  'duration': 2.136},
 {'text': 'and your life is just to live\nyour life inside the world;',
  'start': 10.276,
  'duration': 4.471},
 {'text': 'try not to bash\ninto the walls too much.',
  'start': 14.78,
  'duration': 2.803},
 {'text': 'But life can be much broader', 'start': 17.616, 'duration': 2.703},
 {'text': 'once you discover\none simple fact',
  'start': 20.352,
  'duration': 2.537},
 {'text': 'and that is, everything\naround you that you call life',
  'start': 22.922,
  'duration': 3.036},
 {'text': 'was made up by people\nthat were no smarter than you.',
  'start': 25.991,
  'duration': 4.072},
 {'text': 'And you can change it.\nYou can influence it.',
  'start': 30.096,
  'durati

In [255]:
# Format every transcript entry as "<start><sep><text>", e.g. '38.504-Good evening.'
transcript = YouTubeTranscriptApi.get_transcript(video_id)
sep = '-'
text = [str(entry['start']) + sep + entry['text'] for entry in transcript]
text

['0.7-♪ Gentle piano music playing ♪',
 '1.901-♪',
 '5.337-Steve Jobs: When you grow up,\nyou tend to get told',
 '8.107-that the world is the way it is,',
 '10.276-and your life is just to live\nyour life inside the world;',
 '14.78-try not to bash\ninto the walls too much.',
 '17.616-But life can be much broader',
 '20.352-once you discover\none simple fact',
 '22.922-and that is, everything\naround you that you call life',
 '25.991-was made up by people\nthat were no smarter than you.',
 '30.096-And you can change it.\nYou can influence it.',
 '32.665-You can build your own things\nthat other people can use.',
 '37.002-[APPLAUSE]',
 '38.504-Good evening.',
 "40.673-I'm Steve Jobs\nfrom Apple Computer.",
 "42.441-We're very glad\nto be here tonight.",
 '44.81-I think you always\nhad to be a little different',
 '47.246-to buy an Apple computer.',
 '48.981-And I think you still\nhave to think differently',
 '51.117-to buy an Apple computer.',
 '53.419-And I think the people\nthat do bu

In [256]:
def chunk_with_overlap(arr, chunk_length, overlap_length=0):
    """
    Split a list into consecutive chunks that share items at their edges.

    Each chunk starts ``chunk_length - overlap_length`` items after the
    previous one, so neighbouring chunks have ``overlap_length`` items in
    common. The final chunk may be shorter than ``chunk_length``.

    Chunking stops as soon as a chunk reaches the end of ``arr``. This avoids
    emitting a trailing chunk made up only of items that are already at the
    end of the previous chunk (e.g. 10 items with chunk_length=4 and
    overlap_length=2 give 4 chunks, not a 5th one holding just the last two
    items).

    Parameters:
        arr (list): The list to be chunked.
        chunk_length (int): The maximum number of items in each chunk.
        overlap_length (int): The number of items shared by consecutive chunks.

    Returns:
        list: A list of chunks (each chunk is a slice of arr); empty when arr
        is empty.

    Raises:
        ValueError: If chunk_length is not positive, or if overlap_length is
            negative or not smaller than chunk_length.
    """
    if chunk_length <= 0:
        raise ValueError("chunk_length must be a positive integer.")
    if overlap_length < 0 or overlap_length >= chunk_length:
        raise ValueError("overlap_length must be non-negative and less than chunk_length.")

    # The checks above guarantee step >= 1, so the loop always advances.
    step = chunk_length - overlap_length
    total = len(arr)

    chunks = []
    for start in range(0, total, step):
        chunks.append(arr[start:start + chunk_length])
        if start + chunk_length >= total:
            # This chunk already reaches the end of arr; any further chunk
            # would contain nothing but overlap items.
            break

    return chunks

In [257]:
# chunk_length / overlap_length count transcript lines (list items), not characters.
chunks = chunk_with_overlap(text, chunk_length=4000, overlap_length=5)
chunks

[['0.7-♪ Gentle piano music playing ♪',
  '1.901-♪',
  '5.337-Steve Jobs: When you grow up,\nyou tend to get told',
  '8.107-that the world is the way it is,',
  '10.276-and your life is just to live\nyour life inside the world;',
  '14.78-try not to bash\ninto the walls too much.',
  '17.616-But life can be much broader',
  '20.352-once you discover\none simple fact',
  '22.922-and that is, everything\naround you that you call life',
  '25.991-was made up by people\nthat were no smarter than you.',
  '30.096-And you can change it.\nYou can influence it.',
  '32.665-You can build your own things\nthat other people can use.',
  '37.002-[APPLAUSE]',
  '38.504-Good evening.',
  "40.673-I'm Steve Jobs\nfrom Apple Computer.",
  "42.441-We're very glad\nto be here tonight.",
  '44.81-I think you always\nhad to be a little different',
  '47.246-to buy an Apple computer.',
  '48.981-And I think you still\nhave to think differently',
  '51.117-to buy an Apple computer.',
  '53.419-And I think t

In [258]:
# Keep the first chunk on its own to try the summary prompt on it.
first_chunk = chunks[0]
first_chunk

['0.7-♪ Gentle piano music playing ♪',
 '1.901-♪',
 '5.337-Steve Jobs: When you grow up,\nyou tend to get told',
 '8.107-that the world is the way it is,',
 '10.276-and your life is just to live\nyour life inside the world;',
 '14.78-try not to bash\ninto the walls too much.',
 '17.616-But life can be much broader',
 '20.352-once you discover\none simple fact',
 '22.922-and that is, everything\naround you that you call life',
 '25.991-was made up by people\nthat were no smarter than you.',
 '30.096-And you can change it.\nYou can influence it.',
 '32.665-You can build your own things\nthat other people can use.',
 '37.002-[APPLAUSE]',
 '38.504-Good evening.',
 "40.673-I'm Steve Jobs\nfrom Apple Computer.",
 "42.441-We're very glad\nto be here tonight.",
 '44.81-I think you always\nhad to be a little different',
 '47.246-to buy an Apple computer.',
 '48.981-And I think you still\nhave to think differently',
 '51.117-to buy an Apple computer.',
 '53.419-And I think the people\nthat do bu

In [259]:
import os
from openai import OpenAI


# Shared OpenAI client. The key comes from the OPENAI_API_KEY environment
# variable; passing it explicitly matches the library default and could be omitted.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

In [260]:

def get_summaries(chunks, model="gpt-4o"):
    """
    Ask the chat model for timestamped summaries of transcript lines.

    The prompt requests a JSON object shaped like
    ``{"summaries": [{"offset": ..., "text": ...}, ...]}``.

    Parameters:
        chunks (list): Transcript lines formatted as ``offset-text``. The list
            is interpolated into the prompt as-is.
        model (str): Chat-completions model name. Defaults to "gpt-4o".

    Returns:
        dict: ``{'content': <raw message content returned by the model>,
        'usage': <usage object of the response>}``. The content is not parsed
        here; callers in this notebook pass it to ``json.loads``.
    """
    sample_output = {
        'summaries': [
            {
                "offset": 0,
                "text": "This some text"
            },
            {
                "offset": 10,
                "text": "This is some more text"
            }
        ]
    }
    # Serialize the example with json.dumps so the prompt shows real JSON
    # (double-quoted keys/strings) instead of a Python dict repr.
    sample_json = json.dumps(sample_output)
    result = client.chat.completions.create(
        model=model,
        response_format={"type": 'json_object'},
        messages=[
            {
                "role": "user",
                "content": f'''
                    Your task is to create chapters summaries with timestamps based on transcript of the video
                        transcript are given in format offset-text
                        here are the transcript - {chunks}
                        
                        output as json
                        final output should be like
                        {sample_json}
                '''
            }
        ]
    )

    return {'content': result.choices[0].message.content, 'usage': result.usage}


In [261]:
# Summarize only the first chunk and show the raw JSON string returned by the model.
summaries_result = get_summaries(first_chunk)
print(summaries_result['content'])


                           
{
  "summaries": [
    {
      "offset": 0.7,
      "text": "The video begins with gentle piano music, setting a reflective tone."
    },
    {
      "offset": 5.337,
      "text": "Steve Jobs discusses the common notion that the world is fixed and predefined, and encourages thinking differently to make an impact."
    },
    {
      "offset": 32.665,
      "text": "Steve Jobs introduces himself at an Apple event, emphasizing the innovative spirit and different thinking required to embrace Apple products."
    },
    {
      "offset": 60.693,
      "text": "Jobs explains that computers, particularly Apple's, are powerful tools for creativity, equating them to bicycles for the mind."
    },
    {
      "offset": 74.507,
      "text": "He humorously announces dropping his interim title, met with cheers, as he speaks about Apple products intersecting technology and humanity."
    },
    {
      "offset": 98.43,
      "text": "Jobs introduces the iPod as a revo

In [262]:
# Token usage reported for the summary request stored in summaries_result.
print(summaries_result['usage'])

CompletionUsage(completion_tokens=330, prompt_tokens=897, total_tokens=1227, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0))


In [263]:

def get_chapters(chunks, model="gpt-4o"):
    """
    Ask the chat model for timestamped chapter titles.

    The prompt requests a JSON object shaped like
    ``{"chapters": [{"offset": ..., "title": ...}, ...]}`` with titles of
    about five words.

    Parameters:
        chunks (list): Lines formatted as ``offset-text`` (in this notebook,
            the summaries produced by get_summaries). The list is interpolated
            into the prompt as-is.
        model (str): Chat-completions model name. Defaults to "gpt-4o".

    Returns:
        dict: ``{'content': <raw message content returned by the model>,
        'usage': <usage object of the response>}``. The content is not parsed
        here; callers in this notebook pass it to ``json.loads``.
    """
    sample_output = {
        'chapters': [
            {
                "offset": 0,
                "title": "This is first chapter"
            },
            {
                "offset": 10,
                "title": "this is second chapter"
            }
        ]
    }
    # Serialize the example with json.dumps so the prompt shows real JSON
    # (double-quoted keys/strings) instead of a Python dict repr.
    sample_json = json.dumps(sample_output)
    result = client.chat.completions.create(
        model=model,
        response_format={"type": 'json_object'},
        messages=[
            {
                "role": "user",
                "content": f'''
                    Your task is to create chapters summaries with timestamps based on transcript of the video
                        transcript are given in format offset-text
                        here are the transcript - {chunks}
                        
                        output as json
                        final output should be like
                        {sample_json}

                        each chapter title should about 5 words long
                '''
            }
        ]
    )
    return {'content': result.choices[0].message.content, 'usage': result.usage}

In [264]:
import json


def generate_chapters(video_id):
    """
    Build chapter titles for a YouTube video, summarizing one chunk at a time.

    Fetches the transcript, formats every entry as ``start-text``, splits the
    lines with chunk_with_overlap, summarizes each chunk in turn with
    get_summaries and passes the combined summaries to get_chapters.

    Note: a later cell in this notebook redefines ``generate_chapters`` with a
    threaded variant, which replaces this definition once that cell runs.

    Parameters:
        video_id (str): YouTube video id passed to YouTubeTranscriptApi.

    Returns:
        list: The value of the 'chapters' key parsed from the get_chapters reply.
    """
    sep = '-'
    lines = [
        str(entry['start']) + sep + entry['text']
        for entry in YouTubeTranscriptApi.get_transcript(video_id)
    ]
    chunks = chunk_with_overlap(lines, 4000, 5)
    print('chunks length:', len(chunks))

    # Summarize chunk by chunk, accumulating the summaries in chunk order.
    summaries = []
    for chunk in chunks:
        reply = get_summaries(chunk)
        summaries.extend(json.loads(reply['content'])['summaries'])

    # Re-encode the summaries as "offset-text" lines and ask for chapters.
    summary_lines = [str(item['offset']) + sep + item['text'] for item in summaries]
    chapters_reply = get_chapters(summary_lines)
    return json.loads(chapters_reply['content'])['chapters']
# Run the sequential pipeline end to end on a sample video.
generate_chapters('CeSAjK2CBEA')

chunks length: 1


[{'offset': 0.7, 'title': 'Introduction with gentle music'},
 {'offset': 5.337, 'title': 'Shaping the world and influence'},
 {'offset': 38.504, 'title': 'Introducing Apple and its mindset'},
 {'offset': 60.693, 'title': 'Computer: a bicycle for the mind'},
 {'offset': 82.148, 'title': 'Enhancing creativity with technology'},
 {'offset': 98.43, 'title': 'Revolutionary iPod introduction'},
 {'offset': 104.57, 'title': 'Introducing the groundbreaking iPhone'},
 {'offset': 128.127, 'title': 'Importance of focus and choices'},
 {'offset': 140.839, 'title': 'Commitment to innovation and future'},
 {'offset': 156.822, 'title': 'Concluding with audience gratitude'}]

In [266]:
import json
import concurrent.futures

def generate_chapters(video_id, chunk_length=1000, overlap_length=5, max_workers=None):
    """
    Build chapter titles for a YouTube video, summarizing chunks in parallel.

    Fetches the transcript, formats every entry as ``start-text``, splits the
    lines with chunk_with_overlap, requests a summary for each chunk from a
    thread pool, and passes the combined summaries to get_chapters. Chunk
    count and token usage are printed along the way.

    Parameters:
        video_id (str): YouTube video id passed to YouTubeTranscriptApi.
        chunk_length (int): Transcript lines per chunk. Defaults to 1000.
        overlap_length (int): Transcript lines shared by consecutive chunks.
            Defaults to 5.
        max_workers (int | None): Thread-pool size, i.e. the maximum number of
            concurrent summary requests. None keeps the ThreadPoolExecutor
            default.

    Returns:
        list: The value of the 'chapters' key parsed from the get_chapters
        reply, or an empty list when no summaries were produced (no chapters
        request is sent in that case).
    """
    sep = '-'

    # Prepare transcript text
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    text = [str(entry['start']) + sep + entry['text'] for entry in transcript]

    # Chunking
    chunks = chunk_with_overlap(text, chunk_length, overlap_length)
    print('Chunks length:', len(chunks))

    def fetch_summary(chunk):
        """Summarize one chunk and return its parsed 'summaries' list."""
        result = get_summaries(chunk)
        print('summaries token usage:', result['usage'])
        return json.loads(result['content'])['summaries']

    # executor.map yields results in the order of `chunks`, so the summaries
    # stay in transcript order even though the requests run concurrently.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(fetch_summary, chunks))

    # Flatten the per-chunk lists into a single list of summaries.
    summaries = [summary for chunk_summaries in results for summary in chunk_summaries]
    if not summaries:
        # Nothing to build chapters from; skip the extra model request.
        return []

    summaries_chunks = [str(v['offset']) + sep + v['text'] for v in summaries]

    # Generate chapters
    chapters_result = get_chapters(summaries_chunks)
    print('chapters token usage:', chapters_result['usage'])
    return json.loads(chapters_result['content'])['chapters']

# Run the threaded pipeline on a longer video (split into 5 chunks, per the output below).
generate_chapters('3qHkcs3kG44')


Chunks length: 5
summaries token usage: CompletionUsage(completion_tokens=230, prompt_tokens=1396, total_tokens=1626, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=1152))
summaries token usage: CompletionUsage(completion_tokens=652, prompt_tokens=13544, total_tokens=14196, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=13312))
summaries token usage: CompletionUsage(completion_tokens=1079, prompt_tokens=13408, total_tokens=14487, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=131

[{'offset': 0.56, 'title': 'Welcome and Life Perspective'},
 {'offset': 11.04, 'title': 'Unique Tech Investor Insights'},
 {'offset': 40.399, 'title': 'Balanced Life Philosophy'},
 {'offset': 94.479, 'title': 'Broader Life Experience'},
 {'offset': 144.16, 'title': 'Mountain of Life Choices'},
 {'offset': 201.84, 'title': 'Innovation and Risk Taking'},
 {'offset': 259.68, 'title': "Understanding Life's Knowledge"},
 {'offset': 304.96, 'title': 'Diverse Reading Interests'},
 {'offset': 448.319, 'title': "Social Media's Impact"},
 {'offset': 488.16, 'title': 'Value of Anonymity'},
 {'offset': 640.399, 'title': 'Celebrity Privacy Challenges'},
 {'offset': 971.12, 'title': 'Social Consistency Commitment'},
 {'offset': 1045.919, 'title': 'Happiness as a Choice'},
 {'offset': 1191.44, 'title': 'Leverage in Decision-Making'},
 {'offset': 1330.96, 'title': 'Non-linear Work Success'},
 {'offset': 1504.72, 'title': 'Financial Equity over Salary'},
 {'offset': 1512.32, 'title': 'Reverse Industria