### Given a YouTube URL, the following downloads the transcript and then summarizes it

In [5]:
from __future__ import unicode_literals
import openai
import os
import whisper
from dotenv import load_dotenv
import youtube_dl
from youtube_transcript_api import YouTubeTranscriptApi
import shutil
from pytube import YouTube
from pydub import AudioSegment
import time
import re
import json
load_dotenv()
from openai import OpenAI
# Client object used for every OpenAI call in this file. Constructed with no
# arguments, so the API key has to come from the OPENAI_API_KEY environment
# variable (presumably populated from a .env file by load_dotenv() above).
client = OpenAI()

# Also store the key on the `openai` module itself; only the legacy
# module-level calls (kept as commented-out lines further down) would use it.
openai.api_key = os.getenv("OPENAI_API_KEY")

# video_url = 'https://www.youtube.com/watch?v=SNgoul4vyDM'
# summary_title = "ufo_hearing_whisper_transcribe_audio_gpt_4"

# Video to process, and the name of the working directory that receives every
# artefact produced for it (audio, transcript, chunks, summaries).
video_url = 'https://youtu.be/RF72hGIdrG8?si=B3sowi0RGHEgcUU5'
summary_title = "apple_watch_ultra"

# Extract the 11-character video id. Splitting on '=' only works for
# ".../watch?v=<id>" links; for a "youtu.be/<id>?si=..." share link it yields
# the `si` query value instead of the id.
_video_id_match = re.search(
    r"(?:[?&]v=|youtu\.be/|/embed/|/shorts/)([A-Za-z0-9_-]{11})", video_url)
if _video_id_match is None:
    raise ValueError(f"Could not find a YouTube video id in {video_url!r}")
video_id = _video_id_match.group(1)

# Instruction prepended to every transcript chunk sent for summarization.
task = "Please provide a long detailed summary of the following transcript from a Youtube video \n"

# USE indexes into `models`; it also selects the chunk size (in words) and the
# per-token prices used for the running cost estimate.
models = ['gpt-3.5-turbo', 'gpt-4', 'gpt-4-1106-preview']
USE = 2
# TRANSCRIBE: transcribe downloaded audio instead of fetching YouTube's own
# transcript. LOCAL_WHISPER: use the local whisper model instead of the API.
TRANSCRIBE = True
LOCAL_WHISPER = False
tokens_cost = 0
if USE == 0:
    chunk_size = 2300
    input_cost = 0.0015/1000
    output_cost = 0.002/1000
elif USE == 2:
    chunk_size = 40_000
    input_cost = 0.01/1000
    output_cost = 0.03/1000
else:
    chunk_size = 4000
    input_cost = 0.03/1000
    output_cost = 0.06/1000

# Price per second of audio sent for transcription, and the running audio total.
audio_cost_per_second = 0.006/60
audio_cost = 0

OpenAIError: The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable

In [50]:
# Upload a previously generated transcript so an assistant can retrieve from it.
# The handle is opened in a `with` block so it is closed once the upload call
# returns (or raises).
with open("b1m_solved_urban_sprawl/transcript.txt", "rb") as transcript_file:
    file = client.files.create(
        file=transcript_file,
        purpose='assistants'
    )

In [53]:
# Create the summarizing assistant with the uploaded transcript attached.
# The prompt is passed as `instructions` (the system prompt the model follows);
# `description` is only descriptive metadata about the assistant.
assistant = client.beta.assistants.create(
  name="Summarizer",
  instructions="Your task is to provide detailed and comprehensive summaries of YouTube video transcripts. These summaries should capture all key ideas, main points, and important details presented in the transcript. Focus on maintaining the essence and flow of the original content while ensuring clarity and coherence in the summary. Your summaries should be long enough to encompass all critical information and insights from the transcript, aiming to give a complete understanding of the video's content.",
  model="gpt-4-1106-preview",
  tools=[{"type": "retrieval"}],
  file_ids=[file.id]
)

In [54]:
# Open an empty conversation thread; the user message and the run below attach to it.
thread = client.beta.threads.create()

In [57]:
# Start the assistant on the thread.
# NOTE(review): the cell counters show the "summarize the transcript" message
# cell was executed before this one, even though it appears later in the file.
run_arguments = {"thread_id": thread.id, "assistant_id": assistant.id}
run = client.beta.threads.runs.create(**run_arguments)

In [59]:
# Fetch everything currently on the thread; the bare name on the last line
# makes the notebook display the result.
messages = client.beta.threads.messages.list(thread_id=thread.id)
messages

SyncCursorPage[ThreadMessage](data=[ThreadMessage(id='msg_xE5hTluqqBdVn8UZz54ZcT7b', assistant_id='asst_MgG8KonDU56w7rUL9O1bWdd4', content=[MessageContentText(text=Text(annotations=[], value="The transcript appears to be a detailed account of architect Moshe Safdie's project, Habitat 67, which originally aimed to create a high-density housing solution in Montreal for Expo 67. Safdie's vision was to have modules stacked high like a hillside, providing each housing unit with a roof terrace. He faced budget constraints which forced a scale-back of the project from a community of 1,200 families to just 158 residences across three smaller pyramids.\n\nDespite the scale-down, Habitat 67 was a success and became a highly desirable place to live, with its long waitlist and long-term occupancy. However, the revolutionary impact on architecture it promised never fully materialized. Later, Safdie's architects and Epic Games used Unreal Engine to digitally complete Habitat 67 to its original desig

In [55]:
# Post the user's request onto the thread.
message_arguments = {
    "thread_id": thread.id,
    "role": "user",
    "content": "summarize the transcript",
}
message = client.beta.threads.messages.create(**message_arguments)

In [19]:
def download_audio_from_youtube(video_url, output_path, file_name="audio.mp4"):
    """Download the first audio-only stream of a YouTube video.

    Args:
        video_url: URL of the video, passed to ``YouTube``.
        output_path: Directory the stream is downloaded into.
        file_name: Name given to the downloaded file.

    Returns:
        ``output_path + "/" + file_name``.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(video_url)
    audio_stream = yt.streams.filter(only_audio=True).first()
    # first() yields None when the filter matches nothing; fail with a clear
    # message instead of an AttributeError on the next line.
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {video_url}")
    audio_stream.download(output_path, filename=file_name)
    print("Audio download completed!")

    return output_path + "/" + file_name

def split_audio_into_chunks(audio, output_folder, chunk_duration=900):
    """Cut ``audio`` into consecutive pieces and export each one as an mp4 file.

    Pieces are written to ``{output_folder}/audio_chunks/chunk_<start>.mp4``,
    where ``<start>`` is the piece's start offset in whole seconds.

    Args:
        audio: Object whose ``len()`` is its duration in milliseconds and whose
            slices expose ``export(path, format=...)`` (the caller passes a
            pydub ``AudioSegment``).
        output_folder: Directory that receives the ``audio_chunks`` folder.
        chunk_duration: Length of each piece in seconds.
    """
    location = f"{output_folder}/audio_chunks"
    # Create the target folder once, up front, rather than re-checking it on
    # every loop iteration.
    os.makedirs(location, exist_ok=True)

    chunk_duration_ms = chunk_duration * 1000
    for start_time_ms in range(0, len(audio), chunk_duration_ms):
        # Slicing past the end is fine: the final piece is simply shorter.
        chunk = audio[start_time_ms:start_time_ms + chunk_duration_ms]
        chunk.export(f"{location}/chunk_{start_time_ms // 1000}.mp4", format="mp4")


def get_video_title(url):
    """Return the title youtube_dl reports for ``url``, or None if it has none.

    Only metadata is requested; nothing is downloaded.
    """
    with youtube_dl.YoutubeDL({}) as session:
        metadata = session.extract_info(url, download=False)
        title = metadata.get('title')
    return title


def summarize(input, model=models[USE]):
    """Send a chat conversation to the model and return the reply text.

    Also adds the call's prompt and completion token counts to the module-level
    ``input_tokens`` / ``output_tokens`` totals, recomputes ``tokens_cost``
    from them, and prints the running figures.

    Args:
        input: List of chat message dicts passed as ``messages``.
        model: Model name; the default is fixed when this function is defined.

    Returns:
        The content of the first choice's message.
    """
    global input_tokens, output_tokens, input_cost, output_cost, tokens_cost

    # temperature=0 is requested on every call.
    completion = client.chat.completions.create(
        model=model, messages=input, temperature=0)

    usage = completion.usage
    input_tokens = input_tokens + usage.prompt_tokens
    output_tokens = output_tokens + usage.completion_tokens
    tokens_cost = input_tokens * input_cost + output_tokens * output_cost

    total_tokens = input_tokens + output_tokens
    print(f"Tokens thus far: {total_tokens} with a cost of ${round(tokens_cost, 4)}")
    return completion.choices[0].message.content


def create_directory(dir_name):
    """Create ``dir_name`` and any missing parents; do nothing if the path exists."""
    already_present = os.path.exists(dir_name)
    if already_present:
        return
    os.makedirs(dir_name)


def download_transcript(video_id):
    """Fetch a video's transcript and write it, one entry per line, to
    ``{summary_title}/transcript.txt``.

    Args:
        video_id: Video id; if it contains '=', only the text after the last
            '=' is used.
    """
    clean_id = video_id.split('=')[-1]
    entries = YouTubeTranscriptApi.get_transcript(clean_id)

    with open(f"{summary_title}/transcript.txt", "w") as file:
        file.writelines(entry['text'] + '\n' for entry in entries)


def chunk_transcript_sentences(file_name, chunk_size, summary_title):
    """Split a transcript into chunks of at most ``chunk_size`` words, breaking
    on sentence boundaries where possible.

    Chunks are written to ``{summary_title}/chunks/chunk<i>.txt``.

    Args:
        file_name: Path of the transcript text file.
        chunk_size: Maximum number of space-separated words per chunk.
        summary_title: Working directory that receives the ``chunks`` folder.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of words")

    # Read the transcript as one long line
    with open(file_name, 'r') as file:
        text = file.read().replace('\n', ' ')

    # Split after '.', '!' or '?' followed by spaces
    sentences = re.split(r'(?<=[.!?]) +', text)

    chunks, chunk = [], []
    current_chunk_size = 0

    for sentence in sentences:
        sentence_words = sentence.split(' ')

        # A "sentence" longer than the whole budget (e.g. captions without any
        # punctuation) is cut into word windows. Otherwise it would first flush
        # an empty chunk and then be stored as one oversized chunk.
        if len(sentence_words) > chunk_size:
            pieces = [' '.join(sentence_words[start:start + chunk_size])
                      for start in range(0, len(sentence_words), chunk_size)]
        else:
            pieces = [sentence]

        for piece in pieces:
            piece_length = len(piece.split(' '))
            if current_chunk_size + piece_length <= chunk_size:
                # Still fits: extend the current chunk
                chunk.append(piece)
                current_chunk_size += piece_length
            else:
                # Every piece fits an empty chunk, so the chunk being closed
                # here always has content.
                chunks.append(' '.join(chunk))
                chunk = [piece]
                current_chunk_size = piece_length

    # Add the last chunk if it's non-empty
    if chunk:
        chunks.append(' '.join(chunk))

    os.makedirs(f"{summary_title}/chunks", exist_ok=True)

    # Write each chunk to its own numbered file
    for i, chunk in enumerate(chunks):
        with open(f'{summary_title}/chunks/chunk{i}.txt', 'w') as file:
            file.write(chunk)


def chunk_transcript(file_name):
    """Split a transcript into fixed-size word windows, ignoring sentences.

    Uses the module-level ``chunk_size`` (words per chunk) and writes each
    chunk to ``{summary_title}/chunks/chunk<i>.txt``.

    Args:
        file_name: Path of the transcript text file.
    """
    with open(file_name, 'r') as file:
        text = file.read().replace('\n', ' ')
    words = text.split(' ')
    chunks = [words[i:i + chunk_size]
              for i in range(0, len(words), chunk_size)]
    create_directory(f"{summary_title}/chunks")
    for i, chunk in enumerate(chunks):
        # Close the chunk with a period only when its last word does not
        # already end a sentence. Comparing the whole word to '.' appended a
        # second period after words such as "end.".
        if not chunk[-1].endswith(('.', '!', '?')):
            chunk[-1] += '.'
        with open(f'{summary_title}/chunks/chunk{i}.txt', 'w') as file:
            file.write(' '.join(chunk))


def get_sorted_files_by_date(directory):
    """Return the entry names in ``directory``, oldest modification time first.

    Entries with the same modification time are ordered by name, with digit
    runs compared as numbers (``chunk2`` before ``chunk10``). Without this,
    files written within one timestamp tick come back in ``os.listdir`` order.

    Args:
        directory: Path of the directory to list.

    Returns:
        List of entry names (not full paths).
    """
    def natural_key(name):
        # re.split with a capturing group alternates text, digits, text, ...
        # so the odd positions are always the digit runs.
        parts = re.split(r'(\d+)', name)
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    def sort_key(name):
        return (os.path.getmtime(os.path.join(directory, name)), natural_key(name))

    return sorted(os.listdir(directory), key=sort_key)


def read_text_from_files(directory_path):
    """Return the contents of every regular file in ``directory_path`` as a list.

    Files are visited in the order given by ``get_sorted_files_by_date``;
    entries that are not regular files are skipped.
    """
    contents = []
    for name in get_sorted_files_by_date(directory_path):
        candidate = os.path.join(directory_path, name)
        if not os.path.isfile(candidate):
            continue
        with open(candidate, 'r') as handle:
            contents.append(handle.read())
    return contents


def read_and_join_text_from_files(directory_path):
    """Concatenate every regular file in ``directory_path`` into one string.

    Files are visited in the order given by ``get_sorted_files_by_date`` and
    each file's text is followed by a newline.
    """
    pieces = []
    for name in get_sorted_files_by_date(directory_path):
        candidate = os.path.join(directory_path, name)
        if not os.path.isfile(candidate):
            continue
        with open(candidate, 'r') as handle:
            pieces.append(handle.read() + "\n")
    return "".join(pieces)

In [11]:
# Start from a clean working directory: delete whatever a previous run left
# behind, then create the folder again.
stale_output_present = os.path.exists(summary_title)
if stale_output_present:
    shutil.rmtree(summary_title)
create_directory(summary_title)

### Download the audio, then transcribe it using the OpenAI Whisper API

In [20]:
# The audio is only needed when we transcribe it ourselves; it is saved as
# audio.mp4 (the function's default file name) inside the working directory.
if TRANSCRIBE:
    audio_path = download_audio_from_youtube(video_url, summary_title)

Audio download completed!


In [21]:
# Produce {summary_title}/transcript.txt, either by transcribing the downloaded
# audio (local whisper model or the transcription API) or, when TRANSCRIBE is
# off, by fetching the transcript that YouTube provides.
if TRANSCRIBE:
    transcript_path = f"{summary_title}/transcript.txt"
    audio_file_path = f"{summary_title}/audio.mp4"

    # Start from an empty transcript: the API branch below appends to the file.
    if os.path.exists(transcript_path):
        os.remove(transcript_path)

    if LOCAL_WHISPER:
        model = whisper.load_model("small")
        transcript = model.transcribe(audio_file_path)["text"]
        with open(transcript_path, "w") as f:
            f.write(transcript)
    else:
        audio = AudioSegment.from_file(audio_file_path, format="mp4")
        audio_duration_ms = len(audio)
        # Billed on whole seconds of audio.
        audio_cost += audio_duration_ms // 1000 * audio_cost_per_second
        print(f"Audio_duration: {audio_duration_ms}ms")
        audio_size = os.path.getsize(audio_file_path)
        print(f"audio file size: {audio_size}")

        # Files above this size are split into 20-minute pieces first
        # (presumably to stay under the API's upload limit - confirm).
        chunked = audio_size > 25_000_000
        if chunked:
            split_audio_into_chunks(audio, summary_title, chunk_duration=1200)
            files = get_sorted_files_by_date(summary_title + "/audio_chunks")
            print(f"Chunks to transcribe: {files}")
            upload_paths = [summary_title + "/audio_chunks/" + name for name in files]
        else:
            upload_paths = [audio_file_path]

        for index, upload_path in enumerate(upload_paths):
            if chunked:
                print(f"Transcribing {files[index]}")
            # `with` closes the audio handle even if the API call raises.
            with open(upload_path, "rb") as audio_file:
                transcript = client.audio.transcriptions.create(
                    model="whisper-1",
                    file=audio_file
                ).text
            with open(transcript_path, "a") as f:
                # Separate consecutive pieces with a space so the last word of
                # one and the first word of the next do not run together.
                if index > 0:
                    f.write(" ")
                f.write(transcript)
        print("Total audio cost: $", round(audio_cost, 4))
else:
    download_transcript(video_id)

Audio_duration: 7638947ms
audio file size: 46581451
Chunks to transcribe: ['chunk_0.mp4', 'chunk_1200.mp4', 'chunk_2400.mp4', 'chunk_3600.mp4', 'chunk_4800.mp4', 'chunk_6000.mp4', 'chunk_7200.mp4']
Transcribing chunk_0.mp4
Transcribing chunk_1200.mp4
Transcribing chunk_2400.mp4
Transcribing chunk_3600.mp4
Transcribing chunk_4800.mp4
Transcribing chunk_6000.mp4
Transcribing chunk_7200.mp4
Total audio cost: $ 0.7638


### Chunkify the transcript

In [22]:
# Rebuild the chunks folder from scratch, split the transcript on sentence
# boundaries, then load the chunk texts for the summarization step.
# (chunk_transcript is the fixed-size alternative; it is not used here.)
chunks_dir = summary_title + "/chunks"
if os.path.exists(chunks_dir):
    shutil.rmtree(chunks_dir)
create_directory(chunks_dir)

transcript_file = f'{summary_title}/transcript.txt'
chunk_transcript_sentences(transcript_file, chunk_size, summary_title)
data = read_text_from_files(f"{summary_title}/chunks")

### Summarize each chunk

In [23]:
# Running totals that summarize() updates through its `global` declaration.
input_tokens = 0
output_tokens = 0
tokens_cost = 0

data = read_text_from_files(f"{summary_title}/chunks")

# Wipe and recreate the three per-run output folders.
for folder in ("summaries", "instructions", "system"):
    stale_folder = summary_title + "/" + folder
    if os.path.exists(stale_folder):
        shutil.rmtree(stale_folder)
    create_directory(f"{summary_title}/{folder}")

print(f"Summarizing {len(data)} articles")
multiple_summaries = len(data) > 1

# Prefix of every system message; the context from earlier chunks follows it.
context_prefix = "Always only provide a detailed summary of the input. Don't answer questions or complete the text. The following is the context to keep in mind: \n"

for i, chunk_text in enumerate(data):
    messages = []
    if i == 1:
        # Second chunk: the first chunk's summary is the context as-is.
        with open(f"{summary_title}/summaries/summary{i-1}.txt", "r") as f:
            system_message = context_prefix + f.read()
        messages.append({"role": "system", "content": system_message})
    elif i > 1:
        # Later chunks: condense the two previous summaries with an extra model
        # call and use the result as context.
        with open(f"{summary_title}/summaries/summary{i-2}.txt", "r") as f:
            sum1 = f.read()
        with open(f"{summary_title}/summaries/summary{i-1}.txt", "r") as f:
            sum2 = f.read()

        recap_request = [
            {"role": "user", "content": "Summarize the following as detailed as possible:\n" + sum1 + '\n' + sum2}]
        system_message = context_prefix + summarize(recap_request)
        with open(f"{summary_title}/system/system{i}.txt", "w") as f:
            f.write(system_message)
        messages = [{"role": "system", "content": system_message}]

    messages.append({"role": "user", "content": task + chunk_text})

    # Keep a copy of exactly what is sent for this chunk.
    with open(f"{summary_title}/instructions/instruction_{i}.txt", "a") as file:
        for msg in messages:
            file.write(msg["role"] + ":\n")
            file.write(msg["content"] + "\n")

    response = summarize(messages)
    with open(f"{summary_title}/summaries/summary{i}.txt", "w") as f:
        f.write(response)

print(f"Total cost so far is: ${round(tokens_cost, 3)} tokens and ${audio_cost} audio")

Summarizing 1 articles
Tokens thus far: 20740 with a cost of $0.2166
Total cost so far is: $0.217 tokens and $0.7638 audio


### Join the summaries together, then summarize them all together using GPT-4

In [24]:
directory_path = f'{summary_title}/summaries'
joined_path = f'{summary_title}/summaries/joined_summaries.txt'

# joined_summaries.txt is stored in the same folder as the per-chunk summaries.
# Remove a copy left by an earlier run of this cell so it is neither joined
# into itself nor counted as an extra summary.
if os.path.exists(joined_path):
    os.remove(joined_path)

joined_summaries = read_and_join_text_from_files(directory_path)
files = get_sorted_files_by_date(directory_path)
multiple_summaries = len(files) > 1
with open(joined_path, 'w') as file:
    file.write(joined_summaries)

if multiple_summaries:
    # Several chunk summaries: ask the model to merge them into one article.
    print(f"Summarizing joined summaries with {len(joined_summaries.split(' '))} words")
    messages = [{"role": "user", "content": "Make the following summaries flow as one long and detailed article: \n" + joined_summaries.rstrip()}]
    with open(f"{summary_title}/instructions/instruction.txt", "a") as f:
        for message in messages:
            f.write(message["content"] + "\n")

    # Named final_summary rather than `sum`, which would hide the builtin for
    # the rest of the session.
    final_summary = summarize(input=messages, model="gpt-4")
else:
    # A single summary is already the final text.
    final_summary = joined_summaries
with open(f"{summary_title}/summary.txt", "w") as f:
    f.write(final_summary)

In [None]:
import io
from openai import OpenAI
from pydub import AudioSegment
from pydub.playback import play
import pygame

# Fresh client for the text-to-speech calls in this cell.
client = OpenAI()

# Load the final summary; it is split into sections on blank lines further down.
with open(f"{summary_title}/summary.txt", "r") as f:
    story = f.read()

def save_audio(response, count):
    """Write ``response["AudioStream"]`` to
    ``{summary_title}/openai_audio/story_{count}.mp3``.

    NOTE(review): the only call to this in the same cell is commented out.
    """
    destination = f"{summary_title}/openai_audio/story_{count}.mp3"
    with open(destination, "wb") as out:
        payload = response["AudioStream"].read()
        out.write(payload)

def stream_and_play(text, count):
    """Synthesize ``text`` with the OpenAI speech endpoint and save it as
    ``{summary_title}/openai_audio/story_{count}.mp3``.

    Despite the name, nothing is played here; the clip is only written to disk.

    Args:
        text: Text to turn into speech.
        count: Index used in the output file name.
    """
    output_dir = f"{summary_title}/openai_audio"
    # No other code visible in this file creates this folder, so make sure it
    # exists before the clip is written.
    os.makedirs(output_dir, exist_ok=True)

    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text,
    )
    response.stream_to_file(f"{output_dir}/story_{count}.mp3")

# One clip per blank-line-separated section of the summary, numbered from 0.
# stream_and_play has no return statement, so `audio` is always None here.
for count, section in enumerate(story.split('\n\n')):
  audio = stream_and_play(section, count)
# stream_and_play()




In [None]:
def read_audio(file):
  """Play an audio file through pygame and block until playback ends.

  Args:
    file: Path of the audio file to load.

  Returns:
    The ``pygame.mixer.music`` module that played the file.
  """
  pygame.mixer.init()
  player = pygame.mixer.music
  player.load(file)
  player.play()

  # Sleep between checks instead of spinning while the clip plays.
  while True:
    if not player.get_busy():
      break
    time.sleep(0.1)

  return player

# Play the clips in order. The generation loop writes one clip per
# blank-line-separated section of `story`, so derive the clip count from that
# rather than from a fixed number.
for i in range(len(story.split('\n\n'))):
   read_audio(f"{summary_title}/openai_audio/story_{i}.mp3")

In [None]:
import boto3
# Voice and output format passed to synthesize_speech in generate_audio
# (as VoiceId and OutputFormat). The commented lines are alternative voices.
# voice_id = "Matthew"
# voice_id = "Ruth"
voice_id = "Stephen"
output_format = "mp3"
import pygame

# Empty the folder that receives the Polly clips.
polly_audio_dir = summary_title + "/audio"
if os.path.exists(polly_audio_dir):
    shutil.rmtree(polly_audio_dir)
create_directory(f"{summary_title}/audio")

# Create an Amazon Polly client; the credentials come from environment variables.
polly_session = boto3.Session(
    aws_access_key_id=os.getenv("polly_access_key_id"),
    aws_secret_access_key=os.getenv("polly_secret_key"),
    region_name='ca-central-1',
)
polly_client = polly_session.client('polly')

def generate_audio(input, count):
  """Synthesize ``input`` with Amazon Polly and save the clip via ``save_audio``.

  Uses the module-level ``voice_id`` and ``output_format`` and the neural engine.

  Args:
    input: Text to turn into speech.
    count: Index handed to ``save_audio`` for the output file name.

  Returns:
    The ``synthesize_speech`` response. ``save_audio`` has already read its
    ``AudioStream`` by the time it is returned.
  """
  audio = polly_client.synthesize_speech(
      Text=input,
      VoiceId=voice_id,
      OutputFormat=output_format,
      Engine="neural"
      )
  save_audio(audio, count)
  return audio

def save_audio(response, count):
  """Write the bytes of ``response["AudioStream"]`` to
  ``{summary_title}/audio/story_{count}.mp3``.
  """
  destination = f"{summary_title}/audio/story_{count}.mp3"
  with open(destination, "wb") as out:
      payload = response["AudioStream"].read()
      out.write(payload)

def read_audio(file):
  """Load ``file`` into pygame's music player, play it, and wait until it ends.

  Returns:
    The ``pygame.mixer.music`` module used for playback.
  """
  pygame.mixer.init()
  music = pygame.mixer.music
  music.load(file)
  music.play()

  # Poll ten times a second rather than busy-waiting.
  still_playing = music.get_busy()
  while still_playing:
    time.sleep(0.1)
    still_playing = music.get_busy()

  return music

# Play the Polly clips story_0 .. story_{count-1} in order.
# NOTE(review): `count` is assigned by the generation cell that follows, and the
# audio folder was emptied at the top of this cell, so this loop looks like it
# can only work after that generation cell has been run - confirm.
for i in range(count):
  media_player = read_audio(f"{summary_title}/audio/story_{i}.mp3")

In [None]:
# Turn each blank-line-separated section of the summary into a Polly clip.
# `count` ends up as the number of clips generated; the playback cell reads it.
with open(f"{summary_title}/summary.txt", "r") as f:
  story = f.read()

sections = story.split('\n\n')
count = 0
while count < len(sections):
  audio = generate_audio(sections[count], count)
  count += 1

In [None]:
# Print each section and play its clip, in order.
story_sections = story.split('\n\n')
for i in range(count):
  section = story_sections[i]
  print(i, section)
  clip_path = f"{summary_title}/audio/story_{i}.mp3"
  media_player = read_audio(clip_path)