In [1]:
!pip install yt-dlp openai python-dotenv ipykernel



In [39]:

import os
import subprocess
import openai
from openai import OpenAI
import uuid
from dotenv import load_dotenv

# Load environment variables from the .env file at the relative path below.
# NOTE(review): presumably this supplies the API key for the OpenAI() clients
# that this notebook constructs without explicit credentials — confirm.
load_dotenv(dotenv_path="../env_file/.env")


True

In [41]:

def download_audio(video_url, ffmpeg_location=None):
    """Download a video's audio track as an MP3 by invoking the ``yt-dlp`` CLI.

    Args:
        video_url: URL (or video id) handed to ``yt-dlp``. It is passed after a
            ``--`` separator so a value starting with ``-`` cannot be parsed
            as a command-line option.
        ffmpeg_location: Optional path to the ffmpeg binary/directory. When
            omitted, the Homebrew location ``/opt/homebrew/bin/ffmpeg`` is used
            if it exists, otherwise whatever ``ffmpeg`` is found on ``PATH``;
            if neither is found the flag is left out and ``yt-dlp`` performs
            its own lookup.

    Returns:
        The name of the MP3 file (``downloaded_audio_<uuid>.mp3``, relative to
        the current working directory), or ``None`` if the URL is empty or the
        download command fails. Errors are printed, not raised.
    """
    import shutil  # local import: only needed here for ffmpeg discovery

    try:
        if not video_url:
            print("Error downloading audio: no video URL provided")
            return None

        unique_id = str(uuid.uuid4())
        output_filename = f"downloaded_audio_{unique_id}.mp3"
        # yt-dlp fills in %(ext)s itself; after --extract-audio converts the
        # download, the final file carries the .mp3 extension returned below.
        output_template = f"downloaded_audio_{unique_id}.%(ext)s"

        if ffmpeg_location is None:
            homebrew_ffmpeg = '/opt/homebrew/bin/ffmpeg'
            if os.path.isfile(homebrew_ffmpeg):
                ffmpeg_location = homebrew_ffmpeg
            else:
                ffmpeg_location = shutil.which('ffmpeg')

        command = [
            'yt-dlp',
            '-f', 'bestaudio',
            '--extract-audio',
            '--audio-format', 'mp3',
        ]
        if ffmpeg_location:
            command += ['--ffmpeg-location', ffmpeg_location]
        # '--' ends option parsing so the user-supplied URL is always treated
        # as a positional argument.
        command += ['--output', output_template, '--', video_url]

        subprocess.run(command, check=True)
        return output_filename
    except Exception as e:
        # Best-effort contract: callers test the return value for None.
        print(f"Error downloading audio: {e}")
        return None


In [51]:
def transcribe_audio(audio_path):
    """Transcribe an audio file through the OpenAI transcription endpoint.

    The file at ``audio_path`` is opened in binary mode and submitted with the
    ``whisper-1`` model, requesting the ``verbose_json`` response format.

    Returns:
        A ``(text, segments)`` pair read from the response's ``text`` and
        ``segments`` attributes. If anything goes wrong, the error is printed
        and ``(None, None)`` is returned instead of raising.
    """
    request_options = {
        "model": "whisper-1",
        "response_format": "verbose_json",
    }
    try:
        whisper_client = OpenAI()
        with open(audio_path, "rb") as audio_stream:
            result = whisper_client.audio.transcriptions.create(
                file=audio_stream,
                **request_options,
            )
        return result.text, result.segments
    except Exception as err:
        print(f"Error transcribing audio: {err}")
        return None, None


In [59]:
def format_segments_with_timestamps(segments):
    """Render transcript segments as ``[m:ss] text`` lines joined by newlines.

    Args:
        segments: Iterable of segments exposing ``start`` (seconds, truncated
            to an int) and ``text``, either as attributes or as dict keys.
            ``None`` or an empty iterable is accepted.

    Returns:
        One line per segment, e.g. ``"[1:15] some text"``. Minutes are not
        wrapped into hours, so a segment at 3605s renders as ``[60:05]``.
        Returns ``""`` when there are no segments.
    """
    def _field(segment, name):
        # Accept both object-style and dict-style segments.
        if isinstance(segment, dict):
            return segment[name]
        return getattr(segment, name)

    lines = []
    # `or []` lets a missing segment list (None) format to an empty string
    # instead of raising TypeError.
    for seg in segments or []:
        minutes, seconds = divmod(int(_field(seg, "start")), 60)
        lines.append(f"[{minutes}:{seconds:02d}] {_field(seg, 'text')}")
    return "\n".join(lines)

In [61]:
def summarize_text(transcript, segments, mode="basic"):
    """Summarize a transcript with an OpenAI chat completion.

    Args:
        transcript: Full transcript text.
        segments: Timestamped segments; only consulted in ``"quotes"`` mode,
            so it may be ``None`` for every other mode.
        mode: One of ``"basic"``, ``"bullets"``, ``"quotes"``, ``"insights"``.
            Any other value falls back to ``"basic"``.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None`` if
        the request fails (the error is printed, not raised).
    """
    instructions = {
        "basic": "Summarize the following transcript:",
        "bullets": "Summarize the following transcript into concise bullet points:",
        "quotes": "Extract 5-10 of the most compelling or insightful quotes from this transcript. Include timestamps:",
        "insights": "Summarize the following transcript by extracting the key insights and implications. Be concise but thoughtful:",
    }
    if mode not in instructions:
        mode = "basic"

    # Build only the prompt that was asked for. Formatting the segments
    # eagerly for every mode would crash non-"quotes" requests whenever
    # segments is missing.
    if mode == "quotes" and segments:
        body = format_segments_with_timestamps(segments)
    else:
        # Also covers "quotes" without segments: fall back to the plain
        # transcript rather than sending an empty prompt body.
        body = transcript
    prompt = f"{instructions[mode]}\n\n{body}"

    try:
        client = OpenAI()
        response = client.chat.completions.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=700
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error generating summary: {e}")
        return None


In [69]:

# Interactive driver: ask for a video URL and summary style, then run the
# download -> transcribe -> summarize pipeline, reporting which stage failed.
video_url = input("Enter YouTube URL: ").strip()
mode = input("Choose summary style (basic, bullets, quotes, insights): ").strip().lower()
show_transcript = input("Would you like to see the full transcript first? (yes/no): ").strip().lower()

audio_file = download_audio(video_url)

if audio_file:
    transcript_text, segments = transcribe_audio(audio_file)
    if transcript_text:
        # Accept the common short answer as well as the literal "yes".
        if show_transcript in ("yes", "y"):
            print("\n--- Transcript fetched ---\n")
            print(transcript_text)

        print("\n--- Summary ---\n")
        summary = summarize_text(transcript_text, segments, mode)
        # summarize_text returns None on failure; report that explicitly
        # instead of printing the literal "None".
        if summary is None:
            print("Summary generation failed.")
        else:
            print(summary)
    else:
        print("Transcription failed.")
else:
    print("Audio download failed.")


Enter YouTube URL:  https://www.youtube.com/watch?v=kE5iJgFm04Y
Choose summary style (basic, bullets, quotes, insights):  quotes
Would you like to see the full transcript first? (yes/no):  no


[youtube] Extracting URL: https://www.youtube.com/watch?v=kE5iJgFm04Y
[youtube] kE5iJgFm04Y: Downloading webpage
[youtube] kE5iJgFm04Y: Downloading tv client config
[youtube] kE5iJgFm04Y: Downloading tv player API JSON
[youtube] kE5iJgFm04Y: Downloading ios player API JSON
[youtube] kE5iJgFm04Y: Downloading m3u8 information
[info] kE5iJgFm04Y: Downloading 1 format(s): 251
[download] Destination: downloaded_audio_71074d1c-c084-4a20-be29-1e179dea78bd.webm
[download] 100% of   10.57MiB in 00:00:01 at 7.34MiB/s   
[ExtractAudio] Destination: downloaded_audio_71074d1c-c084-4a20-be29-1e179dea78bd.mp3
Deleting original file downloaded_audio_71074d1c-c084-4a20-be29-1e179dea78bd.webm (pass -k to keep)

--- Summary ---

1. [0:28] "Clearly people did it, but I don't know how people figured out how to take care of newborns without ChatGBT. That has been a real lifesaver."
2. [0:52] "I'm like, wait, you're texting your dad and you don't – it is something about the new generation that it is so seaml