## Downloading and Transcribing a Youtube CLIP

In [6]:
from faster_whisper import WhisperModel
import yt_dlp
import os

In [17]:
def download_audio(youtube_url, out_dir="downloads"):
    os.makedirs(out_dir, exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': f'{out_dir}/%(id)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }]
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=True)
        return f"{out_dir}/{info['id']}.mp3", info['id']
    
def transcribe(audio_path, model_size="medium", output_dir="transcripts"):
    model = WhisperModel(model_size, compute_type="int8")
    segments, _ = model.transcribe(audio_path)
    os.makedirs(output_dir, exist_ok=True)
    text_chunks = []
    for seg in segments:
        text_chunks.append({
            "start": seg.start,
            "end": seg.end,
            "text": seg.text
        })

    output_path = os.path.join(output_dir, os.path.basename(audio_path).replace(".mp3", ".txt"))
    with open(output_path, "w") as f:
        for chunk in text_chunks:
            f.write(f"{chunk['text']}\n")

    print(f"Transcription saved to: {output_path}")
    return None

In [19]:
download_audio('https://www.youtube.com/watch?v=b5-xIieufYs', out_dir = '~/Downloads/')

[youtube] Extracting URL: https://www.youtube.com/watch?v=b5-xIieufYs
[youtube] b5-xIieufYs: Downloading webpage
[youtube] b5-xIieufYs: Downloading tv client config
[youtube] b5-xIieufYs: Downloading tv player API JSON
[youtube] b5-xIieufYs: Downloading ios player API JSON
[youtube] b5-xIieufYs: Downloading m3u8 information
[info] b5-xIieufYs: Downloading 1 format(s): 251
[download] Destination: /Users/jpoberhauser/Downloads//b5-xIieufYs.webm
[download] 100% of   52.21MiB in 00:00:01 at 29.55MiB/s    
[ExtractAudio] Destination: /Users/jpoberhauser/Downloads//b5-xIieufYs.mp3
Deleting original file /Users/jpoberhauser/Downloads//b5-xIieufYs.webm (pass -k to keep)


('~/Downloads//b5-xIieufYs.mp3', 'b5-xIieufYs')

This is example is a 24 minute youtube clip and it gets compeltey transcribed in ~13 minutes using the medium model and in using the small model ~5 minutes

In [20]:
transcribe('/Users/jpoberhauser/Desktop/baseballCompanion/data/b5-xIieufYs.mp3',
            model_size="small", 
            output_dir="/Users/jpoberhauser/Desktop/baseballCompanion/data/transcripts/")

Transcription saved to: /Users/jpoberhauser/Desktop/baseballCompanion/data/transcripts/b5-xIieufYs.txt
