In [None]:
from faster_whisper import WhisperModel
import yt_dlp
import os

In [None]:
def download_audio(youtube_url, out_dir="downloads"):
    """Download a video's audio track with yt-dlp and extract it to MP3.

    Args:
        youtube_url: URL of the video to fetch.
        out_dir: Directory that receives the audio file; created if it does
            not exist. A leading ``~`` is expanded to the home directory.

    Returns:
        A ``(mp3_path, video_id)`` tuple, where ``mp3_path`` is
        ``<out_dir>/<video_id>.mp3``.
    """
    # os.makedirs does not expand "~"; without this a literal "~" directory
    # would be created under the current working directory.
    out_dir = os.path.expanduser(out_dir)
    os.makedirs(out_dir, exist_ok=True)
    ydl_opts = {
        'format': 'bestaudio/best',
        # os.path.join avoids a doubled separator when out_dir ends in "/".
        'outtmpl': os.path.join(out_dir, '%(id)s.%(ext)s'),
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
        }]
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(youtube_url, download=True)
    video_id = info['id']
    return os.path.join(out_dir, f"{video_id}.mp3"), video_id
    
def transcribe(audio_path, model_size="medium", output_dir="transcripts"):
    """Transcribe an audio file with faster-whisper and save the text.

    The transcript is written to ``<output_dir>/<audio file stem>.txt`` with
    one segment per line, and the output location is printed.

    Args:
        audio_path: Path to the audio file. A leading ``~`` is expanded.
        model_size: Model size name passed to ``WhisperModel``.
        output_dir: Directory for the transcript; created if missing. A
            leading ``~`` is expanded.

    Returns:
        None.
    """
    # "~" is a shell shorthand; expand it before handing paths to libraries.
    audio_path = os.path.expanduser(audio_path)
    output_dir = os.path.expanduser(output_dir)

    model = WhisperModel(model_size, compute_type="int8")
    segments, _ = model.transcribe(audio_path)
    os.makedirs(output_dir, exist_ok=True)

    # Collect every segment's text before opening the output file so that a
    # failure while transcribing does not leave a truncated transcript behind.
    lines = [seg.text for seg in segments]

    # splitext swaps only the final extension, and works for non-mp3 inputs
    # (str.replace would leave e.g. "talk.wav" unchanged).
    stem, _ext = os.path.splitext(os.path.basename(audio_path))
    output_path = os.path.join(output_dir, f"{stem}.txt")
    # Explicit UTF-8 so non-ASCII text does not depend on the platform default.
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in lines)

    print(f"Transcription saved to: {output_path}")
    return None

In [None]:
# Expand "~" explicitly: Python file APIs treat it as a literal directory name.
download_audio('https://www.youtube.com/watch?v=CFtDihdNwJk',
               out_dir=os.path.expanduser('~/Downloads/'))

In [None]:
# Expand "~" explicitly: Python file APIs treat it as a literal directory name.
transcribe(os.path.expanduser('~/Downloads/CFtDihdNwJk.mp3'),
           model_size="small",
           output_dir="/Users/jpoberhauser/Desktop/baseballCompanion/data/transcripts/")

## Now you have the entire transcript saved as a .txt file, for example:

 Luis Robert, trade rumors are real.
 Bob Nightingale has reported that Luis Robert Jr.
 and the New York Mets have been linked in possible trades.
 The White Sox are looking at Mets prospects.
 We're gonna talk about everything that there is
 to possibly talk about with these Luis Robert trade rumors
 in today's YouTube video and podcast episode.
 Make sure you are subscribed to the Mets on Podcast,
 YouTube channel, so don't miss out on any of the content
 coming at you, videos after every single series,
 and a third bonus episode every single week.
 So you're gonna want to stick around and see that.
 And if you are listening to us on Apple,
 podcast, Spotify, Google, whatever it is,
 drop us a reading, drop us a review,
 download and subscribe.
 We really do appreciate it.
 James, when I saw this news,
 I know we've talked about this off camera many a times.
 Luis Robert would be awesome on this team.
 He'd be fantastic to have.
 He'd be kind of perfect,
 because when you look up and down
 this Mets roster right now,
 there's, they're really strong and deep,
 basically every single position of the starting nine.
 I guess of the starting eight,
 really beside center field.
 I know that we love Tyrone Taylor.
 He's become a bit of a cult hero for Mets fans.
 He walks up to Ice Cube.
 He was really clutching the playoffs last year.
 Really good defensive center fielder.
 He's a solid ball player.
 Jose Seria is out.
 He will be back at some point.
 But again, if you look at this team up and down,
 you're like, what position is the one
 of the guys on the field
 that would be the most worth upgrading?
 And of the lineup,
 it's very clearly center field.
 Luis Robert Jr.'s that spot right now
 where he has two years of team options left
 on his White Sox contract,
 $20 million each for the next two seasons
 and both totally voluntary money.
 So the White Sox can just get rid of him
 from the end of the season if they don't want to 