In [20]:
import subprocess
from pydub import AudioSegment
import math


def extract_audio_from_video(video_path, audio_path):
    """Extract the audio track of a video file by invoking ffmpeg.

    Args:
        video_path: Path of the input video passed to ffmpeg via ``-i``.
        audio_path: Path of the audio file ffmpeg should write.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        "ffmpeg",
        "-y",  # overwrite an existing output instead of prompting (re-runnable cell)
        "-i",
        video_path,
        "-vn",  # drop the video stream, keep audio only
        audio_path,
    ]
    # check=True surfaces ffmpeg failures instead of silently ignoring them.
    subprocess.run(command, check=True)


def cut_audio_in_chunks(audio_path, chunk_size, chunks_folder):
    """Split an mp3 file into consecutive chunks of ``chunk_size`` minutes.

    Each chunk is exported as ``{chunks_folder}/chunk_{i}.mp3`` where ``i``
    starts at 0. The last chunk holds whatever audio remains.

    Args:
        audio_path: Path of the mp3 file to split.
        chunk_size: Length of every chunk in minutes; must be positive.
        chunks_folder: Folder that receives the chunk files. It is created
            if it does not exist yet.

    Raises:
        ValueError: If ``chunk_size`` is not greater than zero.
    """
    import os

    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size!r}")

    # Exporting fails when the target folder is missing, so create it up front.
    os.makedirs(chunks_folder, exist_ok=True)

    # Load the mp3 file with AudioSegment.from_mp3().
    track = AudioSegment.from_mp3(audio_path)
    # pydub works in milliseconds, so convert the chunk size from minutes.
    chunk_len = chunk_size * 60 * 1000
    # Round up: e.g. 7.3 chunks' worth of audio needs 8 files so the trailing
    # 0.3 is not dropped.
    chunks = math.ceil(len(track) / chunk_len)
    for i in range(chunks):
        start_time = i * chunk_len  # start of this chunk in ms
        end_time = (i + 1) * chunk_len  # end of this chunk in ms

        chunk = track[start_time:end_time]

        chunk.export(f"{chunks_folder}/chunk_{i}.mp3", format="mp3")

In [23]:
# Split the podcast into 10-minute mp3 chunks under ./files/chunks.
cut_audio_in_chunks(
    audio_path="./files/podcast.mp3",
    chunk_size=10,
    chunks_folder="./files/chunks",
)

### 긴 오디오 파일을 10분 길이의 mp3 파일들로 변환

# Whisper Transcript

In [1]:
import openai 

# Transcribe only the first chunk as a quick check of the Whisper API.
# The context manager closes the audio file once the request has finished.
with open("./files/chunks/chunk_0.mp3", "rb") as audio_file:
    transcript = openai.Audio.transcribe("whisper-1", audio_file)

transcript

<OpenAIObject at 0x7f2d9c360e00> JSON: {
  "text": "The following is a conversation with Elon Musk, his fourth time on this, The Lex Friedman Podcast. \u266a Whistling \u266a Ha ha ha. I thought you were gonna finish it. It's one of the greatest themes in all of film history. \u266a Whistling \u266a Yeah, that's great. So, I was just thinking about the Roman Empire, as one does. Ha ha ha ha ha. There's that whole meme where all guys are thinking about the Roman Empire at least once a day. And half the population's confused whether it's true or not. But, more seriously, thinking about the wars going on in the world today. And as you know, war and military conquest has been a big part of Roman society and culture. And it, I think, has been a big part of most empires and dynasties throughout human history, so. Yeah, they usually came as a result of conquest. I mean, there's some like the Austro-Hungarian Empire where there was just a lot of sort of clever marriages. But fundamentally, the

In [3]:
from typing import final
import glob


def _natural_sort_key(path):
    """Return a sort key that orders embedded numbers numerically (2 before 10)."""
    import re

    # re.split with a capturing group alternates text / digits / text ..., so
    # odd positions are always digit runs and can be compared as integers.
    tokens = re.split(r"(\d+)", path)
    return [int(tok) if idx % 2 else tok for idx, tok in enumerate(tokens)]


def transcribe_chunks(chunk_folder, destination):
    """Transcribe every mp3 in ``chunk_folder`` with Whisper into one text file.

    Files are processed in natural filename order (``chunk_2`` before
    ``chunk_10``) so the combined transcript follows the original audio.
    The per-chunk texts are stripped and joined with a single space.

    Args:
        chunk_folder: Folder containing the ``*.mp3`` chunk files.
        destination: Path of the text file to write; it is created if missing
            and overwritten if it already exists.
    """
    # glob.glob returns files in arbitrary order, so sort explicitly.
    chunk_paths = sorted(glob.glob(f"{chunk_folder}/*.mp3"), key=_natural_sort_key)
    texts = []
    for chunk_path in chunk_paths:
        with open(chunk_path, "rb") as audio_file:
            transcript = openai.Audio.transcribe(
                "whisper-1",
                audio_file,
            )
        texts.append(transcript["text"].strip())
    # A separator keeps the last word of one chunk from fusing with the first
    # word of the next.
    final_transcript = " ".join(texts)
    # "w" creates the file or overwrites it; UTF-8 is explicit because the
    # transcript can contain non-ASCII characters.
    with open(destination, "w", encoding="utf-8") as text_file:
        text_file.write(final_transcript)