# Audio Extraction

In [1]:
import os

In [2]:
"""
첫번째 파트 : 동영상에서 오디오만 추출하기
두번째 파트 : 오디오를 10분 단위로 분할하기

자른 각각의 오디오를 Whisper API 에게 넘겨주고, 응답을 받아 한개의 파일로 저장할겁니다
그렇게 하여 10분 분량 오디오들을 모두 합친 전체 녹취록을 얻어낼 겁니다

"""
None

## MP4 -> MP3 음성 추출

### 오디오 추출 함수

In [3]:
# ffmpeg 명령을 파이썬 코드에서 실행시키기 위해.
import subprocess

In [4]:
# Directory that holds the source video, and directory for the split audio.
base_path, out_path = r'D:\NLP2501\dataset', r'D:\NLP2501\dataset\out'

# File names of the input video and of the audio extracted from it.
video_file, audio_file = 'podcast.mp4', 'audio.mp3'

# Both the video and the extracted audio sit directly under base_path.
src_path, dst_path = (
    os.path.join(base_path, name) for name in (video_file, audio_file)
)


In [5]:
def extract_autio_from_video(video_path, audio_path):
    """Extract the audio track of a video file by running ffmpeg.

    Runs the equivalent of this command line::

        ffmpeg -y -i <video_path> -vn <audio_path>

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write. An existing file is
            overwritten without prompting (``-y``).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status (for example when the input file cannot be read).

    Note:
        The misspelling "autio" in the name is kept on purpose because
        existing callers use it.
    """
    # -y  : overwrite the output file without asking (global option, so it
    #       goes before the input as the ffmpeg manual recommends).
    # -vn : drop the video stream so only audio is written.
    command = ["ffmpeg", "-y", "-i", video_path, "-vn", audio_path]

    # check=True makes a failed conversion raise instead of passing silently
    # and leaving later steps to fail on a missing or stale audio file.
    subprocess.run(command, check=True)

    

In [6]:
# Extract the audio track of the video at src_path into the file at dst_path.
extract_autio_from_video(src_path, dst_path)

# Cutting the Audio

In [7]:
# pydub 패키지
#  공식: https://github.com/jiaaro/pydub 
#  pip install pydub  <- 설치 필요  (사전에 ffmpeg 가 설치되고 경로 설정도 되어 있어야 한다)

In [8]:
from pydub import AudioSegment

In [9]:
# Show the path of the extracted audio file.
dst_path

'D:\\NLP2501\\dataset\\audio.mp3'

In [10]:
# Load the extracted MP3 file with pydub.
track = AudioSegment.from_mp3(dst_path)

In [35]:
# track

In [12]:
track.duration_seconds  # total duration (seconds)

4422.426122448979

In [13]:
len(track)  # track length in milliseconds

4422426

In [14]:
# First five minutes of the audio
five_minutes = 5 * 60 * 1000   # in milliseconds
first_five = track[:five_minutes]

In [34]:
# first_five

In [16]:
# Confirm the length of the five-minute slice (value is in seconds).
first_five.duration_seconds

300.0

In [18]:
# Export the trimmed audio as an MP3 file under out_path.
first_five.export(os.path.join(out_path, 'first_five.mp3'), format='mp3')

<_io.BufferedRandom name='D:\\NLP2501\\dataset\\out\\first_five.mp3'>

## 오디오 파일 분할

In [19]:
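# Each Whisper API request is limited in size (OpenAI documents a 25 MB upload cap),
# so this notebook sends the audio in pieces of at most 10 minutes each.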

In [20]:
# Ten minutes expressed in milliseconds.
ten_minutes = 10 * 60 * 1000

In [22]:
import math
# Number of 10-minute chunks needed to cover the whole track; rounding up
# keeps the final, shorter piece.
chunks = math.ceil(len(track) / ten_minutes)

chunks

8

In [23]:
# Preview the start and end offsets (in ms) of every audio chunk.
for i in range(chunks):
    start_time, end_time = i * ten_minutes, (i + 1) * ten_minutes
    print(f"{i} start: {start_time}  end {end_time}")

0 start: 0  end 600000
1 start: 600000  end 1200000
2 start: 1200000  end 1800000
3 start: 1800000  end 2400000
4 start: 2400000  end 3000000
5 start: 3000000  end 3600000
6 start: 3600000  end 4200000
7 start: 4200000  end 4800000


In [24]:
# Write every 10-minute slice of the track to out_path as chunk_<i>.mp3.
for i in range(chunks):
    start_time = i * ten_minutes
    end_time = (i + 1) * ten_minutes

    # Slicing past the end of the track is fine: the last chunk is simply
    # shorter than ten minutes.
    chunk = track[start_time: end_time]

    # export() returns the open output file object; close it explicitly so
    # the handle is released at once instead of whenever it is collected.
    chunk.export(os.path.join(out_path, f'chunk_{i}.mp3'), format='mp3').close()

# Whisper Transcript

In [25]:
import openai

In [28]:
# Audio -> transcript: send the first chunk to the Whisper API.
# The file is opened in a `with` block so the handle is closed as soon as the
# request finishes, rather than being left open for the rest of the session.
with open(os.path.join(out_path, 'chunk_0.mp3'), 'rb') as audio_fp:
    transcript = openai.audio.transcriptions.create(
        model='whisper-1',
        file=audio_fp,
        language="en",
    )

In [27]:
# Preview the first 1000 characters of the transcript text.
transcript.text[:1000]

"If success is this lagging indicator of commitment now, how can you be sure that you are paying your dues? The best-selling author and host. The number one health and wellness podcast. On Purpose with Jay Shetty. Society has gone in the direction of becoming addicted to pleasure. Yes. Or pleasure-seeking. Where, from the Stoic's perspective, why did we even ever go down that road? Like, why did we leave wisdom and self-control? Or did we never have it at all and we've always been trying to balance it? Yeah. I mean, I guess that's the big question is, like, why do we take something that we like too far? Yeah. Right. So the Epicureans would say like, look, drinking is great, but if you have a hangover the next day, was it actually so great? And so, you know, if you, if you push the pleasure too far, it becomes not pleasurable, but in the moment that feels very far away, right? Like in the moment you want the thing now, obviously sex is this thing for people, it's like the thing you're a

In [33]:
# first_five

## glob 이슈

In [30]:
import glob

In [32]:
# Collect the exported chunk files in playback order.
files = glob.glob(os.path.join(out_path, "chunk_*.mp3"))

# A plain string sort would put chunk_10.mp3 before chunk_2.mp3 once there are
# more than ten chunks. All paths share the same directory prefix and the
# indices are not zero-padded, so ordering by length first and by name second
# is the same as ordering by chunk number.
files.sort(key=lambda path: (len(path), path))

['D:\\NLP2501\\dataset\\out\\chunk_0.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_1.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_2.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_3.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_4.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_5.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_6.mp3',
 'D:\\NLP2501\\dataset\\out\\chunk_7.mp3']