In [5]:
import subprocess
from pydub import AudioSegment
import math
import openai
import glob
import os

In [7]:
def extract_audio_from_video(video_path, audio_path):
    command = [
        "ffmpeg",
        "-i",
        video_path,
        "-vn",
        audio_path,
    ]
    subprocess.run(command)


def cut_audio_in_chunks(audio_path, chunk_size, chunks_folder):
    track = AudioSegment.from_mp3(audio_path)
    chunk_len = chunk_size * 60 * 1000
    chunks = math.ceil(len(track) / chunk_len)
    for i in range(chunks):
        start_time = i * chunk_len
        end_time = (i + 1) * chunk_len
        chunk = track[start_time:end_time]
        chunk.export(
            f"./{chunks_folder}/chunk_{i}.mp3",
            format="mp3",
        )


def transcribe_chunks(chunk_folder, file_name):
    files = glob.glob(f"{chunk_folder}/*.mp3")
    save_path = os.path.join(chunk_folder, file_name)
    for file in files:
        with open(file, "rb") as audio_file, open(save_path, "a") as text_file:
            transcript = openai.Audio.transcribe(
                "whisper-1",
                audio_file,
            )
            text_file.write(transcript["text"])

In [8]:
import glob

# Usage example
def get_mp4_files(path):
    mp4_files = glob.glob(path + "/*.mp4")
    return mp4_files

# 사용 예시
file_paths = get_mp4_files("/home/kskim/summarygen/fastcampus_video")
print(file_paths)
chunk_size = 10  # in minutes

def process_multiple_files(file_paths, chunk_size):
    for file_path in file_paths:
        # Extract audio from video
        audio_path = os.path.splitext(os.path.basename(file_path))[0] + ".mp3"
        extract_audio_from_video(file_path, audio_path)
        
        # Get the original file name without extension
        file_name = os.path.splitext(os.path.basename(file_path))[0]
        
        # Create chunks folder if it doesn't exist
        chunks_folder = f"{file_name}_chunks"
        if not os.path.exists(chunks_folder):
            os.makedirs(chunks_folder)
        
        # Cut audio into chunks
        cut_audio_in_chunks(audio_path, chunk_size, chunks_folder)
        
        # Transcribe chunks
        transcribe_chunks(chunks_folder, file_name)
        
        # Remove temporary audio file
        os.remove(audio_path)

['/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_2강 - GPT 시리즈와 발전 과정.mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_4강 - Auto GPT 더 유용하게(활용법).mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_4강 - 서비스 관점에서 ChatGPT의 기능적 보완을 위한 플러그인 제작(실습).mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_3강 - ChatGPT와 그 생태계(2).mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_3강 - GPT에게 일을 시킬 때 주의해야 하는 부분.mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_4강 - Langchain 개념과 실습.mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_3강 - ChatGPT와 그 생태계(1).mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_2강 - Auto GPT 심화.mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_2강 - Chat GPT를 활용한 다양한 시도와 원칙.mp4', '/home/kskim/summarygen/fastcampus_video/모두를 위한 ChatGPT UP! VOD_1강 - Auto GPT에 대한 간략한 소개와 활용 방법 전반.mp4', '/home/kskim/summarygen/fas

In [None]:
process_multiple_files(file_paths, chunk_size)
