### 1. MP4 파일을 MP3 파일로 변환

In [None]:
from moviepy.editor import *

# Extract the audio track of the sample video and save it as an MP3 file.
# The clip is opened in a `with` block so that it is closed (and the
# resources it holds are released) as soon as the export is done, even if
# writing the audio file raises.
with VideoFileClip('./test_video.mp4') as video:
    video.audio.write_audiofile('./test_video.mp3')

### 2. 텍스트 추출

In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

# Pick the execution device once: CUDA with half precision when a GPU is
# available, otherwise the CPU with full precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = 'cpu', torch.float32

model_id = "openai/whisper-large-v3"

# Load the speech-to-text checkpoint and move it to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor provides both the tokenizer and the feature extractor that
# the recognition pipeline needs.
processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)

# Transcribe the audio extracted in the previous step.
speech_output_path = './test_video.mp3'
result_openai = pipe(speech_output_path)

# Only the plain text of the result is kept; it is stored as UTF-8 so the
# summarization step can read it back.
transcript = result_openai["text"]
with open('stt_file.txt', mode='w', encoding='utf-8') as stt_file:
    stt_file.write(transcript)

### 3. 이전 내용 요약

In [None]:
import os
# Paste a key into the literal below only for local experiments. When it is
# left empty, the OPENAI_API_KEY already present in the environment is kept
# instead of being overwritten with an empty string.
_openai_api_key = ""
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

from langchain.document_loaders import TextLoader

# Read the transcript written by the speech-to-text step back in as
# LangChain documents (decoded as UTF-8).
loader = TextLoader(file_path='./stt_file.txt', encoding='utf-8')
document = loader.load()


from langchain.chains.summarize import load_summarize_chain
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter shared by the summarization code below.
# The transcript is written as one continuous string and may contain no line
# breaks at all. With only "\n\n" and "\n" as separators such a text is never
# split and ends up as a single oversized chunk, so the whitespace and
# per-character fallbacks are listed as well to keep chunks within chunk_size.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""], chunk_size=3000, chunk_overlap=300
)

def summarize_review(review):
    """Summarize a home-shopping script with a map-reduce summarize chain.

    The text is split into chunks with the module-level ``text_splitter``.
    The map prompt is applied to the chunks and the combine prompt to the
    resulting bullet summaries.

    Args:
        review: Full script text to summarize.

    Returns:
        The outputs of the chain (it is invoked with
        ``return_only_outputs=True`` and configured with
        ``return_intermediate_steps=True``).
    """
    chunks = text_splitter.create_documents([review])

    # Prompt applied to every chunk (map step).
    map_template = '''다음 복숭아 제품을 판매하는 홈쇼핑 대본을 참고하여 제품의 장단점을 bullet으로 요약해줘
    대본 : {text}
    '''
    # Prompt applied to the collected chunk summaries (combine step).
    combine_template = '''다음 bullet summary를 종합하여 홈쇼핑 제품의 특징을 요약해줘:{text}
    '''

    llm = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')
    summarizer = load_summarize_chain(
        llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=PromptTemplate(
            template=map_template, input_variables=["text"]
        ),
        combine_prompt=PromptTemplate(
            template=combine_template, input_variables=["text"]
        ),
    )

    return summarizer({"input_documents": chunks}, return_only_outputs=True)

# Summarize the text of the first loaded document; as the last expression of
# the cell, the result is displayed by the notebook.
transcript_text = document[0].page_content
summarize_review(transcript_text)

### 4. 함수화

In [None]:
import os
import torch
from moviepy.editor import *
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from langchain.document_loaders import TextLoader
from langchain.chains.summarize import load_summarize_chain
from langchain import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Paste a key into the literal below only for local experiments. When it is
# left empty, the OPENAI_API_KEY already present in the environment is kept
# instead of being overwritten with an empty string.
_openai_api_key = ""
if _openai_api_key:
    os.environ["OPENAI_API_KEY"] = _openai_api_key

# Chat model shared by the summarization function below (temperature 0).
chat_model = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')

def video2voice(video_path, voice_path='./voice/test_video.mp3'):
    """Extract the audio track of a video and save it as an audio file.

    Args:
        video_path: Path of the source video, passed to ``VideoFileClip``.
        voice_path: Destination audio file. Defaults to the location this
            function has always written to, so existing callers are
            unaffected.

    Returns:
        The path the audio was written to, so it can be passed straight to
        the transcription step.
    """
    # The output folder may not exist yet; create it so the export cannot
    # fail on a missing directory.
    out_dir = os.path.dirname(voice_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # The `with` block closes the clip once the audio has been written, even
    # if the export raises.
    with VideoFileClip(video_path) as video:
        video.audio.write_audiofile(voice_path)

    return voice_path
    
def voice2text(voice_path, text_path='./text/stt_file.txt'):
    """Transcribe an audio file with Whisper and save the transcript.

    Args:
        voice_path: Audio input handed to the speech-recognition pipeline.
        text_path: File the transcript is written to (UTF-8). Defaults to
            the location this function has always written to, so existing
            callers are unaffected.

    Returns:
        The path of the transcript file, so it can be passed straight to the
        summarization step.
    """
    # Create the output folder up front: discovering a missing directory only
    # after the transcription has finished would throw the whole run away.
    out_dir = os.path.dirname(text_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # CUDA with half precision when a GPU is available, otherwise the CPU
    # with full precision.
    use_cuda = torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'
    torch_dtype = torch.float16 if use_cuda else torch.float32
    model_id = "openai/whisper-large-v3"

    # NOTE: the model is loaded on every call; callers transcribing many
    # files may want to hoist this.
    model = AutoModelForSpeechSeq2Seq.from_pretrained(
        model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
    )
    model.to(device)

    # The processor provides both the tokenizer and the feature extractor.
    processor = AutoProcessor.from_pretrained(model_id)
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model,
        tokenizer=processor.tokenizer,
        feature_extractor=processor.feature_extractor,
        max_new_tokens=128,
        chunk_length_s=30,
        batch_size=16,
        return_timestamps=True,
        torch_dtype=torch_dtype,
        device=device,
    )

    result_openai = pipe(voice_path)

    # Only the plain text of the result is stored.
    with open(text_path, 'w', encoding='utf-8') as stt_file:
        stt_file.write(result_openai["text"])

    return text_path

def before_streaming_summarize(chat_model, text_path):
    """Summarize a saved broadcast transcript with a map-reduce chain.

    The transcript is loaded from ``text_path``, split into chunks, and
    passed to a summarize chain configured with the map and combine prompts
    defined below.

    Args:
        chat_model: Chat model handed to ``load_summarize_chain``.
        text_path: Path of the UTF-8 transcript file.

    Returns:
        The outputs of the chain (it is invoked with
        ``return_only_outputs=True`` and configured with
        ``return_intermediate_steps=True``).
    """
    loader = TextLoader(text_path, encoding='utf-8')
    document = loader.load()

    # The transcript may contain no line breaks at all. With only "\n\n" and
    # "\n" as separators such a text is never split and ends up as a single
    # oversized chunk, so the whitespace and per-character fallbacks are
    # listed as well to keep chunks within chunk_size.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", " ", ""], chunk_size=3000, chunk_overlap=300
    )

    docs = text_splitter.create_documents([document[0].page_content])

    # Prompt applied to every chunk (map step).
    map_prompt_template = '''
        너는 라이브 커머스 방송 매니저야
        {text}에서 시청자와 소통했던 내용 및 이벤트 진행상황에 대해서만 말해
    '''
    # Prompt applied to the collected chunk outputs (combine step).
    combine_prompt_template = '''
        {text}를 시청하지 못하면 알 수 없을 내용들을 작성해
    '''

    MAP_PROMPT = PromptTemplate(template=map_prompt_template, input_variables=["text"])
    COMBINE_PROMPT = PromptTemplate(template=combine_prompt_template, input_variables=["text"])

    chain = load_summarize_chain(chat_model, chain_type="map_reduce", return_intermediate_steps=True,
                                  map_prompt=MAP_PROMPT, combine_prompt=COMBINE_PROMPT)

    return chain({"input_documents": docs}, return_only_outputs=True)
