In [None]:
# Install the Hugging Face libraries used below. The leading "!" is an
# IPython shell escape, so this line only works inside a notebook.
!pip install transformers huggingface_hub

# Log in to the Hugging Face Hub. The token string below is a placeholder
# and must be replaced with a real access token before running.
# NOTE(review): avoid committing a real token; consider reading it from an
# environment variable or an interactive prompt instead.
from huggingface_hub import login
login(token="토큰값 입력하세요")

import torchaudio
from transformers import WhisperProcessor, WhisperForConditionalGeneration
import re

# Path of the recording to transcribe.
audio_file = "/content/development_meeting_script.wav"

# Load the Whisper processor (feature extractor + tokenizer) and the model.
model_name = "openai/whisper-medium"
processor = WhisperProcessor.from_pretrained(model_name)
model = WhisperForConditionalGeneration.from_pretrained(model_name)

# Sample rate the audio is converted to; it must match the `sampling_rate`
# value that transcribe_chunk passes to the processor.
TARGET_SAMPLE_RATE = 16000

# torchaudio.load returns a (channels, samples) tensor and the file's native
# sample rate.
audio_input, sample_rate = torchaudio.load(audio_file)

# Down-mix multi-channel recordings to mono, keeping the channel dimension so
# downstream slicing with audio_input[:, start:end] still works.
if audio_input.shape[0] > 1:
    audio_input = audio_input.mean(dim=0, keepdim=True)

# Resample to the target rate. Without this, a file recorded at e.g. 44.1 kHz
# would be fed to the feature extractor labelled as 16 kHz audio.
if sample_rate != TARGET_SAMPLE_RATE:
    audio_input = torchaudio.functional.resample(
        audio_input, orig_freq=sample_rate, new_freq=TARGET_SAMPLE_RATE
    )
    # Keep sample_rate in sync so later chunk-length math uses the new rate.
    sample_rate = TARGET_SAMPLE_RATE

# Transcribe a single audio chunk
def transcribe_chunk(audio_chunk):
    """Transcribe one chunk of audio to Korean text with the global Whisper model.

    Args:
        audio_chunk: Waveform samples, either 1-D ``(samples,)`` or 2-D
            ``(channels, samples)``. The samples are passed to the processor
            as 16 kHz audio, so the caller is responsible for providing audio
            at that rate.

    Returns:
        The decoded transcription string for the chunk, with special tokens
        removed.
    """
    # Collapse the channel dimension by averaging. For a single channel this
    # yields the same samples as squeezing, and for stereo input it produces
    # a proper mono signal instead of leaving a 2-D array.
    if audio_chunk.ndim > 1:
        waveform = audio_chunk.mean(0)
    else:
        waveform = audio_chunk

    input_features = processor(
        waveform,
        sampling_rate=16000,
        return_tensors="pt",
    ).input_features

    # The language must be given to generate(); passing it to the processor
    # has no effect on decoding, which would leave language detection to the
    # model on every chunk.
    generated_ids = model.generate(
        input_features,
        language="ko",
        task="transcribe",
        max_new_tokens=300,
    )
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]

# Chunk length in samples: 20 seconds of audio per chunk.
chunk_length = int(20 * sample_rate)
total_samples = audio_input.shape[1]
# Number of chunks, rounding up so a trailing partial chunk is included.
num_chunks = (total_samples + chunk_length - 1) // chunk_length

full_transcription = []

# Step through the waveform one chunk at a time; slicing past the end of the
# tensor is clamped, so the final chunk is simply shorter.
for start in range(0, total_samples, chunk_length):
    audio_chunk = audio_input[:, start:start + chunk_length]
    transcription = transcribe_chunk(audio_chunk).strip()
    # Skip chunks that produced no text so the join below does not insert
    # stray spaces.
    if transcription:
        full_transcription.append(transcription)

text = " ".join(full_transcription)
# Put each sentence on its own line. A break is inserted only when the
# punctuation is followed by whitespace or the end of the text, so decimals
# and version numbers such as "2.0" are not split in the middle.
formatted_text = re.sub(r'([.!?]+)(?:\s+|$)', r'\1\n', text)

print(formatted_text)

# Write as UTF-8 explicitly so Korean text is saved correctly regardless of
# the platform's default encoding.
with open("/content/formatted_text.txt", "w", encoding="utf-8") as f:
    f.write(formatted_text)

print("Transcription complete. Result saved to formatted_text.txt")
