<a href="https://colab.research.google.com/github/kajian1008/Whisper_test/blob/main/okoshite.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install transformers librosa pydub
!apt-get install ffmpeg

In [None]:
import os
import glob
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import librosa
from pydub import AudioSegment

In [None]:
# Directory that holds both the Whisper model weights and the processor files.
# NOTE(review): no visible cell mounts /content/drive — confirm Drive is mounted
# before this cell runs.
model_path = "/content/drive/MyDrive/models100+"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Load the model first, then move it onto the selected device.
model = WhisperForConditionalGeneration.from_pretrained(model_path)
model = model.to(device)

# The processor is loaded from the same directory as the model.
processor = WhisperProcessor.from_pretrained(model_path)

In [None]:
def split_audio(file_path, segment_length=30):
    """Split an audio file into consecutive chunks of a fixed duration.

    Args:
        file_path: Path of the audio file, handed to ``AudioSegment.from_file``.
        segment_length: Chunk duration in seconds. May be an int or a float;
            it is converted to whole milliseconds and must come out positive.

    Returns:
        list: Slices of the loaded audio in playback order. The last slice is
        shorter when the total length is not a multiple of the chunk
        duration. Audio of length zero produces an empty list.

    Raises:
        ValueError: If ``segment_length`` is shorter than one millisecond
            (zero or negative included).
    """
    # Lengths and slice indices are handled in milliseconds from here on.
    step_ms = int(segment_length * 1000)
    if step_ms <= 0:
        # Fail before touching the file; a zero step would break range() and a
        # negative one would silently yield no segments.
        raise ValueError(
            f"segment_length must be at least 0.001 seconds, got {segment_length!r}"
        )

    audio = AudioSegment.from_file(file_path)
    total_ms = len(audio)

    return [
        audio[start:min(start + step_ms, total_ms)]
        for start in range(0, total_ms, step_ms)
    ]

In [None]:
# Transcribe every mp3 in the Drive root and write one text file per recording.
# Sorting makes the processing order deterministic between runs.
audio_files = sorted(glob.glob("/content/drive/MyDrive/*.mp3"))
output_dir = "/content/drive/MyDrive/テキスト"
os.makedirs(output_dir, exist_ok=True)

for input_audio_file in audio_files:
    print(f"Processing: {input_audio_file}")
    segments = split_audio(input_audio_file)
    full_transcription = []
    failed_segments = 0  # decides below whether the source file may be deleted

    for i, segment in enumerate(segments):
        # Each chunk is exported to a temporary mp3 and read back through
        # librosa.load with sr=16000 before being handed to the processor.
        segment_file_path = f"{output_dir}/temp_segment_{i}.mp3"

        try:
            segment.export(segment_file_path, format="mp3")
            audio_data, sampling_rate = librosa.load(segment_file_path, sr=16000)
            inputs = processor(audio_data, return_tensors="pt", sampling_rate=sampling_rate).to(device)
            outputs = model.generate(**inputs)
            transcription = processor.batch_decode(outputs, skip_special_tokens=True)
            # Strips stray list-repr brackets from the decoded text.
            clean_transcription = [text.replace("['", "").replace("']", "") for text in transcription]
            full_transcription.extend(clean_transcription)
        except Exception as e:
            # Best effort: report the failure and continue with the next chunk.
            failed_segments += 1
            print(f"Error processing segment {i} of {input_audio_file}: {e}")
        finally:
            # Remove the temporary chunk on success and on failure alike.
            if os.path.exists(segment_file_path):
                os.remove(segment_file_path)

    # splitext drops only the final extension, so names such as "a.b.mp3" keep
    # their full stem instead of collapsing to "a".
    base_name = os.path.splitext(os.path.basename(input_audio_file))[0]
    text_file_name = f"{output_dir}/{base_name}.txt"
    with open(text_file_name, "w", encoding="utf-8") as f:
        for line in full_transcription:
            f.write(line + "\n")
    print(f"{text_file_name} に文字起こしを保存しました。")

    if failed_segments:
        # The transcript is incomplete, so keep the recording for a retry.
        print(f"Keeping {input_audio_file}: {failed_segments} segment(s) failed to transcribe.")
    else:
        os.remove(input_audio_file)

print("すべてのファイルの処理が完了しました。")