In [None]:
# 1. 安裝必要的依賴庫
# 在Colab中執行以下命令來安裝所需套件
!pip install -q transformers torchaudio
!pip install -q git+https://github.com/openai/whisper.git

# 2. 導入必要的庫
import os
import json
import torch
import whisper
from google.colab import drive

# 3. Mount Google Drive
# Mount Drive at /content/drive so the WAV files stored there can be accessed
drive.mount('/content/drive')

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Mounted at /content/drive


In [None]:
# 4. Environment and parameter setup
# Run on the GPU when CUDA is available; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# Input and output locations on the mounted Drive
input_folder = "/content/drive/MyDrive/2025AWS/FEniX訓練資料/audio_segments"  # folder holding the WAV files
output_folder = "/content/drive/MyDrive/2025AWS/FEniX_Text"  # folder where transcription results are saved
os.makedirs(output_folder, exist_ok=True)


Using device: cuda


In [None]:
# Initialise the Whisper model
# 'large-v3' is chosen for high accuracy; switch to 'small' or 'medium' if needed.
# A load failure is reported and then re-raised.
try:
    model = whisper.load_model("large-v3").to(device)
except Exception as load_error:
    print(f"Error loading Whisper model: {load_error}")
    raise

100%|█████████████████████████████████████| 2.88G/2.88G [01:16<00:00, 40.3MiB/s]


In [None]:
# 5. Speech-to-text helper
def transcribe_audio(audio_path, language="zh"):
    """Transcribe one audio file to text with the module-level Whisper ``model``.

    Args:
        audio_path: Path of the audio file passed to ``model.transcribe``.
        language: Language code forwarded to ``model.transcribe``. Defaults to
            ``"zh"`` (Chinese), the value that used to be hard-coded, so
            existing one-argument calls behave as before.

    Returns:
        The ``"text"`` field of the transcription result, or an empty string
        if transcription raised an exception.
    """
    try:
        result = model.transcribe(audio_path, language=language)
        return result["text"]
    except Exception as e:
        # Best-effort: report the failure and hand back an empty string so the
        # caller can skip this file instead of aborting.
        print(f"Error transcribing {audio_path}: {e}")
        return ""

# 6. Main processing logic
def process_wav_files(input_folder, output_folder):
    """Transcribe every WAV file in ``input_folder`` and save the results as JSON.

    Files are visited in sorted (lexicographic) name order so the saved JSON is
    reproducible between runs. The ``.wav`` extension is matched
    case-insensitively. Files whose transcription is empty are left out of the
    result.

    Args:
        input_folder: Directory that is listed for WAV files.
        output_folder: Directory that receives ``transcriptions.json``; it is
            created if it does not exist.

    Returns:
        The dict that was written to disk: each key maps to a list of
        ``{"filename": ..., "text": ...}`` entries.
    """
    transcriptions = {}  # grouping key -> list of transcription entries

    # os.listdir returns names in arbitrary order; sort for a stable output.
    for filename in sorted(os.listdir(input_folder)):
        if not filename.lower().endswith(".wav"):
            continue

        # NOTE(review): for names such as "SPEAKER_00_7-7.wav" this yields
        # "SPEAKER", not "SPEAKER_00", so all such files end up under a single
        # key. Kept unchanged because split_transcriptions_by_speaker reads the
        # "SPEAKER" key of the file written here.
        speaker_id = filename.split("_")[0]
        entries = transcriptions.setdefault(speaker_id, [])

        wav_path = os.path.join(input_folder, filename)
        print(f"Processing {filename}...")

        # An empty string means transcription failed or produced nothing.
        text = transcribe_audio(wav_path)
        if text:
            entries.append({
                "filename": filename,
                "text": text
            })

    # Save the collected transcriptions to a JSON file
    os.makedirs(output_folder, exist_ok=True)
    output_json = os.path.join(output_folder, "transcriptions.json")
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)
    print(f"Transcriptions saved to {output_json}")

    return transcriptions


In [None]:
# 7. Run the transcription step
if __name__ == "__main__":
    # Start processing only when the input folder is present; otherwise report it
    if os.path.exists(input_folder):
        process_wav_files(input_folder, output_folder)
    else:
        print(f"Input folder {input_folder} does not exist. Please check the path.")

Processing SPEAKER_01_0-1.wav...
Processing SPEAKER_00_7-7.wav...
Processing SPEAKER_01_7-11.wav...
Processing SPEAKER_00_8-12.wav...
Processing SPEAKER_01_14-14.wav...
Processing SPEAKER_00_13-15.wav...
Processing SPEAKER_03_15-19.wav...
Processing SPEAKER_00_20-27.wav...
Processing SPEAKER_01_19-19.wav...
Processing SPEAKER_01_27-28.wav...
Processing SPEAKER_02_17-19.wav...
Processing SPEAKER_01_29-30.wav...
Processing SPEAKER_00_28-30.wav...
Processing SPEAKER_00_30-44.wav...
Processing SPEAKER_00_44-45.wav...
Processing SPEAKER_00_45-60.wav...
Processing SPEAKER_01_46-46.wav...
Processing SPEAKER_01_60-65.wav...
Processing SPEAKER_01_67-72.wav...
Processing SPEAKER_00_66-67.wav...
Processing SPEAKER_00_67-67.wav...
Processing SPEAKER_01_74-74.wav...
Processing SPEAKER_01_78-78.wav...
Processing SPEAKER_03_77-78.wav...
Processing SPEAKER_01_80-88.wav...
Processing SPEAKER_00_85-86.wav...
Processing SPEAKER_02_78-80.wav...
Processing SPEAKER_02_85-85.wav...
Processing SPEAKER_00_88-9

In [None]:
# Set up the paths used to split the transcript by speaker
# Path of the input JSON file
input_json_path = "/content/drive/MyDrive/2025AWS/FEniX_Text/transcriptions.json"
# Path of the output folder
output_folder = "/content/drive/MyDrive/2025AWS/FEniX_Text"

# Make sure the output folder exists
os.makedirs(output_folder, exist_ok=True)

In [None]:
# Functions that split the combined transcript JSON into one file per speaker
def _collect_speaker_entries(data):
    """Return the transcription entries contained in the parsed JSON ``data``.

    Two layouts are accepted:

    * ``{"SPEAKER": [entry, ...]}`` - every entry under one ``"SPEAKER"`` key.
      When this key is present only its entries are used.
    * ``{"SPEAKER_00": [entry, ...], "SPEAKER_01": [...]}`` - entries already
      grouped under ``SPEAKER_``-prefixed keys; the lists are concatenated.

    Raises:
        ValueError: If ``data`` matches neither layout.
    """
    if isinstance(data, dict):
        if "SPEAKER" in data:
            return data["SPEAKER"]
        grouped = [
            value for key, value in data.items()
            if key.startswith("SPEAKER_") and isinstance(value, list)
        ]
        if grouped:
            return [entry for group in grouped for entry in group]
    raise ValueError("JSON does not contain 'SPEAKER' key.")


def split_transcriptions_by_speaker(input_json_path, output_folder):
    """Read a transcriptions JSON file and write one JSON file per speaker.

    Entries are grouped by the first two underscore-separated parts of their
    ``"filename"`` (e.g. ``SPEAKER_00_7-7.wav`` -> ``SPEAKER_00``). Entries
    whose filename is missing or does not start with ``SPEAKER_`` are skipped
    with a message. Each group is saved as ``<speaker_id>.json`` in
    ``output_folder``, which is created if needed.

    Args:
        input_json_path: Path of the JSON file to read.
        output_folder: Directory that receives the per-speaker JSON files.

    Returns:
        None. Errors (missing file, invalid JSON, unsupported layout, or any
        other exception) are reported with ``print`` and not re-raised.
    """
    try:
        # Read the input JSON file
        with open(input_json_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        entries = _collect_speaker_entries(data)

        # speaker id -> list of {"filename", "text"} entries
        transcriptions_by_speaker = {}

        for entry in entries:
            filename = entry.get("filename", "")
            if not filename or not filename.startswith("SPEAKER_"):
                print(f"Skipping invalid filename: {filename}")
                continue

            # The "SPEAKER_" prefix guarantees at least two parts here.
            speaker_id = "_".join(filename.split("_")[:2])
            transcriptions_by_speaker.setdefault(speaker_id, []).append({
                "filename": filename,
                "text": entry.get("text", "")
            })

        # Save one JSON file per speaker
        os.makedirs(output_folder, exist_ok=True)
        for speaker_id, speaker_entries in transcriptions_by_speaker.items():
            output_json_path = os.path.join(output_folder, f"{speaker_id}.json")
            with open(output_json_path, "w", encoding="utf-8") as f:
                json.dump(speaker_entries, f, ensure_ascii=False, indent=4)
            print(f"Saved {speaker_id}.json to {output_folder}")

        print("All speaker JSON files have been created successfully!")

    except FileNotFoundError:
        print(f"Input file {input_json_path} not found. Please ensure it exists.")
    except json.JSONDecodeError:
        print(f"Error decoding {input_json_path}. Please check the JSON format.")
    except Exception as e:
        print(f"An error occurred: {e}")

In [None]:
# Run the per-speaker split
if __name__ == "__main__":
    # Proceed only when the input JSON file is present; otherwise report it
    if os.path.exists(input_json_path):
        split_transcriptions_by_speaker(input_json_path, output_folder)
    else:
        print(f"Input file {input_json_path} does not exist. Please check the path.")

Saved SPEAKER_00.json to /content/drive/MyDrive/2025AWS/FEniX_Text
Saved SPEAKER_01.json to /content/drive/MyDrive/2025AWS/FEniX_Text
Saved SPEAKER_03.json to /content/drive/MyDrive/2025AWS/FEniX_Text
Saved SPEAKER_02.json to /content/drive/MyDrive/2025AWS/FEniX_Text
Saved SPEAKER_04.json to /content/drive/MyDrive/2025AWS/FEniX_Text
All speaker JSON files have been created successfully!
