In [1]:
# ----------------------------
# Cell 1：安裝套件與 UI 設定
# ----------------------------
print("🚀 開始執行 Cell 1：安裝套件與 UI 設定...")

import sys
import os
print(f"🐍 Python version: {sys.version}")
# Confirmed Python 3.11 (cp311 wheels will be used)

# Based on your error log, torch_xla 2.7.0 was listed as available.
# We will try to install torch_xla and let it pull its specific, compatible
# torch, torchvision, and torchaudio versions directly from the XLA releases.
TARGET_TORCH_XLA_VERSION = "2.7.0"

print(f"🎯 Target TorchXLA version: {TARGET_TORCH_XLA_VERSION}")

# 1. Uninstall potentially conflicting packages
print("🔄 Uninstalling torch, torch_xla, torchvision, torchaudio, and fastai to ensure a clean environment...")
!pip uninstall -y torch torch_xla torchvision torchaudio fastai 2>/dev/null || true
print("✅ Uninstallation attempt complete.")

# 2. Install TorchXLA and its XLA-compatible PyTorch/TorchVision/TorchAudio
# This command focuses on installing torch_xla from its specific repository,
# which should ensure that it pulls compatible versions of torch, torchvision, and torchaudio
# that were built together and are ABI-compatible.
PYTORCH_XLA_RELEASES_INDEX = "https://storage.googleapis.com/pytorch-xla-releases/index.html"
# The libtpu-releases index is also important for the underlying TPU libraries.
LIBTPU_RELEASES_INDEX = "https://storage.googleapis.com/libtpu-releases/index.html"

print(f"🔄 Installing TorchXLA {TARGET_TORCH_XLA_VERSION} and its compatible PyTorch dependencies...")
print(f"   This will use Python 3.11 (cp311) compatible wheels from: {PYTORCH_XLA_RELEASES_INDEX} and {LIBTPU_RELEASES_INDEX}")

# This single command should fetch torch_xla and its specific, compatible torch, torchvision, torchaudio.
# It's crucial that pip resolves these from the XLA indices.
!pip install -q \
    torch_xla=={TARGET_TORCH_XLA_VERSION} \
    -f {PYTORCH_XLA_RELEASES_INDEX} \
    -f {LIBTPU_RELEASES_INDEX}

# As a verification, we can try to install specific versions of torch, torchvision, torchaudio
# that are known to be bundled or compatible with torch_xla 2.7.0 from the SAME XLA index.
# For torch_xla 2.7.0 (cp311), the compatible torch is often torch~2.4.0 (XLA build).
# However, the above command should ideally handle this. If it doesn't, this section can be enabled.
# print("🔄 (Optional) Explicitly installing compatible torch, torchvision, torchaudio from XLA index...")
# !pip install -q \
#     torch~=2.4.0 \
#     torchvision~=0.19.0 \
#     torchaudio~=2.4.0 \
#     -f {PYTORCH_XLA_RELEASES_INDEX} \
#     -f {LIBTPU_RELEASES_INDEX} --no-deps
# The --no-deps flag would be to prevent conflicts if torch_xla already installed them.

print("✅ PyTorch/TorchXLA installation attempt complete.")
print("🔍 Verifying installed versions (after restart, these will be effective):")
# These commands might show versions before restart if run immediately.
# The true test is importing after restarting the session.
!pip show torch torch_xla torchvision torchaudio | grep -E "^(Name|Version):" || echo "Verification step: Some packages might not be fully listed until after restart."


# 3. Install Transformers and other utilities
print("🔄 Installing Hugging Face Transformers and other utilities (sentencepiece, librosa, soundfile, ipywidgets, accelerate)...")
!pip install -q "transformers>=4.39.0,<4.43.0" sentencepiece librosa soundfile ipywidgets "accelerate>=0.25.0"
print("✅ Utilities installation complete.")

# 4. Install FFmpeg for audio processing
print("🔄 Updating apt and installing ffmpeg...")
!apt-get update -qq > /dev/null && apt-get install -y -qq ffmpeg > /dev/null
print("✅ ffmpeg installation complete.")

print("\n👍 Cell 1 package installation process finished.")
print("‼️ IMPORTANT: You MUST restart the Colab session now for these changes to take effect.")
print("   Go to 'Session' > 'Restart session' in the Colab menu (or 'Runtime' > 'Restart session').")
print("   After restarting, re-run all cells starting from this Cell 1 (the UI part below will then execute).")

# UI part - This will effectively run after the restart when the user re-runs Cell 1
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Clear previous output from installations for a cleaner UI display when re-run after restart
clear_output(wait=True) # This clears the installation logs from above when the cell is re-run
print("🔄 Session likely restarted (or this is the first run of UI part). Displaying UI configuration options...")
print(f"🐍 Python version: {sys.version}") # Re-print python version for context

display(HTML("""
<style>
    .widget-label { min-width: 20ex !important; }
    .widget-dropdown > select { background-color: #f0f0f0; border-radius: 4px; }
    .widget-text input[type="text"] { background-color: #f0f0f0; border-radius: 4px; }
    .widget-button { background-color: #4CAF50 !important; color: white !important; border-radius: 5px; }
    .widget-inttext input[type="number"] { background-color: #f0f0f0; border-radius: 4px; }
    .settings-box { padding: 15px; border: 1px solid #ccc; border-radius: 8px; background-color: #fafafa; box-shadow: 2px 2px 5px rgba(0,0,0,0.1); }
    .settings-box .widget-box { margin-bottom: 10px; }
    .settings-box .widget-html-value b { font-size: 1.1em; color: #2c3e50; margin-top: 12px; display: block; border-bottom: 1px solid #eee; padding-bottom: 5px;}
</style>
"""))

model_options = [
    ("Tiny (最快, 準確度較低)", "tiny"),
    ("Base (快速, 基礎準確度)", "base"),
    ("Small (推薦, 速度與準確度均衡)", "small"),
    ("Medium (較慢, 準確度高)", "medium"),
    ("Large-v1 (v1, 準確度高)", "large"),
    ("Large-v2 (v2, 準確度更高)", "large-v2"),
    ("Large-v3 (v3, 最新, 準確度最佳)", "large-v3")
]
model_widget = widgets.Dropdown(options=model_options, value="small", description="Whisper 模型:")

language_options = [
    ("自動偵測 (auto)", "auto"),
    ("中文 (zh)", "zh"),
    ("英文 (en)", "en"),
    ("日文 (ja)", "ja"),
    ("韓文 (ko)", "ko"),
    ("廣東話 (yue)", "yue"),
    ("其他 (自行輸入)", "custom")
]
language_dropdown_widget = widgets.Dropdown(options=language_options, value="auto", description="轉錄語言:")
language_text_widget = widgets.Text(value="", placeholder="若選'其他',請輸ISO代碼 (例: de, fr)")

def on_language_change(change):
    if change['type'] == 'change' and change['name'] == 'value': # Ensure it's a value change
        if change.new == "custom":
            language_text_widget.layout.display = "flex"
        else:
            language_text_widget.layout.display = "none"
language_dropdown_widget.observe(on_language_change, names='value')
language_text_widget.layout.display = "none" # Initial state

task_widget = widgets.Dropdown(options=["transcribe", "translate"], value="transcribe", description="任務:")

precision_options = [
    ("BF16 (TPU 建議, 加速)", "bf16"),
    ("FP32 (標準精度, CPU/GPU)", "fp32")
]
precision_widget = widgets.Dropdown(options=precision_options, value="bf16", description="運算精度:")

chunk_length_s_widget = widgets.IntText(value=28, description="音訊切塊長度(秒):", style={'description_width': 'initial'}, min=1, max=30)
stride_length_s_left_widget = widgets.IntText(value=5, description="左側重疊(秒):", style={'description_width': 'initial'}, min=0)
stride_length_s_right_widget = widgets.IntText(value=5, description="右側重疊(秒):", style={'description_width': 'initial'}, min=0)

settings_box_layout = widgets.Layout(display='flex', flex_flow='column', align_items='stretch', width='auto')
settings_box = widgets.VBox([
    model_widget,
    language_dropdown_widget,
    language_text_widget,
    task_widget,
    precision_widget,
    widgets.HTML("<b>長音檔處理 (進階設定):</b>"),
    chunk_length_s_widget,
    stride_length_s_left_widget,
    stride_length_s_right_widget
], layout=settings_box_layout)

display(HTML("<h2>語音轉錄設定</h2>"), settings_box)

print("\n✅ Cell 1 UI 設定完成。請確認以上設定，然後執行下一個 Cell。")
print("   如果您剛剛執行了套件安裝並被提示重啟，請務必先 '執行階段 -> 重新啟動執行階段' 或 '工作階段 -> 重新啟動工作階段'。")

🔄 Session likely restarted (or this is the first run of UI part). Displaying UI configuration options...
🐍 Python version: 3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0]


VBox(children=(Dropdown(description='Whisper 模型:', index=2, options=(('Tiny (最快, 準確度較低)', 'tiny'), ('Base (快速,…


✅ Cell 1 UI 設定完成。請確認以上設定，然後執行下一個 Cell。
   如果您剛剛執行了套件安裝並被提示重啟，請務必先 '執行階段 -> 重新啟動執行階段' 或 '工作階段 -> 重新啟動工作階段'。


In [2]:
# @title
# -------------------------------------------------
# Cell 2：載入模型、初始化 Pipeline 與 XLA 熱機
# -------------------------------------------------
print("🚀 開始執行 Cell 2：載入模型、初始化 Pipeline 與 XLA 熱機...")

# 1. 匯入必要函式庫
import torch
import warnings
import time
import numpy as np
import gc # For garbage collection

# 嘗試匯入 torch_xla，如果失敗，提示用戶檢查安裝和 Runtime 重啟
try:
    import torch_xla
    import torch_xla.core.xla_model as xm
    import torch_xla.debug.metrics as met
    print("✅ torch_xla 相關模組匯入成功。")
    print(f"   Torch Version: {torch.__version__}")
    print(f"   Torch XLA Version: {torch_xla.__version__}")
except ImportError as e:
    print(f"❌ torch_xla 模組匯入失敗！詳細錯誤: {e}")
    print("   請確保您已成功執行 Cell 1 中的 PyTorch/XLA 安裝，")
    print("   並且在安裝後已經『重新啟動工作階段』(Session -> Restart session)。")
    print("   如果問題持續，請檢查 Cell 1 的安裝日誌是否有錯誤，並確認版本相容性。")
    raise # 終止執行，因為後續步驟依賴 XLA
except Exception as e:
    print(f"❌ 匯入 torch_xla 相關模組時發生其他錯誤: {e}")
    raise

from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline

# 靜音一些不影響功能的警告
warnings.filterwarnings("ignore", message=".*TorchScript only supports basic types list, tuple, dict.*")
warnings.filterwarnings("ignore", message=".*PySoundFile failed.*")
warnings.filterwarnings("ignore", message=".*Due to a bug fix.*")
warnings.filterwarnings("ignore", message=".*Passing `max_length` to BeamSearchScorer is deprecated*")

# 2. 取得使用者在 Cell 1 的設定
print("⚙️ 讀取使用者設定...")
selected_model_name_suffix = model_widget.value
_selected_language_option = language_dropdown_widget.value
if _selected_language_option == "custom":
    selected_language = language_text_widget.value.strip().lower()
    if not selected_language:
        print("   ⚠️ 自訂語言為空，將使用自動偵測 (auto)。")
        selected_language = "auto"
else:
    selected_language = _selected_language_option

selected_task = task_widget.value
selected_precision = precision_widget.value
# Ensure chunk_length is at least 1 if it's positive, Whisper default is 30s
chunk_length = max(1, chunk_length_s_widget.value) if chunk_length_s_widget.value > 0 else 30
stride_left = max(0, stride_length_s_left_widget.value)
stride_right = max(0, stride_length_s_right_widget.value)


MODEL_NAME = f"openai/whisper-{selected_model_name_suffix}"
print(f"   模型: {MODEL_NAME}, 語言: {selected_language}, 任務: {selected_task}, 精度: {selected_precision}")
print(f"   長音檔參數 -> 切塊: {chunk_length}s, 重疊: [{stride_left}s, {stride_right}s]")

# 3. 設定 TPU 裝置與運算精度
tpu_device_acquisition_successful = False
try:
    tpu_cores = xm.xrt_world_size()
    print(f"🌍 偵測到 {tpu_cores} 個 XLA 裝置核心。")
    device = xm.xla_device() # This gets a single XLA device object representing all cores for SPMD
    print(f"✅ TPU 裝置成功取得: {device} (代表所有 {tpu_cores} 個核心)")
    tpu_device_acquisition_successful = True
except Exception as e:
    print(f"⚠️ 無法取得 TPU 裝置，錯誤: {e}")
    print("   請確認 Colab Runtime 已選擇 TPU (TPU v2)。後續處理將 fallback 至 CPU。")
    device = torch.device("cpu")


if selected_precision == "bf16" and tpu_device_acquisition_successful:
    torch_dtype = torch.bfloat16
    print("   運算精度設定為: BF16 (適用於 TPU)")
elif selected_precision == "bf16" and not tpu_device_acquisition_successful:
    torch_dtype = torch.float32 # CPU doesn't natively support BF16 well for torch ops
    print("   ⚠️ CPU 不直接支援 BF16，運算精度自動調整為 FP32。")
else:
    torch_dtype = torch.float32
    print(f"   運算精度設定為: FP32 (適用於 {device.type})")

# 4. 載入 Whisper Processor
processor = None
print(f"\n🔄 載入 Whisper Processor for {MODEL_NAME}...")
try:
    processor = WhisperProcessor.from_pretrained(MODEL_NAME)
    print("✅ Processor 載入成功!")
except Exception as e:
    print(f"❌ Processor 載入失敗: {e}. 請檢查模型名稱 ({MODEL_NAME}) 是否正確，或網路連線。")

# 5. 載入 Whisper 模型並移至 TPU
model = None
if processor:
    print(f"🔄 載入 Whisper 模型 {MODEL_NAME} (dtype: {torch_dtype}) 並移至裝置 {device}...")
    try:
        model = WhisperForConditionalGeneration.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch_dtype, # Apply dtype at loading
            low_cpu_mem_usage=True if "large" in MODEL_NAME else False, # Helpful for large models
        ).to(device) # Move model to XLA device
        model.eval() # Set to evaluation mode
        print("✅ 模型載入並移至裝置成功!")
    except Exception as e:
        print(f"❌ 模型載入或移至裝置失敗: {e}")
        if "out of memory" in str(e).lower() or "OOM" in str(e).upper():
            print("   💡 提示：可能是裝置記憶體不足。嘗試：")
            print("      1. 選用較小的 Whisper 模型 (如 small, base)。")
            print("      2. 確認運算精度為 BF16 (若在 TPU)。")
            print("      3. 『重新啟動工作階段』以釋放所有資源，然後重試。")
        model = None # Ensure model is None if loading failed
else:
    print("⚠️ 由於 Processor 載入失敗，跳過模型載入。")

# 6. 初始化 ASR Pipeline
asr_pipeline = None
if model and processor: # Proceed only if both model and processor are loaded
    print("\n🔄 初始化 ASR Pipeline...")
    try:
        asr_pipeline = pipeline(
            "automatic-speech-recognition",
            model=model, # Pass the model already on the XLA device
            tokenizer=processor.tokenizer,
            feature_extractor=processor.feature_extractor,
            torch_dtype=torch_dtype, # Re-affirm torch_dtype for pipeline consistency
            device=device, # Explicitly pass the XLA device to the pipeline
        )
        print("✅ ASR Pipeline 初始化成功!")
    except Exception as e:
        print(f"❌ ASR Pipeline 初始化失敗: {e}")
else:
    print("⚠️ 由於模型或 Processor 載入失敗，跳過 Pipeline 初始化。")

# 7. XLA 熱機 (Warm-up)
if asr_pipeline and tpu_device_acquisition_successful:
    print("\n🔥 開始 XLA 熱機 (處理 2 秒靜音以編譯計算圖)...")
    print(f"   使用模型: {MODEL_NAME}, 任務: {selected_task}, 語言 (熱機時): {'auto' if selected_language == 'auto' else selected_language}")

    warmup_chunk_length = chunk_length # Use user-defined or default from above
    warmup_stride_config = [stride_left, stride_right] if stride_left >= 0 and stride_right >= 0 else None

    print(f"   熱機參數 -> 切塊: {warmup_chunk_length}s, 重疊: {warmup_stride_config}")

    dummy_audio_np = np.zeros(16000 * 2, dtype=np.float32) # 2 seconds of silence at 16kHz

    generate_pipeline_kwargs_warmup = {"task": selected_task}
    if selected_language.lower() != "auto":
        generate_pipeline_kwargs_warmup["language"] = selected_language

    t_start_warmup = time.time()
    try:
        with torch.no_grad(): # Inference mode
            print("   🚀 執行第一次熱機呼叫 (XLA 編譯中，此步驟可能需要一些時間，請耐心等候)...")
            _ = asr_pipeline(
                dummy_audio_np, # Pass numpy array directly
                generate_kwargs=generate_pipeline_kwargs_warmup,
                chunk_length_s=warmup_chunk_length,
                stride_length_s=warmup_stride_config,
            )
            xm.mark_step() # Crucial for ensuring XLA operations complete
            print("   ✅ 第一次熱機呼叫完成。")

        t_elapsed_warmup = time.time() - t_start_warmup
        print(f"✅ XLA 熱機順利完成！耗時 {t_elapsed_warmup:.2f} 秒。")
        print("   TPU 已準備就緒，可以執行 Cell 3 處理您的音檔了。")
        if tpu_device_acquisition_successful:
             print(f"   📊 TPU 記憶體使用情況:\n{met.metrics_report()}")
    except Exception as e:
        print(f"❌ XLA 熱機失敗: {e}")
        print("   😭 熱機過程中發生錯誤。可能原因：")
        print("      1. PyTorch/XLA 版本與 Colab TPU 環境不相容 (請檢查 Cell 1 安裝與重啟)。")
        print("      2. 模型過大，TPU 記憶體不足 (OOM)。")
        print("      3. 選定的語言/任務/模型組合導致 XLA 編譯困難。")
        print("   建議操作：")
        print("      - 仔細檢查 Cell 1 的套件安裝日誌，確保沒有錯誤。")
        print("      - 務必在 Cell 1 安裝套件後『重新啟動工作階段』。")
        print("      - 嘗試使用較小的模型 (如 'tiny' 或 'base') 進行測試。")
        import traceback
        traceback.print_exc()
elif not tpu_device_acquisition_successful and asr_pipeline :
    print("\nℹ️ 使用 CPU 執行，跳過 XLA 熱機。")
else:
    print("\n⚠️ Pipeline 未成功初始化或非 TPU 環境，跳過 XLA 熱機。請檢查本 Cell 的錯誤訊息。")

if 'dummy_audio_np' in locals():
    del dummy_audio_np
gc.collect()
if tpu_device_acquisition_successful:
    xm.wait_device_ops()

🚀 開始執行 Cell 2：載入模型、初始化 Pipeline 與 XLA 熱機...
✅ torch_xla 相關模組匯入成功。
   Torch Version: 2.7.1+cu126
   Torch XLA Version: 2.7.0
⚙️ 讀取使用者設定...
   模型: openai/whisper-small, 語言: auto, 任務: transcribe, 精度: bf16
   長音檔參數 -> 切塊: 28s, 重疊: [5s, 5s]
⚠️ 無法取得 TPU 裝置，錯誤: module 'torch_xla.core.xla_model' has no attribute 'xrt_world_size'
   請確認 Colab Runtime 已選擇 TPU (TPU v2)。後續處理將 fallback 至 CPU。
   ⚠️ CPU 不直接支援 BF16，運算精度自動調整為 FP32。

🔄 載入 Whisper Processor for openai/whisper-small...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/185k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/283k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/836k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.48M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/494k [00:00<?, ?B/s]

normalizer.json:   0%|          | 0.00/52.7k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/34.6k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.19k [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


✅ Processor 載入成功!
🔄 載入 Whisper 模型 openai/whisper-small (dtype: torch.float32) 並移至裝置 cpu...


config.json:   0%|          | 0.00/1.97k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/967M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/3.87k [00:00<?, ?B/s]

✅ 模型載入並移至裝置成功!

🔄 初始化 ASR Pipeline...
✅ ASR Pipeline 初始化成功!

ℹ️ 使用 CPU 執行，跳過 XLA 熱機。


In [12]:
# @title
# --------------------------------------------
# Cell 3：上傳音檔並進行長音檔轉錄
# --------------------------------------------
print("🚀 開始執行 Cell 3：上傳音檔並進行長音檔轉錄...")

# 1. 匯入必要模組
from google.colab import files
import librosa
import time
import os
import gc
import numpy as np # Ensure numpy is available

# 2. 檢查 Pipeline 是否已準備就緒
if 'asr_pipeline' not in globals() or asr_pipeline is None:
    print("❌ ASR Pipeline 尚未初始化或初始化失敗。請先成功執行 Cell 1 和 Cell 2。")
    print("   特別注意：如果 Cell 1 或 Cell 2 提示需要重啟執行階段，請務必執行後再回來。")
else:
    print("✅ ASR Pipeline 已準備就緒。")

    # 3. 提示使用者上傳音檔
    print("\n📤 請上傳音檔 (mp3 / wav / m4a / ogg / flac ...)")
    print("   您可以一次選擇多個檔案。")
    try:
        uploaded_files = files.upload()
        if not uploaded_files:
            print(" 🤔 沒有上傳任何檔案。")
        else:
            print(f"📂 已成功上傳 {len(uploaded_files)} 個檔案。")
    except Exception as e:
        print(f"❌ 檔案上傳過程中發生錯誤: {e}")
        uploaded_files = {} # Ensure it's a dict to prevent errors later

    # 4. 定義日誌函式
    def show_log(msg, prefix=""):
        print(f"{prefix}{time.strftime('[%Y-%m-%d %H:%M:%S]')} {msg}")

    # 5. 逐個檔案處理
    if uploaded_files:
        # 從 Cell 1 widgets 中再次獲取最新的設定
        _current_language_option = language_dropdown_widget.value
        if _current_language_option == "custom":
            current_language = language_text_widget.value.strip().lower()
            if not current_language: current_language = "auto" # Default to auto if custom is empty
        else:
            current_language = _current_language_option
        current_task = task_widget.value
        current_chunk_length = max(1, chunk_length_s_widget.value) if chunk_length_s_widget.value > 0 else 30
        current_stride_left = max(0, stride_length_s_left_widget.value)
        current_stride_right = max(0, stride_length_s_right_widget.value)
        current_model_suffix = model_widget.value

        generate_pipeline_kwargs_main = {"task": current_task}
        if current_language.lower() != "auto":
            generate_pipeline_kwargs_main["language"] = current_language
            if 'processor' in globals() and processor is not None:
                try:
                    base_lang_code = current_language.split('-')[0]
                    forced_ids = processor.get_decoder_prompt_ids(language=base_lang_code, task=current_task)
                    if forced_ids:
                        generate_pipeline_kwargs_main["forced_decoder_ids"] = forced_ids
                except Exception as e_fdid:
                    show_log(f"⚠️ 無法為語言 '{current_language}' 獲取 forced_decoder_ids: {e_fdid}. 繼續執行而不使用。", prefix="  ")
            else:
                show_log(f"⚠️ Processor 未載入，無法設定 forced_decoder_ids。", prefix="  ")

        main_stride_config = [current_stride_left, current_stride_right] if current_stride_left >=0 and current_stride_right >=0 else None

        show_log(f"📝 開始處理 {len(uploaded_files)} 個音檔...")
        show_log(f"   設定 -> 語言: {current_language}, 任務: {current_task}", prefix="  ")
        show_log(f"   音訊處理 -> 切塊: {current_chunk_length}s, 重疊: {main_stride_config}", prefix="  ")

        total_audio_duration_processed = 0
        total_transcription_time = 0

        for i, (fname_original, file_content) in enumerate(uploaded_files.items()):
            show_log(f"--- [{i+1}/{len(uploaded_files)}] 開始處理檔案: {fname_original} ---", prefix="➡️ ")

            safe_fname = "".join(c if c.isalnum() or c in ('.', '_', '-') else '_' for c in fname_original)
            temp_audio_path = f"./{safe_fname}"

            try:
                with open(temp_audio_path, "wb") as f:
                    f.write(file_content)
                show_log(f"暫存檔案已寫入: {temp_audio_path}", prefix="  ")

                audio_duration_seconds = 0.0
                try:
                    y, sr = librosa.load(temp_audio_path, sr=16000, mono=True)
                    audio_duration_seconds = float(librosa.get_duration(y=y, sr=sr))
                    show_log(f"音訊長度: {audio_duration_seconds:.2f} 秒 (已重採樣至 16kHz)", prefix="  ")
                    total_audio_duration_processed += audio_duration_seconds
                    del y
                    gc.collect()
                except Exception as e_librosa:
                    show_log(f"⚠️ 使用 librosa 獲取音訊資訊失敗: {e_librosa}。將繼續嘗試轉錄。", prefix="  ")

                show_log(f"🤖 使用 ASR Pipeline 進行轉錄 (模型: {current_model_suffix})...", prefix="  ")
                t_transcribe_start = time.time()

                with torch.no_grad():
                    output = asr_pipeline(
                        temp_audio_path,
                        chunk_length_s=current_chunk_length,
                        stride_length_s=main_stride_config,
                        generate_kwargs=generate_pipeline_kwargs_main,
                        return_timestamps=False,
                    )

                if 'tpu_device_acquisition_successful' in globals() and tpu_device_acquisition_successful and 'xm' in globals() :
                    xm.mark_step()

                t_transcribe_elapsed = time.time() - t_transcribe_start
                total_transcription_time += t_transcribe_elapsed

                transcription_text = output["text"] if isinstance(output, dict) and "text" in output else str(output)
                show_log(f"✅ 轉錄完成，耗時 {t_transcribe_elapsed:.2f} 秒。", prefix="  ")
                if audio_duration_seconds > 0.001:
                    rtf = t_transcribe_elapsed / audio_duration_seconds
                    show_log(f"   即時率 (Real-Time Factor, RTF): {rtf:.3f} (越小越快, <1 表示比實時快)", prefix="  ")

                preview_length = 250
                preview = transcription_text[:preview_length] + ("..." if len(transcription_text) > preview_length else "")
                print(f"\n📜 轉錄結果預覽 (前 {preview_length} 字元):\n\"{preview}\"")

                base_fname_no_ext, _ = os.path.splitext(safe_fname)
                lang_suffix = current_language if current_language.lower() != "auto" else "auto"
                out_filename = f"{base_fname_no_ext}_transcript_{current_model_suffix}_{lang_suffix}.txt"

                save_path = f"/content/{out_filename}"
                with open(save_path, "w", encoding="utf-8") as f:
                    f.write(transcription_text)
                show_log(f"💾 完整逐字稿已儲存至 (Colab 檔案系統): {save_path}", prefix="  ")

            except Exception as e_file_proc:
                show_log(f"❌ 處理檔案 {fname_original} 時發生嚴重錯誤: {e_file_proc}", prefix="  ")
                import traceback
                traceback.print_exc()
            finally:
                if os.path.exists(temp_audio_path):
                    try: os.remove(temp_audio_path)
                    except Exception as e_del: show_log(f"⚠️ 刪除暫存檔案 {temp_audio_path} 失敗: {e_del}", prefix="  ")

                gc.collect()
                if 'tpu_device_acquisition_successful' in globals() and tpu_device_acquisition_successful and 'xm' in globals():
                    xm.wait_device_ops()
                show_log(f"--- 檔案 {fname_original} 處理結束 ---\n", prefix="⬅️ ")

        show_log("🎉🎉🎉 所有音檔處理完畢！🎉🎉🎉", prefix="🏁 ")
        if total_audio_duration_processed > 0.001 and total_transcription_time > 0:
            overall_rtf = total_transcription_time / total_audio_duration_processed
            show_log(f"總音訊時長: {total_audio_duration_processed:.2f} 秒", prefix="📊 ")
            show_log(f"總轉錄耗時: {total_transcription_time:.2f} 秒", prefix="📊 ")
            show_log(f"整體即時率 (RTF): {overall_rtf:.3f}", prefix="📊 ")
        show_log("請至 Colab 左側「檔案」面板 (資料夾圖示) 下載 *_transcript.txt 檔案。", prefix="🏁 ")
        if 'tpu_device_acquisition_successful' in globals() and tpu_device_acquisition_successful and 'met' in globals() and 'xm' in globals():
            show_log(f"最終 TPU 記憶體使用情況:\n{met.metrics_report()}", prefix="📊 ")

    elif not uploaded_files and 'asr_pipeline' in globals() and asr_pipeline is not None:
        show_log("🤔 沒有選擇任何檔案進行轉錄。如果您已上傳，請確認檔案列表。", prefix="")

🚀 開始執行 Cell 3：上傳音檔並進行長音檔轉錄...
✅ ASR Pipeline 已準備就緒。

📤 請上傳音檔 (mp3 / wav / m4a / ogg / flac ...)
   您可以一次選擇多個檔案。


Saving Implementing Database Persistence in an IAM System (1).wav to Implementing Database Persistence in an IAM System (1).wav
📂 已成功上傳 1 個檔案。
[2025-06-05 07:54:40] 📝 開始處理 1 個音檔...
  [2025-06-05 07:54:40]    設定 -> 語言: auto, 任務: transcribe
  [2025-06-05 07:54:40]    音訊處理 -> 切塊: 28s, 重疊: [5, 5]
➡️ [2025-06-05 07:54:40] --- [1/1] 開始處理檔案: Implementing Database Persistence in an IAM System (1).wav ---
  [2025-06-05 07:54:40] 暫存檔案已寫入: ./Implementing_Database_Persistence_in_an_IAM_System__1_.wav
  [2025-06-05 07:54:40] 音訊長度: 400.05 秒 (已重採樣至 16kHz)
  [2025-06-05 07:54:40] 🤖 使用 ASR Pipeline 進行轉錄 (模型: small)...




  [2025-06-05 07:56:13] ✅ 轉錄完成，耗時 93.11 秒。
  [2025-06-05 07:56:13]    即時率 (Real-Time Factor, RTF): 0.233 (越小越快, <1 表示比實時快)

📜 轉錄結果預覽 (前 250 字元):
"Hello 今天我們來深入看看你這些期末專題 IAM 系統的資料喔接著看起來 欸 滿有意思的把一個現成的 呃 程式馬庫是 Quarry Butler IAM 的分支對吧改造成一個 嗯 完整的後登服務對 你的目標很清楚嘛就是要讓這個本來是Library的工具可以透過API來用沒錯主要就是用Express.js把那個嗯本來要直接Import才能用的IM邏輯把它包起來變成一個獨立的後端Server嗯哼這其實嗯蠻常見的啦就是怎麼拿現成的東西快速搭一個新服務出來了解所以我們今天就是來梳理一下你跟學長討論的..."
  [2025-06-05 07:56:13] 💾 完整逐字稿已儲存至 (Colab 檔案系統): /content/Implementing_Database_Persistence_in_an_IAM_System__1__transcript_small_auto.txt
⬅️ [2025-06-05 07:56:14] --- 檔案 Implementing Database Persistence in an IAM System (1).wav 處理結束 ---

🏁 [2025-06-05 07:56:14] 🎉🎉🎉 所有音檔處理完畢！🎉🎉🎉
📊 [2025-06-05 07:56:14] 總音訊時長: 400.05 秒
📊 [2025-06-05 07:56:14] 總轉錄耗時: 93.11 秒
📊 [2025-06-05 07:56:14] 整體即時率 (RTF): 0.233
🏁 [2025-06-05 07:56:14] 請至 Colab 左側「檔案」面板 (資料夾圖示) 下載 *_transcript.txt 檔案。
