In [1]:
!pip uninstall -y whisperx


[0m

In [3]:
!pip install srt

Collecting srt
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: srt
  Building wheel for srt (setup.py) ... done
  Created wheel for srt: filename=srt-3.5.3-py3-none-any.whl size=22429 sha256=a724de08821a4bee32fcf2e49a2f9aea531c369be1c73492498279b116b03d5d
  Stored in directory: /home/jovyan/.cache/pip/wheels/1f/43/f1/23ee9119497fcb57d9f7046fbf34c6d9027c46a1fa7824cf08
Successfully built srt
Installing collected packages: srt
Successfully installed srt-3.5.3


In [5]:
from pathlib import Path
import srt
import re
from datetime import timedelta

# =========================
# Basic configuration
# =========================
# Every input and output path below is resolved relative to ./work under the
# notebook's current working directory.
BASE = Path.cwd() / "work"

# The three files live in the same directory and share a lecture prefix;
# only the suffix differs, so the relative paths are assembled from these.
TRANSCRIPT_DIR_REL = "mkdocs/My_Notes/課_四89_稅捐規避專題/逐字稿初稿"
LECTURE_ID = "W16"

# Source subtitles to be re-timed.
WHISPER_SRT_REL = f"{TRANSCRIPT_DIR_REL}/{LECTURE_ID}.srt"
# Corrected transcript text.
CORRECTED_TXT_REL = f"{TRANSCRIPT_DIR_REL}/{LECTURE_ID}.txt"
# Destination of the regenerated subtitles.
OUT_SRT_REL = f"{TRANSCRIPT_DIR_REL}/{LECTURE_ID}_final.srt"

# Starting cue supplied by the user; cues before it are discarded.
START_CUE_INDEX = 246

# =========================
# Load files
# =========================
# Turn each configured relative path into an absolute path under BASE.
whisper_srt, corrected_txt, out_srt = (
    (BASE / rel_path).resolve()
    for rel_path in (WHISPER_SRT_REL, CORRECTED_TXT_REL, OUT_SRT_REL)
)

# Parse the source subtitles and materialise them as a list, because the
# cues are indexed and sliced further down.
whisper_srt_text = whisper_srt.read_text(encoding="utf-8")
subs = list(srt.parse(whisper_srt_text))

# =========================
# 1. Locate the starting cue
# =========================
# Match on the cue's own SRT index (not its position in the list) and take
# the first hit; None signals that no cue carries that index.
start_pos = next(
    (pos for pos, cue in enumerate(subs) if cue.index == START_CUE_INDEX),
    None,
)
if start_pos is None:
    raise ValueError(f"找不到 cue {START_CUE_INDEX}")

# Keep the starting cue and everything after it.
subs_slice = subs[start_pos:]

# Start time of the first kept cue (0:10:37.700 in the recorded run).
base_time = subs_slice[0].start
print("[info] original base time:", base_time)

# =========================
# 2. Shift the timeline so that it starts at 0
# =========================
# Subtract the first kept cue's start time from every timestamp; index and
# text are carried over unchanged.
shifted_subs = [
    srt.Subtitle(
        index=cue.index,
        start=cue.start - base_time,
        end=cue.end - base_time,
        content=cue.content,
    )
    for cue in subs_slice
]

first_shifted, last_shifted = shifted_subs[0], shifted_subs[-1]
print("[info] shifted start:", first_shifted.start)
print("[info] shifted end:", last_shifted.end)

# =========================
# 3. Read the corrected text
# =========================
# Each non-blank line, with surrounding whitespace trimmed, becomes one
# block; blank lines are dropped.
# NOTE(review): the original heading described blocks as separated by blank
# lines, but the code splits per line - confirm which is intended.
txt = corrected_txt.read_text(encoding="utf-8")
blocks = [stripped for stripped in map(str.strip, txt.splitlines()) if stripped]

print("[info] corrected blocks:", len(blocks))

# =========================
# 4. Total duration after trimming
# =========================
# The shifted timeline begins at zero, so the end time of its last cue is
# the length of the trimmed recording.
final_cue = shifted_subs[-1]
total_duration = final_cue.end
# Dividing by a one-second timedelta yields the duration in seconds as a
# float (the documented equivalent of total_seconds()).
total_seconds = total_duration / timedelta(seconds=1)

print("[info] total seconds:", total_seconds)

# =========================
# 5. Re-distribute time in proportion to text length
# =========================
# Each block receives a share of the total duration proportional to its
# character count (newlines, if any, are not counted).
lengths = [len(b.replace("\n", "")) for b in blocks]
total_len = sum(lengths)

new_subs = []
chars_done = 0                    # characters covered by the blocks so far
prev_end = timedelta(seconds=0)   # end of the previous cue = start of the next

for i, (block, length) in enumerate(zip(blocks, lengths), 1):
    chars_done += length

    # Derive every boundary from the cumulative character count rather than
    # by summing per-block durations. timedelta rounds each float to whole
    # microseconds, so a running sum would accumulate that rounding error
    # over all cues; computing from the cumulative fraction keeps each
    # boundary independently accurate, and on the last block the fraction
    # is exactly 1.0, so the final cue ends exactly at total_seconds.
    end = timedelta(seconds=total_seconds * (chars_done / total_len))

    new_subs.append(
        srt.Subtitle(
            index=i,
            start=prev_end,
            end=end,
            content=block,
        )
    )
    prev_end = end

# =========================
# 6. Write the new SRT
# =========================
# Create the destination directory if it is missing, then serialise the
# cues and write them out as UTF-8.
out_srt.parent.mkdir(parents=True, exist_ok=True)
srt_payload = srt.compose(new_subs)
with out_srt.open("w", encoding="utf-8") as out_file:
    out_file.write(srt_payload)

print("[done] wrote:", out_srt)

[info] original base time: 0:10:37.700000
[info] shifted start: 0:00:00
[info] shifted end: 0:38:26.229000
[info] corrected blocks: 998
[info] total seconds: 2306.229
[done] wrote: /home/jovyan/work/mkdocs/My_Notes/課_四89_稅捐規避專題/逐字稿初稿/W16_final.srt
