# 袁倫祥醫師 GPT-SoVITS 語音克隆訓練

## 使用前準備
1. **Runtime > Change runtime type > GPU (T4)**
2. 依序執行每個 cell
3. 訓練完成後下載模型到本地 Mac

**預估時間**: 安裝 ~5 min, 模型下載 ~10 min, 預處理 ~15 min, SoVITS 訓練 ~30 min, GPT 訓練 ~60 min

In [None]:
# Step 1: Check the GPU and install GPT-SoVITS
import importlib
import os
import sys

os.system("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader")

# Clone (skipped when the checkout already exists)
if not os.path.exists("/content/GPT-SoVITS/.git"):
    os.system("git clone --depth 1 https://github.com/RVC-Boss/GPT-SoVITS.git /content/GPT-SoVITS")
os.chdir("/content/GPT-SoVITS")

# Install dependencies - output is deliberately not filtered through tail so
# that complete error messages stay visible
print("安裝 requirements.txt...")
os.system("pip install -r requirements.txt")

# Explicitly install the packages that tend to fail (some of the 41 packages
# in requirements.txt get skipped because of conflicts)
print("\n補安裝關鍵套件...")
os.system("pip install onnxruntime jieba_fast cn2an pypinyin g2p_en jieba wordsegment "
          "x-transformers rotary-embedding-torch sentencepiece split-lang fast-langdetect "
          "ffmpeg-python PyYAML peft")
os.system("apt-get install -y -qq ffmpeg")

# Verify the chinese2.py import chain (core dependency of the text processing)
print("\n驗證 import chain:")
sys.path.insert(0, '/content/GPT-SoVITS/GPT_SoVITS')

REQUIRED_MODULES = ['cn2an', 'pypinyin', 'jieba_fast', 'onnxruntime', 'g2p_en',
                    'wordsegment', 'x_transformers', 'sentencepiece']


def _check_imports(mod_names):
    """Import every module in *mod_names* and report the result of each.

    Every module is attempted, so one failure does not hide the others.

    Returns:
        De-duplicated list of top-level names to hand to ``pip install``.
        For each failure this is ``ImportError.name`` (which may be a
        transitive dependency), falling back to the requested module name.
    """
    failed = []
    for mod_name in mod_names:
        try:
            importlib.import_module(mod_name)
        except ImportError as e:
            print(f"\n\u26a0\ufe0f 缺少套件: {e}")
            failed.append((e.name or mod_name).split('.')[0])
        else:
            print(f"  OK: {mod_name}")
    # dict.fromkeys keeps first-seen order while dropping duplicates
    return list(dict.fromkeys(failed))


missing = _check_imports(REQUIRED_MODULES)
if missing:
    print("嘗試修復...")
    os.system("pip install " + " ".join(missing))
    # Packages installed while this interpreter is running are only found
    # after the import system's finder caches are invalidated.
    importlib.invalidate_caches()
    print("\n驗證 import chain:")
    missing = _check_imports(REQUIRED_MODULES)

if missing:
    print(f"\n\u26a0\ufe0f 缺少套件: {', '.join(missing)}")
else:
    print("\n\u2705 安裝完成！")

In [None]:
# Step 2: Download the training data (from a GitHub Release)
import os, glob

DATA_URL = "https://github.com/lunhsiangyuan/yuan-voice-clone/releases/download/training-data-v1/yuan_training_data.zip"
ZIP_PATH = "/content/GPT-SoVITS/yuan_training_data.zip"
DATA_DIR = "/content/GPT-SoVITS/training_data"

print("從 GitHub Release 下載訓練資料...")
# Stop here when the download fails: `wget -O` leaves an empty/partial zip
# behind, so carrying on would only produce confusing unzip errors.
if os.system(f"wget -q {DATA_URL} -O {ZIP_PATH}") != 0:
    print("ERROR: 訓練資料下載失敗，請檢查網路連線")
    raise SystemExit(1)

os.makedirs(DATA_DIR, exist_ok=True)
# The unzip exit status is not checked; the file counts below decide success.
os.system(f"unzip -o {ZIP_PATH} -d {DATA_DIR}/")

wav_count = len(glob.glob(f"{DATA_DIR}/audio/*.wav"))
list_files = glob.glob(f"{DATA_DIR}/*.list")
print(f"音檔數量: {wav_count}")
print(f"標註檔案: {list_files}")

# Only claim success when both audio and a transcript list were extracted.
if wav_count > 0 and list_files:
    print("\n\u2705 訓練資料已就緒！")
else:
    print("\n\u26a0\ufe0f 找不到音檔或標註檔，請檢查壓縮檔內容")

In [None]:
# Step 3: Download the pretrained models (~3.4GB)
import os
os.chdir("/content/GPT-SoVITS")


def _is_nonempty_file(path):
    """Return True when *path* is a regular file larger than 0 bytes."""
    return os.path.isfile(path) and os.path.getsize(path) > 0


def _has_weights(model_dir):
    """Return True when *model_dir* holds a non-empty weight file in either format."""
    return (_is_nonempty_file(f"{model_dir}/pytorch_model.bin")
            or _is_nonempty_file(f"{model_dir}/model.safetensors"))


def _fetch(url, dest):
    """Download *url* to *dest* with wget; return True on success.

    `wget -O` creates *dest* even when the transfer fails, so a failed
    download is deleted again to keep it from passing later existence checks.
    """
    if os.system(f"wget -q {url} -O {dest}") == 0:
        return True
    print(f"  下載失敗: {url}")
    if os.path.exists(dest):
        os.remove(dest)
    return False


os.system("pip install -q huggingface_hub")
print("下載 GPT-SoVITS 預訓練模型...")
HF_DOWNLOAD_CMD = "huggingface-cli download lj1995/GPT-SoVITS --local-dir GPT_SoVITS/pretrained_models --exclude '*.md' '.gitattributes' 'LICENSE*'"
ret = os.system(HF_DOWNLOAD_CMD)
if ret != 0:
    # One retry; its result is reported as well instead of being dropped.
    print("重試中...")
    ret = os.system(HF_DOWNLOAD_CMD)
if ret != 0:
    print("\u26a0\ufe0f huggingface-cli 下載失敗")

# Fetch the HuBERT weights separately (original note: the lj1995 repo does
# not contain the actual weights)
hubert_dir = "GPT_SoVITS/pretrained_models/chinese-hubert-base"
os.makedirs(hubert_dir, exist_ok=True)
if not _has_weights(hubert_dir):
    print("\nHuBERT 權重缺失，直接下載...")
    for fname in ("pytorch_model.bin", "config.json", "preprocessor_config.json"):
        _fetch(f"https://huggingface.co/TencentGameMate/chinese-hubert-base/resolve/main/{fname}",
               f"{hubert_dir}/{fname}")

# Fetch the BERT weights separately
bert_dir = "GPT_SoVITS/pretrained_models/chinese-roberta-wwm-ext-large"
os.makedirs(bert_dir, exist_ok=True)
if not _has_weights(bert_dir):
    print("\nBERT 權重缺失，直接下載...")
    for fname in ("pytorch_model.bin", "config.json"):
        _fetch(f"https://huggingface.co/hfl/chinese-roberta-wwm-ext-large/resolve/main/{fname}",
               f"{bert_dir}/{fname}")

# Verification
print("\n模型驗證:")
checks = {
    'gsv-v2final-pretrained': 'GPT_SoVITS/pretrained_models/gsv-v2final-pretrained',
    'HuBERT weights': f'{hubert_dir}/pytorch_model.bin',
    'BERT weights': f'{bert_dir}/pytorch_model.bin',
}
all_ok = True
for name, path in checks.items():
    # A weight entry may be satisfied by the safetensors variant instead.
    safetensors = path.replace('pytorch_model.bin', 'model.safetensors')
    if os.path.isdir(path) and os.listdir(path):
        print(f"  \u2713 {name} (dir)")
    elif _is_nonempty_file(path):
        size_mb = os.path.getsize(path) / 1024 / 1024
        print(f"  \u2713 {name} ({size_mb:.0f} MB)")
    elif _is_nonempty_file(safetensors):
        size_mb = os.path.getsize(safetensors) / 1024 / 1024
        print(f"  \u2713 {name} ({size_mb:.0f} MB, safetensors)")
    else:
        # Covers missing paths, empty directories and 0-byte files.
        print(f"  \u2717 MISSING: {name}")
        all_ok = False

if all_ok:
    print("\n\u2705 預訓練模型全部就緒！")
else:
    print("\n\u26a0\ufe0f 部分模型缺失，請檢查網路連線")

In [None]:
# Step 4: Preprocessing (text processing + HuBERT + semantic feature extraction)
import os, glob, subprocess, sys, shutil
os.chdir("/content/GPT-SoVITS")

EXP_NAME = "yuan"
WAV_DIR = "/content/GPT-SoVITS/training_data"
OPT_DIR = f"/content/GPT-SoVITS/output/training/{EXP_NAME}"
# Start from a clean output directory; anything left from a previous run is removed.
if os.path.isdir(OPT_DIR):
    shutil.rmtree(OPT_DIR)
os.makedirs(OPT_DIR, exist_ok=True)

# === Rewrite the transcript list with absolute wav paths ===
orig_list = "/content/GPT-SoVITS/training_data/transcript_corrected.list"
if not os.path.exists(orig_list):
    # Fall back to the first *.list file found in the training data directory.
    lists = glob.glob("/content/GPT-SoVITS/training_data/*.list")
    orig_list = lists[0] if lists else None
    if not orig_list:
        raise FileNotFoundError("找不到標註檔!")

LIST_FILE = f"{OPT_DIR}/transcript_abs.list"
converted = 0
skipped = 0
# Explicit UTF-8 so the transcript text does not depend on the runtime's
# default locale encoding.
with open(orig_list, 'r', encoding='utf-8') as fin, \
        open(LIST_FILE, 'w', encoding='utf-8') as fout:
    for line in fin:
        line = line.strip()
        if not line:
            continue
        parts = line.split('|')
        # A row is kept only if it has at least 4 '|'-separated fields and
        # its first field resolves to an existing file under WAV_DIR.
        wav_abs = os.path.join(WAV_DIR, parts[0])
        if len(parts) >= 4 and os.path.exists(wav_abs):
            parts[0] = wav_abs
            fout.write('|'.join(parts) + '\n')
            converted += 1
        else:
            skipped += 1
print(f"標註檔: {converted} 筆")
if skipped:
    print(f"  略過 {skipped} 筆 (欄位不足或找不到音檔)")
if converted == 0:
    # With no usable rows every later preprocessing step would run on nothing.
    raise RuntimeError(f"標註檔沒有任何有效資料: {orig_list}")

# === Pretrained models ===
pretrained_base = "GPT_SoVITS/pretrained_models"
# First existing candidate, or None when no pretrained s2G checkpoint is present.
S2G_PATH = next((p for p in [
    f"{pretrained_base}/gsv-v2final-pretrained/s2G2333k.pth",
    f"{pretrained_base}/s2G488k.pth",
] if os.path.exists(p)), None)

# === Environment variables (inp_wav_dir='' so the scripts use the absolute
# paths written into the list file) ===
env = os.environ.copy()
env.update({
    'inp_text': LIST_FILE, 'inp_wav_dir': '', 'exp_name': EXP_NAME,
    'opt_dir': OPT_DIR, 'is_half': 'True',
    'i_part': '0', 'all_parts': '1', '_CUDA_VISIBLE_DEVICES': '0',
})

def run_step(name, script, extra_env=None, timeout=900):
    """Run one preprocessing script in a child interpreter and report the outcome.

    The child is started as ``sys.executable -s <script>`` with the
    module-level ``env`` mapping, overlaid with *extra_env*, as its environment.

    Args:
        name: Label printed in the step banner.
        script: Path of the Python script to execute.
        extra_env: Optional mapping of extra environment variables for this step.
        timeout: Seconds to wait before the step is treated as failed.

    Returns:
        True when the script exits with status 0; False on a non-zero exit
        status or when the timeout expires.
    """
    def _print_tail(text, max_lines):
        # Print the last *max_lines* lines of captured output, indented.
        if not text:
            return
        if isinstance(text, bytes):
            # Output attached to TimeoutExpired can be bytes even with text=True.
            text = text.decode('utf-8', errors='replace')
        for line in text.strip().split('\n')[-max_lines:]:
            print(f"  {line}")

    print(f"\n--- {name} ---")
    step_env = env.copy()
    if extra_env:
        step_env.update(extra_env)
    try:
        result = subprocess.run([sys.executable, '-s', script],
            env=step_env, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as exc:
        # Report a timeout like any other failed step instead of letting the
        # exception abort the whole cell.
        print(f"  FAILED (timeout after {timeout}s)")
        _print_tail(exc.stdout, 15)
        _print_tail(exc.stderr, 10)
        return False
    _print_tail(result.stdout, 15)
    if result.returncode != 0:
        print(f"  FAILED (exit {result.returncode})")
        _print_tail(result.stderr, 10)
        return False
    return True

# Run the three preprocessing stages in order; a stage only runs when the
# previous one succeeded.
print("開始預處理...")
ok = run_step("1. 文字/BERT",
    "GPT_SoVITS/prepare_datasets/1-get-text.py",
    {'bert_pretrained_dir': f'{pretrained_base}/chinese-roberta-wwm-ext-large'})

if ok:
    ok = run_step("2. HuBERT",
        "GPT_SoVITS/prepare_datasets/2-get-hubert-wav32k.py",
        {'cnhubert_base_dir': f'{pretrained_base}/chinese-hubert-base', 'sv_path': ''})

if ok and S2G_PATH:
    ok = run_step("3. Semantic",
        "GPT_SoVITS/prepare_datasets/3-get-semantic.py",
        {'pretrained_s2G': S2G_PATH, 's2config_path': 'GPT_SoVITS/configs/s2.json'})
elif ok:
    # Say why the semantic stage did not run instead of skipping it silently.
    print("\n--- 3. Semantic ---")
    print("  FAILED (找不到預訓練 s2G 權重)")
    ok = False

# === Rename: drop the -0 suffix (per the original note, the training scripts
# expect file names without the part number) ===
# NOTE(review): the upstream WebUI is believed to prepend an
# "item_name\tsemantic_audio" header line when it merges the semantic parts;
# this plain rename does not - confirm whether the GPT dataset loader needs it.
print("\n後處理...")
for src_pattern, dst_name in [
    ('2-name2text-0.txt', '2-name2text.txt'),
    ('6-name2semantic-0.tsv', '6-name2semantic.tsv'),
]:
    src = os.path.join(OPT_DIR, src_pattern)
    dst = os.path.join(OPT_DIR, dst_name)
    if os.path.exists(src) and not os.path.exists(dst):
        os.rename(src, dst)
        print(f"  renamed {src_pattern} -> {dst_name}")

# === Validation ===
print("\n=== 預處理結果 ===")
all_ok = True
for f in ['2-name2text.txt', '6-name2semantic.tsv']:
    path = os.path.join(OPT_DIR, f)
    if os.path.exists(path) and os.path.getsize(path) > 10:
        # Count lines in binary mode inside a context manager: the handle is
        # closed right away and no text decoding is involved.
        with open(path, 'rb') as fh:
            lc = sum(1 for _ in fh)
        print(f"  OK {f} ({os.path.getsize(path)} bytes, {lc} lines)")
    else:
        print(f"  FAIL {f}")
        all_ok = False

# Each feature directory must exist and contain at least one entry.
for d in ['3-bert', '4-cnhubert', '5-wav32k']:
    path = os.path.join(OPT_DIR, d)
    count = len(os.listdir(path)) if os.path.isdir(path) else 0
    status = "OK" if count > 0 else "FAIL"
    print(f"  {status} {d}/ ({count} files)")
    if count == 0:
        all_ok = False

if all_ok:
    print("\n\u2705 預處理完成！所有資料就緒")
else:
    print("\n\u26a0\ufe0f 預處理有問題，訓練可能失敗")

In [None]:
# Step 5: Train the SoVITS model (~10 min on T4)
import os, json, subprocess, sys, glob
os.chdir("/content/GPT-SoVITS")

TRAIN_TIMEOUT_S = 7200


def _print_tail(text, max_lines):
    """Print the last *max_lines* lines of captured process output, indented."""
    if not text:
        return
    if isinstance(text, bytes):
        # Output attached to TimeoutExpired can be bytes even with text=True.
        text = text.decode('utf-8', errors='replace')
    for line in text.strip().split('\n')[-max_lines:]:
        print(f"  {line}")


EXP_NAME = "yuan"
OPT_DIR = f"/content/GPT-SoVITS/output/training/{EXP_NAME}"
S2_LOG_DIR = f"{OPT_DIR}/logs_s2_v2"
os.makedirs(S2_LOG_DIR, exist_ok=True)
os.makedirs(f'SoVITS_weights/{EXP_NAME}', exist_ok=True)

# Use the first existing pretrained checkpoint; '' when none is found.
pretrained_base = "GPT_SoVITS/pretrained_models"
S2G_PATH = next((p for p in [
    f"{pretrained_base}/gsv-v2final-pretrained/s2G2333k.pth",
    f"{pretrained_base}/s2G488k.pth",
] if os.path.exists(p)), '')
S2D_PATH = next((p for p in [
    f"{pretrained_base}/gsv-v2final-pretrained/s2D2333k.pth",
    f"{pretrained_base}/s2D488k.pth",
] if os.path.exists(p)), '')
for label, weights in [('s2G', S2G_PATH), ('s2D', S2D_PATH)]:
    if not weights:
        # Make the missing checkpoint visible; the config then carries an empty path.
        print(f"WARNING: 找不到預訓練 {label} 權重 (config 將使用空路徑)")

# The preprocessing outputs from Step 4 must exist and be non-trivial.
for f in ['2-name2text.txt', '6-name2semantic.tsv']:
    path = os.path.join(OPT_DIR, f)
    if not os.path.exists(path) or os.path.getsize(path) < 10:
        print(f"ERROR: {f} 缺失或為空")
        raise SystemExit(1)
    print(f"OK: {f} ({os.path.getsize(path)} bytes)")

with open('GPT_SoVITS/configs/s2.json', 'r', encoding='utf-8') as f:
    config = json.load(f)

# === train section ===
config['train']['epochs'] = 10
config['train']['batch_size'] = 16
config['train']['gpu_numbers'] = '0'
config['train']['save_every_epoch'] = 2
config['train']['if_save_latest'] = 1
config['train']['if_save_every_weights'] = True
config['train']['pretrained_s2G'] = S2G_PATH
config['train']['pretrained_s2D'] = S2D_PATH
config['train']['half_weights_save_dir'] = f'SoVITS_weights/{EXP_NAME}'
# === data section ===
config['data']['exp_dir'] = OPT_DIR
# === model section ===
config['model']['version'] = 'v2'
# === top-level (original note: process_ckpt.py uses hps.save_weight_dir / hps.name) ===
config['s2_ckpt_dir'] = S2_LOG_DIR
config['name'] = EXP_NAME
config['save_weight_dir'] = f'SoVITS_weights/{EXP_NAME}'

config_path = f'{OPT_DIR}/s2_config.json'
with open(config_path, 'w', encoding='utf-8') as f:
    json.dump(config, f, indent=2)

print(f"SoVITS: epochs={config['train']['epochs']}, batch={config['train']['batch_size']}, version=v2")
print(f"開始訓練...")

try:
    result = subprocess.run(
        [sys.executable, '-s', 'GPT_SoVITS/s2_train.py', '--config', config_path],
        capture_output=True, text=True, timeout=TRAIN_TIMEOUT_S
    )
except subprocess.TimeoutExpired as exc:
    # Show whatever was captured instead of losing it to a bare traceback;
    # the weight listing below still runs.
    print(f"\nFAILED (timeout after {TRAIN_TIMEOUT_S}s)")
    _print_tail(exc.stdout, 30)
    _print_tail(exc.stderr, 30)
else:
    if result.stdout:
        lines = result.stdout.strip().split('\n')
        print(f"\n--- stdout (last 30 of {len(lines)}) ---")
        _print_tail(result.stdout, 30)
    if result.returncode != 0:
        print(f"\nFAILED (exit {result.returncode})")
        _print_tail(result.stderr, 30)
    else:
        print(f"\nExit code: {result.returncode}")

# List the exported weights regardless of how the training run ended.
sovits_models = glob.glob(f'SoVITS_weights/{EXP_NAME}/*.pth')
print(f"\nSoVITS 模型: {len(sovits_models)}")
for m in sovits_models:
    print(f"  {m} ({os.path.getsize(m)/1024/1024:.1f} MB)")

In [None]:
# Step 6: Train the GPT model (~60 min on T4)
import os, subprocess, sys, glob
import yaml
os.chdir("/content/GPT-SoVITS")

TRAIN_TIMEOUT_S = 7200


def _print_tail(text, max_lines):
    """Print the last *max_lines* lines of captured process output, indented."""
    if not text:
        return
    if isinstance(text, bytes):
        # Output attached to TimeoutExpired can be bytes even with text=True.
        text = text.decode('utf-8', errors='replace')
    for line in text.strip().split('\n')[-max_lines:]:
        print(f"  {line}")


EXP_NAME = "yuan"
OPT_DIR = f"/content/GPT-SoVITS/output/training/{EXP_NAME}"
S1_LOG_DIR = f"{OPT_DIR}/logs_s1"
os.makedirs(S1_LOG_DIR, exist_ok=True)
os.makedirs(f'GPT_weights/{EXP_NAME}', exist_ok=True)

pretrained_base = "GPT_SoVITS/pretrained_models"
S1_PATH = ''
for pat in [f"{pretrained_base}/gsv-v2final-pretrained/s1bert25hz*.ckpt",
            f"{pretrained_base}/s1bert25hz*.ckpt"]:
    # glob order is arbitrary; sort so the same checkpoint is chosen every run.
    matches = sorted(glob.glob(pat))
    if matches:
        S1_PATH = matches[0]
        break
if not S1_PATH:
    # Make the missing checkpoint visible; the config then carries an empty path.
    print("WARNING: 找不到預訓練 s1 權重 (config 將使用空路徑)")

SEMANTIC_PATH = f'{OPT_DIR}/6-name2semantic.tsv'
PHONEME_PATH = f'{OPT_DIR}/2-name2text.txt'

# Both preprocessing outputs must exist and be non-empty before training.
for f in [SEMANTIC_PATH, PHONEME_PATH]:
    if os.path.exists(f) and os.path.getsize(f) > 0:
        print(f"OK: {os.path.basename(f)} ({os.path.getsize(f)} bytes)")
    else:
        print(f"ERROR: {f} 缺失或為空")
        raise SystemExit(1)

# Prefer the v2 base config; fall back to the first candidate path when
# neither file exists (open() then raises).
yaml_candidates = ['GPT_SoVITS/configs/s1longer-v2.yaml', 'GPT_SoVITS/configs/s1longer.yaml']
yaml_base = next((p for p in yaml_candidates if os.path.exists(p)), yaml_candidates[0])
with open(yaml_base, 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# === Inject every config key needed at runtime ===
config['train']['epochs'] = 20
config['train']['batch_size'] = 8
config['train']['save_every_n_epoch'] = 5
# Keys the WebUI injects at runtime (original note: absent from the default yaml)
config['train']['if_save_latest'] = True
config['train']['if_save_every_weights'] = True
config['train']['half_weights_save_dir'] = f'GPT_weights/{EXP_NAME}'
config['train']['exp_name'] = EXP_NAME
# top-level
config['train_semantic_path'] = SEMANTIC_PATH
config['train_phoneme_path'] = PHONEME_PATH
config['output_dir'] = S1_LOG_DIR
config['pretrained_s1'] = S1_PATH

config_path = f'{OPT_DIR}/s1_config.yaml'
with open(config_path, 'w', encoding='utf-8') as f:
    yaml.dump(config, f)

print(f"\nGPT config: epochs={config['train']['epochs']}, batch={config['train']['batch_size']}")
print(f"  pretrained: {S1_PATH}")
print(f"  exp_name: {EXP_NAME}")
print(f"\n開始訓練...")

env = os.environ.copy()
env['_CUDA_VISIBLE_DEVICES'] = '0'
try:
    result = subprocess.run(
        [sys.executable, '-s', 'GPT_SoVITS/s1_train.py', '--config_file', config_path],
        env=env, capture_output=True, text=True, timeout=TRAIN_TIMEOUT_S
    )
except subprocess.TimeoutExpired as exc:
    # Show whatever was captured instead of losing it to a bare traceback;
    # the weight listing below still runs.
    print(f"\nFAILED (timeout after {TRAIN_TIMEOUT_S}s)")
    _print_tail(exc.stdout, 30)
    _print_tail(exc.stderr, 30)
else:
    if result.stdout:
        lines = result.stdout.strip().split('\n')
        print(f"\n--- stdout ({len(lines)} lines) ---")
        _print_tail(result.stdout, 30)

    if result.returncode != 0:
        print(f"\nFAILED (exit {result.returncode})")
        _print_tail(result.stderr, 30)
    else:
        print(f"\nExit code: {result.returncode}")

# List the exported weights regardless of how the training run ended.
gpt_models = glob.glob(f'GPT_weights/{EXP_NAME}/*.ckpt')
print(f"\nGPT 模型: {len(gpt_models)}")
for m in gpt_models:
    print(f"  {m} ({os.path.getsize(m)/1024/1024:.1f} MB)")

In [None]:
# Step 7: Upload the models to a temporary file host + download
import os, re, subprocess, glob
os.chdir("/content/GPT-SoVITS")

UPLOAD_TIMEOUT_S = 600


def _natural_key(path):
    """Sort key that compares digit runs in the file name as integers.

    Plain string sorting puts "e5" after "e20"; splitting the base name into
    alternating text / integer tokens orders such names numerically.
    """
    return [int(tok) if tok.isdigit() else tok
            for tok in re.split(r'(\d+)', os.path.basename(path))]


# Pick the best model (highest epoch) - assumes the epoch number is embedded
# in the checkpoint file name; TODO confirm against the training scripts.
gpt_models = sorted(glob.glob("GPT_weights/yuan/*.ckpt"), key=_natural_key)
sovits_models = sorted(glob.glob("SoVITS_weights/yuan/*.pth"), key=_natural_key)

if not gpt_models or not sovits_models:
    print("ERROR: 找不到訓練模型！請確認 Step 5 和 Step 6 已成功執行。")
    raise SystemExit(1)

best_gpt = gpt_models[-1]
best_sovits = sovits_models[-1]
print(f"GPT: {best_gpt} ({os.path.getsize(best_gpt)/1024/1024:.1f} MB)")
print(f"SoVITS: {best_sovits} ({os.path.getsize(best_sovits)/1024/1024:.1f} MB)")

# Upload to 0x0.st (anonymous file sharing; the original note states a
# 30-day retention - TODO confirm).
# NOTE(review): presumably anyone holding the resulting URL can download the
# voice model - confirm this is acceptable for this data.
uploads = [(best_sovits, "SOVITS"), (best_gpt, "GPT")]
urls = {}
for path, label in uploads:
    size_mb = os.path.getsize(path) / 1024 / 1024
    print(f"\n上傳 {label} ({size_mb:.0f} MB)...")
    try:
        r = subprocess.run(
            ["curl", "--progress-bar", "-F", f"file=@{path}", "https://0x0.st"],
            capture_output=True, text=True, timeout=UPLOAD_TIMEOUT_S
        )
    except subprocess.TimeoutExpired:
        # A stalled upload must not stop the other file or the browser fallback.
        print(f"ERROR: {label} 上傳失敗")
        print(f"  timeout after {UPLOAD_TIMEOUT_S}s")
        continue
    if r.returncode == 0 and r.stdout.strip().startswith("http"):
        url = r.stdout.strip()
        urls[label] = url
        print(f"DOWNLOAD_{label}: {url}")
    else:
        print(f"ERROR: {label} 上傳失敗")
        print(f"  returncode: {r.returncode}")
        # stderr starts with the progress bar; any error text is at the end.
        print(f"  stderr: {r.stderr[-300:]}")

# Save the URLs to a file
with open("/content/download_urls.txt", "w") as f:
    for label, url in urls.items():
        f.write(f"{label}: {url}\n")

# Show the download commands, each with the file name of its own model
local_names = {label: os.path.basename(path) for path, label in uploads}
print("\n" + "=" * 60)
print("在本地 Mac 執行以下指令下載模型:")
print("=" * 60)
for label, url in urls.items():
    print(f"  curl -L -o {local_names[label]} {url}")
print("=" * 60)

# Fallback: also try a browser download through google.colab
try:
    from google.colab import files
    print("\n也嘗試瀏覽器下載...")
    files.download(best_sovits)
    files.download(best_gpt)
except Exception:
    # Best effort only; unlike a bare `except:`, KeyboardInterrupt and
    # SystemExit still propagate.
    print("(瀏覽器下載失敗，請用上方 curl 指令)")