In [1]:
#!/usr/bin/env python3
"""
quick_train_debug.py  –  Smoke-test vòng training

• Đọc finetune_config.yaml
• Lấy đúng N mini-batch đầu
• Tính loss, backward, một bước optimizer
• In loss_ctc, loss_att, grad_norm, LR
"""

import os, math, torch
from chunkformer_vpb.training.finetune_config import FinetuneConfig
from chunkformer_vpb.training.data_loader     import get_dataloaders
from chunkformer_vpb.training.optimizer       import build_model_and_optimizer
from chunkformer_vpb.training.finetune_utils  import compute_loss_batch_v1

# Path to the fine-tuning YAML config; it is relative, so it is resolved
# against the current working directory.
CFG_PATH   = "../../config/finetune_config.yaml"
DEBUG_STEPS = 10          # number of batches to run in the smoke test
DEVICE      = "cpu" # pinned to CPU; auto-detect alternative: "cuda" if torch.cuda.is_available() else "cpu"

def main():
    """Run a short smoke test of the fine-tuning loop.

    Loads the fine-tuning config from ``CFG_PATH``, builds the dataloaders,
    model, optimizer and scheduler, then runs at most ``DEBUG_STEPS``
    training steps on ``DEVICE``. For every batch it prints the input
    shapes, the total / CTC / attention losses, the gradient norm returned
    by gradient clipping, and the current learning rate.

    Raises:
        ValueError: If the loss of a batch is NaN. The check runs *before*
            the backward pass, so NaN gradients are never applied to the
            model weights or the optimizer state.
    """
    # Local import: only this script entry point needs it.
    from itertools import islice

    cfg = FinetuneConfig.from_yaml(CFG_PATH)

    # Keep the run fast and deterministic: small batches, no shuffling.
    cfg.training.batch_size = 16
    cfg.training.shuffle = False

    train_loader, _ = get_dataloaders(cfg)

    # Build the device object once instead of once per batch.
    device = torch.device(DEVICE)

    # total_steps = DEBUG_STEPS is enough for the scheduler in this test.
    model, _, optim, sched = build_model_and_optimizer(
        cfg, device, total_steps=DEBUG_STEPS
    )
    model.to(device).train()

    # islice stops after DEBUG_STEPS batches without asking the loader for
    # an extra batch that would only be thrown away.
    batches = islice(train_loader, DEBUG_STEPS)
    for step, (feats, feat_lens, toks, tok_lens) in enumerate(batches, 1):
        # ---------- LOG INPUT SHAPES ----------
        print(f"\n─── Batch {step} ─────────────────────────")
        print(f"feats shape    : {feats.shape}")          # [B, T_max, 80]
        print(f"feat_lens      : {feat_lens.tolist()}")   # list[B]
        print(f"toks shape     : {toks.shape}")           # [B, L_max]
        print(f"tok_lens       : {tok_lens.tolist()}")    # list[B]

        feats, feat_lens = feats.to(device), feat_lens.to(device)
        toks, tok_lens = toks.to(device), tok_lens.to(device)

        # ---------- CALL LOSS ----------
        loss, loss_ctc, loss_att = compute_loss_batch_v1(
            model, feats, feat_lens, toks, tok_lens, cfg, device
        )

        # Fail fast before backward/step so a NaN loss cannot poison the
        # weights; print the breakdown first to show which term went bad.
        if torch.isnan(loss):
            print(f"[{step}/{DEBUG_STEPS}] "
                  f"loss={loss.item():.3f} (ctc={loss_ctc.item():.3f}, "
                  f"att={loss_att.item():.3f})")
            raise ValueError("❌ NaN loss phát hiện!")

        # ---------- BACKWARD & OPT ----------
        optim.zero_grad()
        loss.backward()
        grad_norm = torch.nn.utils.clip_grad_norm_(
            model.parameters(), cfg.training.max_grad_norm
        )
        optim.step()
        sched.step()  # scheduler is stepped after the optimizer
        lr_now = sched.get_last_lr()[0]

        # ---------- SUMMARY ----------
        print("\n============================")
        print(f"[{step}/{DEBUG_STEPS}] "
            f"loss={loss.item():.3f} (ctc={loss_ctc.item():.3f}, "
            f"att={loss_att.item():.3f})  grad={grad_norm:.2f}  lr={lr_now:.2e}")
        print("============================\n")

    print("✅ Smoke-train hoàn tất!")

# Run the smoke test only when executed as a script, not on import.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm
[2025-07-18 13:46:38] INFO: Checkpoint: loading from checkpoint ../../../chunkformer-large-vie/pytorch_model.bin for GPU



🧾 Loaded checkpoint from: ../../../chunkformer-large-vie/pytorch_model.bin
📦 Checkpoint keys: ['encoder.global_cmvn.mean', 'encoder.global_cmvn.istd', 'encoder.embed.out.weight', 'encoder.embed.out.bias', 'encoder.embed.conv.0.weight'] ... (total 813)
🔍 AED decoder head included in checkpoint? ✅ YES
📊 Model total params: 113,852,240, trainable: 113,852,240
!!!Vocab size: 6992

─── Batch 1 ─────────────────────────
feats shape    : torch.Size([16, 901, 80])
feat_lens      : [626, 573, 392, 282, 429, 901, 311, 417, 679, 229, 557, 417, 186, 467, 161, 614]
toks shape     : torch.Size([16, 28])
tok_lens       : [15, 15, 13, 4, 8, 28, 13, 7, 28, 5, 18, 9, 5, 14, 5, 22]
Origin loss_ctc shape = torch.Size([]), sum=4.148848533630371, mean=4.148848533630371
[DBG] loss_ctc = 4.1488
>>>>>>>>>>>>>>>>>>>  ys_out shape: torch.Size([16, 29]), logp shape: torch.Size([16, 29, 6992])
Origin loss_att torch.Size([]), sum=6.95413875579834, mean=6.95413875579834
[DBG] loss_att = 6.9541
[DBG] final loss = 0.