<a href="https://colab.research.google.com/github/mj3876/-/blob/main/DSP2025_speaker_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# [디지탈 신호 처리] (2025)


---


### 딥러닝 프로젝트 : 딥러닝 기반 화자 식별 알고리즘 성능 향상

---
*   다양한 방법을 활용하여 모델 성능을 높이면 됩니다. (DSP 기법 활용, 데이터 전처리 및 증강, 모델 구조 개선, 학습 기법 개선, 손실 함수 개선)
* 데이터셋 : 1초 단위의 음성이 약 4,648개로, Train : Validation : Test = 8 : 1 : 1로 구분되어 있습니다. 여러분들은 테스트 세트에서 가장 좋은 성능 (Accuracy)을 보이는 것이 목표입니다.
* 환경 구성 : 런어스 혹은 깃허브 (https://github.com/JaeBinCHA7/DNN-based-Speaker-Identification-Tutorial) 에서 코드와 데이터셋을 다운로드 합니다. 다운로드한 데이터셋과 코드는 구글 드라이브에 올려주세요.
* GPU가 탑재된 로컬 PC가 없는 분들은 .ipynb 파일을 활용해주시고, 사용 가능한 GPU가 있는 분들은 깃허브 코드를 참고해주시면 됩니다.
* 제출 : 학습한 모델로 테스트 세트를 추론한 후 얻은 "test_predictions.csv"를 런어스 자료실에 업로드 해주세요.





---


### 구글 드라이브 마운트



In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
%cd '/content/drive/MyDrive/dsp'

/content/drive/MyDrive/dsp


In [None]:
import zipfile
import os

# Unpack the dataset archive into the Drive project folder, unless the
# 'dataset' directory is already there from a previous run.
zip_path = '/content/drive/MyDrive/dsp/dsp_dataset_2025 (3).zip'
extract_path = '/content/drive/MyDrive/dsp'
dataset_dir = os.path.join(extract_path, 'dataset')

if os.path.exists(dataset_dir):
    print(f"'{dataset_dir}' already exists. Skipping extraction.")
else:
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"'{zip_path}' extracted to '{extract_path}'")

'/content/drive/MyDrive/dsp/dataset' already exists. Skipping extraction.




---


### 필요한 라이브러리 설치하기


In [None]:
pip install tensorboardX



In [None]:
import argparse
import os
import time
import torch
import numpy as np
from tensorboardX import SummaryWriter
import torch.nn as nn
import torch.nn.functional as F
import os
import argparse
import torch
import time
import numpy as np
from torch.utils.data import DataLoader, Dataset
import random
import pandas as pd
from typing import Dict
import json
from tqdm.auto import tqdm
import librosa
import soundfile as sf



---


### 훈련 하이퍼 파라미터 설정



In [None]:
def build_args():
    """Build the command-line parser and parse the current arguments.

    Returns:
        The ``(namespace, unknown_args)`` tuple produced by
        ``ArgumentParser.parse_known_args()``; arguments the parser does not
        recognise end up in ``unknown_args`` instead of raising an error.
    """
    parser = argparse.ArgumentParser(
        description="Speaker Identification Trainer (per-experiment directories)"
    )
    add = parser.add_argument

    # Experiment (change the default name for every new experiment)
    add("--exp_name", type=str, default="EXP1",
        help="experiment name; outputs saved under ./logs/<exp_name>/")

    # Data
    add("--dataset_root", type=str, default="/content/local_dataset/",
        help="root for split wavs")
    add("--train_csv", type=str, default="./dataset/train.csv")
    add("--valid_csv", type=str, default="./dataset/valid.csv")
    add("--test_csv", type=str, default="./dataset/test.csv")

    # Audio / feature extraction
    add("--sr", type=int, default=16000)
    add("--duration", type=float, default=1.0,
        help="seconds per sample (will pad/crop)")
    add("--n_mels", type=int, default=80)
    add("--n_fft", type=int, default=400)        # 25 ms @ 16 kHz
    add("--hop_length", type=int, default=160)   # 10 ms @ 16 kHz

    # Training
    add("--epochs", type=int, default=30)
    add("--batch_size", type=int, default=512)
    add("--lr", type=float, default=2e-3)
    add("--min_lr", type=float, default=1e-5)
    add("--weight_decay", type=float, default=1e-4)
    add("--grad_clip", type=float, default=5.0)
    add("--num_workers", type=int, default=0)
    add("--seed", type=int, default=42)
    add("--cpu", action="store_true")

    # Output root (the per-experiment subdirectory is resolved by the Trainer)
    add("--out_dir", type=str, default="./logs",
        help="root logs dir (experiment subdir will be created)")

    return parser.parse_known_args()


---

### Helper functions

In [None]:
# Utils
def ensure_dir(p: str):
    """Create directory ``p`` including missing parents; do nothing if it already exists."""
    os.makedirs(name=p, exist_ok=True)


def set_seed(seed: int = 42):
    """Seed every random number generator used here with the same value.

    Covers Python's ``random``, NumPy, and PyTorch (CPU generator plus all
    CUDA devices) so that shuffling and weight initialisation are repeatable.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)


def load_csv(csv_path: str) -> pd.DataFrame:
    """Read a split CSV and check that it has the columns this file relies on.

    Args:
        csv_path: Path to a CSV file with at least ``path`` and ``speaker_id`` columns.

    Returns:
        The DataFrame returned by ``pd.read_csv``.

    Raises:
        FileNotFoundError: ``csv_path`` does not exist.
        ValueError: a required column is missing.
    """
    if not os.path.exists(csv_path):
        raise FileNotFoundError(f"CSV not found: {csv_path}")

    frame = pd.read_csv(csv_path)
    required = {"path", "speaker_id"}
    if required - set(frame.columns):
        raise ValueError(f"{csv_path} must contain columns: {required}")
    return frame


def build_label_map(train_df: pd.DataFrame, valid_df: pd.DataFrame) -> Dict[str, int]:
    """Map every speaker id seen in train or valid to a class index.

    Speaker ids are converted to strings, de-duplicated across both frames and
    sorted (string order); the index of each id in that order is its label.
    """
    speakers = set()
    for frame in (train_df, valid_df):
        speakers.update(frame["speaker_id"].astype(str))
    return {speaker: index for index, speaker in enumerate(sorted(speakers))}


def get_lr(optimizer: torch.optim.Optimizer) -> float:
    """Return the learning rate of the optimizer's first parameter group.

    Falls back to 0.0 when there are no parameter groups or the first group
    has no ``"lr"`` entry.
    """
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return 0.0
    return float(first_group.get("lr", 0.0))

def fmt_pct(x: float) -> str:
    """Format a fraction as a percentage string, e.g. 0.845 -> ' 84.50%'.

    The number is right-aligned in a field of width 6 with two decimals.
    """
    percent = 100.0 * x
    return "{:6.2f}%".format(percent)

def print_epoch_header(epoch: int, total_epochs: int):
    """Print a blank line followed by a banner announcing the start of an epoch."""
    rule = "=" * 66
    title = f"[Epoch {epoch:03d}/{total_epochs:03d}] START"
    print("", rule, title, rule, sep="\n")

def print_epoch_summary(epoch: int, train_loss: float, train_acc: float,
                        val_loss: float, val_acc: float, best_acc: float,
                        lr: float, elapsed: float, ckpt_best: bool, csv_path: str):
    """Print an ASCII summary table for one epoch and append its metrics to a CSV log.

    Args:
        epoch: Epoch number written to the CSV row.
        train_loss, train_acc: Training loss and accuracy (accuracy as a fraction).
        val_loss, val_acc: Validation loss and accuracy (accuracy as a fraction).
        best_acc: Best validation accuracy so far (fraction).
        lr: Learning rate to report.
        elapsed: Epoch wall time in seconds.
        ckpt_best: When true, an extra "new best model" line is printed.
        csv_path: CSV file to append to; the header row is written only when
            the file does not exist yet.
    """
    # ASCII table summary
    bar = "-" * 66
    print(bar)
    print("| Split |   Loss   |   Acc    |")
    print(bar)
    print(f"| Train | {train_loss:8.4f} | {fmt_pct(train_acc)} |")
    print(f"| Valid | {val_loss:8.4f} | {fmt_pct(val_acc)} |")
    print(bar)
    print(f"| Best Acc: {fmt_pct(best_acc)} | LR: {lr:.3e} | Time: {elapsed:.1f}s |")
    print(bar)
    if ckpt_best:
        print("✓ New BEST model saved.")
    print("=" * 66)

    # CSV logging (append)
    row = dict(
        epoch=epoch,
        train_loss=float(train_loss),
        train_acc=float(train_acc),
        valid_loss=float(val_loss),
        valid_acc=float(val_acc),
        best_acc=float(best_acc),
        lr=float(lr),
        time_sec=float(elapsed),
    )
    header = not os.path.exists(csv_path)
    log_dir = os.path.dirname(csv_path)
    if log_dir:
        # A bare filename has an empty directory part, and creating "" would
        # raise; in that case the file simply goes to the working directory.
        ensure_dir(log_dir)
    pd.DataFrame([row]).to_csv(csv_path, mode="a", index=False, header=header)


# -----------------------------
# Audio -> Log-Mel features: the signal is not analysed in the time domain
# but converted to a log-mel representation first.
# ----------------------------- (data pre-processing code)
def load_wav(full_path: str, target_sr: int) -> np.ndarray:
    """Load an audio file as a mono float32 waveform at ``target_sr``.

    The file is first read with ``soundfile``; multi-channel audio is averaged
    over axis 1 and the signal is resampled with librosa when the file's sample
    rate differs from ``target_sr``. If anything in that path raises,
    ``librosa.load`` is used as a fallback.
    """
    try:
        audio, native_sr = sf.read(full_path, always_2d=False)
        # mix down to mono when there is more than one channel
        audio = np.mean(audio, axis=1) if audio.ndim > 1 else audio
        if native_sr != target_sr:
            audio = librosa.resample(
                audio, orig_sr=native_sr, target_sr=target_sr, res_type="kaiser_best"
            )
        return audio.astype(np.float32)
    except Exception:
        fallback, _ = librosa.load(full_path, sr=target_sr, mono=True)
        return fallback.astype(np.float32)


def fix_length(y: np.ndarray, num_samples: int) -> np.ndarray:
    """Force a signal to exactly ``num_samples`` samples.

    Longer signals are truncated from the end, shorter ones are zero-padded at
    the end, and a signal that already has the right length is returned as is.
    """
    shortfall = num_samples - len(y)
    if shortfall > 0:
        # too short: append zeros
        return np.pad(y, (0, shortfall), mode="constant")
    # exact length: hand back the input; too long: keep the leading samples
    return y if shortfall == 0 else y[:num_samples]


def wav_to_logmel(
        y: np.ndarray,
        sr: int,
        n_mels: int = 80,        # number of mel filters
        n_fft: int = 400,        # FFT window size (25 ms @ 16 kHz)
        hop_length: int = 160,   # frame hop (10 ms @ 16 kHz)
        fmin: int = 20,
        fmax: int = 7600,
) -> np.ndarray:
    """Convert a time-domain waveform into a log-mel spectrogram.

    A power mel spectrogram is computed with librosa and converted to dB
    relative to its own maximum (``ref=np.max``).

    Returns:
        float32 array laid out as [n_mels, T].
    """
    mel_settings = dict(
        n_fft=n_fft,
        hop_length=hop_length,
        n_mels=n_mels,
        fmin=fmin,
        fmax=fmax,
        power=2.0,
    )
    mel_power = librosa.feature.melspectrogram(y=y, sr=sr, **mel_settings)
    log_mel = librosa.power_to_db(mel_power, ref=np.max)
    return log_mel.astype(np.float32)  # [n_mels, T]


# Mean and variance normalization (CMVN-style)
def cmvn(x: np.ndarray, eps: float = 1e-5) -> np.ndarray:
    """Normalize ``x`` along axis 1 to zero mean and (approximately) unit std.

    Mean and standard deviation are computed per row (axis 1, dims kept), and
    ``eps`` is added to the std to avoid division by zero for constant rows.
    """
    centered = x - x.mean(axis=1, keepdims=True)
    scale = x.std(axis=1, keepdims=True) + eps
    return centered / scale



---

### 데이터로더



In [None]:
# For training / validation
class SpeakerDataset(Dataset):
    """Labelled dataset yielding ``(log-mel features, class index)`` pairs.

    Rows come from a CSV with ``path`` and ``speaker_id`` columns. Paths are
    joined onto ``dataset_root`` and speaker ids (as strings) are translated to
    class indices through ``label_map``; rows whose speaker id is not in the
    map are dropped with a warning.

    NOTE(review): the default ``duration_sec`` here is 3.0 while TestDataset
    defaults to 1.0; the callers in this file always pass it explicitly.
    """

    def __init__(
            self,
            csv_path: str,
            dataset_root: str = "./dataset",
            label_map: Dict[str, int] = None,
            target_sr: int = 16000,
            duration_sec: float = 3.0,
            n_mels: int = 80,
            n_fft: int = 400,
            hop_length: int = 160,
            apply_cmvn: bool = True,
    ):
        self.csv_path = csv_path
        self.df = load_csv(csv_path).copy()
        self.dataset_root = dataset_root
        self.label_map = label_map

        # audio / feature settings
        self.target_sr = target_sr
        self.duration_sec = duration_sec
        self.num_samples = int(round(duration_sec * target_sr))
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.apply_cmvn = apply_cmvn

        if self.label_map is None:
            raise ValueError("label_map must be provided (speaker_id -> class index)")

        # resolve file locations and integer labels up front
        self.df["abs_path"] = self.df["path"].map(lambda rel: os.path.join(dataset_root, rel))
        self.df["label"] = self.df["speaker_id"].astype(str).map(self.label_map)

        # speakers missing from the label map produce NaN labels; remove them
        rows_before = len(self.df)
        self.df = self.df.dropna(subset=["label"])
        self.df["label"] = self.df["label"].astype(int)
        dropped = rows_before - len(self.df)
        if dropped > 0:
            print(f"[WARN] Dropped {dropped} rows due to unmapped speaker_id.")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``(features, label)``; features are a [1, n_mels, T] tensor."""
        record = self.df.iloc[idx]
        wav_path = record["abs_path"]
        target = int(record["label"])

        signal = fix_length(load_wav(wav_path, self.target_sr), self.num_samples)
        feats = wav_to_logmel(
            signal,
            sr=self.target_sr,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
        )
        if self.apply_cmvn:
            feats = cmvn(feats)  # [n_mels, T]

        feats = torch.from_numpy(feats).unsqueeze(0)  # [1, n_mels, T]
        return feats, target



In [None]:
# For test-time prediction
class TestDataset(Dataset):
    """Dataset yielding ``(log-mel features, filename)`` pairs for inference.

    Rows come from a CSV loaded with ``load_csv``; each ``path`` is joined onto
    ``dataset_root``. No labels are produced.
    """

    def __init__(
        self,
        csv_path: str,
        dataset_root: str = "./dataset",
        target_sr: int = 16000,       # target sampling rate
        duration_sec: float = 1.0,
        n_mels: int = 80,             # spectrogram parameters
        n_fft: int = 400,
        hop_length: int = 160,
        apply_cmvn: bool = True,
    ):
        table = load_csv(csv_path).copy()
        table["abs_path"] = table["path"].map(lambda rel: os.path.join(dataset_root, rel))
        self.df = table

        self.target_sr = target_sr
        self.num_samples = int(round(duration_sec * target_sr))
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.apply_cmvn = apply_cmvn

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx: int):
        """Return ``(features, filename)``; features are a [1, n_mels, T] tensor."""
        record = self.df.iloc[idx]
        wav_path = record["abs_path"]
        name = os.path.basename(record["path"])

        signal = fix_length(load_wav(wav_path, self.target_sr), self.num_samples)
        feats = wav_to_logmel(
            signal,
            sr=self.target_sr,
            n_mels=self.n_mels,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
        )
        if self.apply_cmvn:
            feats = cmvn(feats)  # [n_mels, T]

        feats = torch.from_numpy(feats).unsqueeze(0)  # [1, n_mels, T]
        return feats, name


---

### 모델 선언

In [None]:
import torch#SIMPLECNN모델을 Conformer Block모델로 바꿔서 실행했습니다.
import torch.nn as nn
import torch.nn.functional as F

# --------------------------------------
# Swish Activation (PyTorch 버전)
# --------------------------------------
class Swish(nn.Module):
    """Swish activation: the input multiplied by its own sigmoid."""

    def forward(self, x):
        gate = torch.sigmoid(x)
        return x * gate


# --------------------------------------
# Conformer Block (Conv + Self-Attention + FFN)
# --------------------------------------
class ConformerBlock(nn.Module):
    """One Conformer block: half-step FFN, self-attention, conv module, half-step FFN, LayerNorm.

    Args:
        dim: Feature dimension of the input and output.
        ff_mult: Expansion factor of the hidden layer in both feed-forward modules.
        num_heads: Number of attention heads.
        conv_exp: Channel expansion of the first point-wise conv (halved again by the GLU).
        dropout: Dropout probability used in the FFNs, the attention and the conv module.

    ``forward`` takes and returns a tensor laid out as [B, T, dim]
    (the attention is built with ``batch_first=True``).
    """

    def __init__(self, dim, ff_mult=4, num_heads=4, conv_exp=2, dropout=0.1):
        super().__init__()
        # Channel count after the GLU halves the expanded point-wise conv output.
        conv_channels = conv_exp * dim // 2

        self.ffn1 = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, ff_mult * dim),
            Swish(),
            nn.Dropout(dropout),
            nn.Linear(ff_mult * dim, dim),
        )

        self.mha = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads, dropout=dropout, batch_first=True)
        self.ln_attn = nn.LayerNorm(dim)

        # Index 0 (LayerNorm) runs on [B, T, dim]; indices 1.. run on [B, dim, T].
        # The layer order is kept unchanged so existing checkpoints still load.
        self.conv_module = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Conv1d(dim, conv_exp * dim, kernel_size=1),
            nn.GLU(dim=1),
            # depth-wise conv; kernel 15 with padding 7 keeps the sequence length
            nn.Conv1d(conv_channels, conv_channels, kernel_size=15, padding=7, groups=conv_channels),
            nn.BatchNorm1d(conv_channels),
            Swish(),
            nn.Conv1d(conv_channels, dim, kernel_size=1),
            nn.Dropout(dropout),
        )

        self.ffn2 = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, ff_mult * dim),
            Swish(),
            nn.Dropout(dropout),
            nn.Linear(ff_mult * dim, dim),
        )

        self.norm = nn.LayerNorm(dim)

    def forward(self, x):
        # Feed-forward 1 (half-step residual)
        x = x + 0.5 * self.ffn1(x)

        # Multi-head self-attention. The pre-norm is computed once and reused
        # as query, key and value instead of running LayerNorm three times.
        normed = self.ln_attn(x)
        attn_out, _ = self.mha(normed, normed, normed)
        x = x + attn_out

        # Convolution module: LayerNorm on [B, T, dim], the rest on [B, dim, T]
        residual = x
        x_conv = self.conv_module[0](x)
        x_conv = x_conv.transpose(1, 2)  # [B, dim, T]
        x_conv = self.conv_module[1:](x_conv)
        x = residual + x_conv.transpose(1, 2)

        # Feed-forward 2 (half-step residual), then the final LayerNorm
        x = x + 0.5 * self.ffn2(x)
        return self.norm(x)


# --------------------------------------
# Conformer Encoder (Conv + Transformer)
# --------------------------------------
class Conformer(nn.Module):
    """Conv-subsampled stack of Conformer blocks with mean pooling and a linear classifier.

    Args:
        n_classes: Number of output classes (size of the logits).
        input_dim: Size of the feature axis of the input (number of mel bins).
        encoder_dim: Width of the encoder and channel count of the subsampling convs.
        num_layers: Number of ConformerBlock layers.
        num_heads: Attention heads per block.
    """

    @staticmethod
    def _subsampled_size(size: int, num_convs: int = 2) -> int:
        """Length of an axis after ``num_convs`` convs with kernel 3, stride 2, padding 1."""
        for _ in range(num_convs):
            size = (size - 1) // 2 + 1
        return size

    def __init__(self, n_classes: int, input_dim=80, encoder_dim=128, num_layers=4, num_heads=4):
        super().__init__()
        # CNN-based subsampling: two stride-2 convs shrink both feature and time axes
        self.conv_subsample = nn.Sequential(
            nn.Conv2d(1, encoder_dim, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(encoder_dim, encoder_dim, kernel_size=3, stride=2, padding=1),
            nn.ReLU()
        )

        # Flattened conv output -> encoder width. The feature size is derived from
        # the conv arithmetic rather than ``input_dim // 4``: the two agree when
        # input_dim is a multiple of 4 (e.g. 80 -> 20), but the conv output is one
        # larger otherwise (e.g. 81 -> 21), which used to cause a shape mismatch.
        freq_bins = self._subsampled_size(input_dim)
        self.linear = nn.Linear(freq_bins * encoder_dim, encoder_dim)

        # Stack of Conformer blocks
        self.blocks = nn.ModuleList([
            ConformerBlock(dim=encoder_dim, num_heads=num_heads)
            for _ in range(num_layers)
        ])

        # Pooling over time + classifier
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Linear(encoder_dim, n_classes)

    def forward(self, x):
        """
        x: [B, 1, n_mels, T]  ->  logits: [B, n_classes]
        """
        x = self.conv_subsample(x)  # [B, C, n_mels', T']
        batch, channels, freq, frames = x.shape
        # one vector per time frame: [B, T', C * n_mels']
        x = x.permute(0, 3, 1, 2).contiguous().view(batch, frames, channels * freq)
        x = self.linear(x)

        for block in self.blocks:
            x = block(x)

        x = x.transpose(1, 2)  # [B, D, T']
        x = self.pool(x).squeeze(-1)  # [B, D]
        logits = self.fc(x)
        return logits

---

### 학습 트레이너

In [None]:
# Limit OpenMP / MKL to a single thread via environment variables.
# NOTE(review): these are set after torch and numpy were imported in an earlier
# cell; confirm that the libraries still pick them up at this point.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
# Enable the cuDNN benchmark mode flag.
torch.backends.cudnn.benchmark = True

class Trainer:
    """Train and validate the Conformer speaker classifier for one experiment.

    Everything produced by a run is written under
    ``<args.out_dir>/<args.exp_name>/``: ``run_config.json``, ``label_map.json``,
    TensorBoard events (``tensorboard/``), ``checkpoints/best.pth`` and
    ``metrics_log.csv``.

    Args:
        args: Namespace with the fields defined in ``build_args``.
    """

    def __init__(self, args):
        self.args = args
        set_seed(args.seed)

        # ---- Resolve experiment directories ----
        self.logs_root = os.path.abspath(args.out_dir)          # e.g., ./logs
        self.exp_dir = os.path.join(self.logs_root, args.exp_name)  # e.g., ./logs/my_exp
        self.tb_dir = os.path.join(self.exp_dir, "tensorboard")
        self.ckpt_dir = os.path.join(self.exp_dir, "checkpoints")
        ensure_dir(self.tb_dir)
        ensure_dir(self.ckpt_dir)

        # Save run config snapshot for reproducibility
        with open(os.path.join(self.exp_dir, "run_config.json"), "w") as f:
            json.dump(vars(args), f, indent=2, ensure_ascii=False)

        # TensorBoard writer under experiment dir
        self.writer = SummaryWriter(log_dir=self.tb_dir)

        # ---- Device ----
        # Honour --cpu (as infer() does); previously the flag was ignored here.
        force_cpu = bool(getattr(args, "cpu", False))
        if torch.cuda.is_available() and not force_cpu:
            self.device = torch.device("cuda")
            print(f"[INFO] Using GPU: {torch.cuda.get_device_name(0)}")
        elif force_cpu:
            self.device = torch.device("cpu")
            print("[INFO] --cpu requested. Using CPU.")
        else:
            self.device = torch.device("cpu")
            print("[WARNING] CUDA not available. Using CPU instead.")

        # ---- Data & Label map ----
        train_df = load_csv(args.train_csv)
        valid_df = load_csv(args.valid_csv)
        label_map = build_label_map(train_df, valid_df)
        self.label_map = label_map
        self.inv_label_map = {v: k for k, v in label_map.items()}

        # save label map in experiment dir (read back by infer())
        with open(os.path.join(self.exp_dir, "label_map.json"), "w") as f:
            json.dump(self.label_map, f, indent=2, ensure_ascii=False)

        self.num_classes = len(self.label_map)
        print(f"[INFO] #speakers (classes) = {self.num_classes}")
        print(f"[INFO] Device: {self.device}")
        print(f"[INFO] Experiment dir: {self.exp_dir}")

        # ---- Datasets ----
        self.train_set = SpeakerDataset(
            csv_path=args.train_csv,
            dataset_root=args.dataset_root,
            label_map=self.label_map,
            target_sr=args.sr,
            duration_sec=args.duration,
            n_mels=args.n_mels,
            n_fft=args.n_fft,
            hop_length=args.hop_length,
            apply_cmvn=True,
        )
        self.valid_set = SpeakerDataset(
            csv_path=args.valid_csv,
            dataset_root=args.dataset_root,
            label_map=self.label_map,
            target_sr=args.sr,
            duration_sec=args.duration,
            n_mels=args.n_mels,
            n_fft=args.n_fft,
            hop_length=args.hop_length,
            apply_cmvn=True,
        )

        # ---- Dataloaders ----
        # Pinned host memory is only useful for transfers to a CUDA device.
        pin_memory = self.device.type == "cuda"
        self.train_loader = DataLoader(
            self.train_set, batch_size=args.batch_size, shuffle=True,
            num_workers=args.num_workers, pin_memory=pin_memory, drop_last=True
        )
        self.valid_loader = DataLoader(
            self.valid_set, batch_size=args.batch_size, shuffle=False,
            num_workers=args.num_workers, pin_memory=pin_memory, drop_last=False
        )

        # ---- Model/Opt ---
        self.model = Conformer(n_classes=self.num_classes, input_dim=args.n_mels).to(self.device)
        self.criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

        # 5 epochs of linear warm-up (0.1 * lr -> lr), then cosine annealing.
        # NOTE(review): T_max is args.epochs although the cosine phase only runs
        # for the epochs left after warm-up, so the schedule stops before reaching
        # min_lr; confirm whether T_max should be epochs minus the warm-up length.
        warmup = torch.optim.lr_scheduler.LinearLR(self.optimizer, start_factor=0.1, total_iters=5)
        cosine = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=args.epochs, eta_min=args.min_lr)
        self.scheduler = torch.optim.lr_scheduler.SequentialLR(self.optimizer, [warmup, cosine], milestones=[5])

        self.global_step = 0
        self.best_acc = 0.0

        # CSV metrics path (under experiment dir)
        self.metrics_csv = os.path.join(self.exp_dir, "metrics_log.csv")

        # tqdm common bar format
        self.bar_format = (
            "{l_bar}{bar:30}| {n_fmt}/{total_fmt} "
            "[{elapsed}<{remaining}, {rate_fmt}]"
        )

    def train_one_epoch(self, epoch: int):
        """Run one pass over the training loader; return ``(mean loss, accuracy)``."""
        self.model.train()

        total_seen = 0
        total_correct = 0
        loss_sum = 0.0

        lr_now = get_lr(self.optimizer)
        pbar = tqdm(
            self.train_loader,
            desc=f"Train {epoch:03d} | lr={lr_now:.2e}",
            ncols=116,
            bar_format=self.bar_format,
            mininterval=0.5
        )

        tic = time.time()
        # each batch: log-mel features and the matching integer speaker labels
        for mel, label in pbar:
            bs = label.size(0)
            mel = mel.to(self.device)
            label = label.to(self.device)

            # forward pass
            logits = self.model(mel)
            loss = self.criterion(logits, label)

            # backward pass with gradient-norm clipping, then the parameter update
            self.optimizer.zero_grad(set_to_none=True)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
            self.optimizer.step()

            with torch.no_grad():
                pred = logits.argmax(dim=1)
                correct = (pred == label).sum().item()

                total_seen += bs
                total_correct += correct
                loss_sum += loss.item() * bs

                elapsed = max(1e-9, time.time() - tic)
                ips = total_seen / elapsed  # items per second
                cur_loss = loss_sum / max(1, total_seen)
                cur_acc = total_correct / max(1, total_seen)

            pbar.set_postfix(
                loss=f"{cur_loss:7.4f}",
                acc=f"{cur_acc:6.4f}",
                ips=f"{ips:6.1f}/s",
                lr=f"{get_lr(self.optimizer):.2e}",
            )
            self.global_step += 1

        epoch_loss = loss_sum / max(1, total_seen)
        epoch_acc = total_correct / max(1, total_seen)
        return epoch_loss, epoch_acc

    @torch.no_grad()
    def evaluate(self, epoch: int):
        """Run one pass over the validation loader; return ``(mean loss, accuracy)``."""
        self.model.eval()

        total_seen = 0
        total_correct = 0
        loss_sum = 0.0

        pbar = tqdm(
            self.valid_loader,
            desc=f"Valid {epoch:03d}",
            ncols=116,
            bar_format=self.bar_format,
            mininterval=0.5
        )
        tic = time.time()
        for mel, label in pbar:
            bs = label.size(0)
            mel = mel.to(self.device)
            label = label.to(self.device)

            logits = self.model(mel)
            loss = self.criterion(logits, label)

            pred = logits.argmax(dim=1)
            correct = (pred == label).sum().item()

            total_seen += bs
            total_correct += correct
            loss_sum += loss.item() * bs

            elapsed = max(1e-9, time.time() - tic)
            ips = total_seen / elapsed
            cur_loss = loss_sum / max(1, total_seen)
            cur_acc = total_correct / max(1, total_seen)

            pbar.set_postfix(
                val_loss=f"{cur_loss:7.4f}",
                val_acc=f"{cur_acc:6.4f}",
                ips=f"{ips:6.1f}/s",
            )

        epoch_loss = loss_sum / max(1, total_seen)
        epoch_acc = total_correct / max(1, total_seen)
        return epoch_loss, epoch_acc

    def save_checkpoint(self, epoch: int):
        """Write the current training state to ``checkpoints/best.pth``.

        ``run`` calls this only when the validation accuracy improved, so the
        file always holds the best model so far. No ``last.pth`` is written.
        """
        state = {
            "epoch": epoch,
            "model_state": self.model.state_dict(),
            "optimizer_state": self.optimizer.state_dict(),
            "scheduler_state": self.scheduler.state_dict(),
            "best_acc": self.best_acc,
            "label_map": self.label_map,
            "args": vars(self.args),
        }
        best_path = os.path.join(self.ckpt_dir, "best.pth")
        torch.save(state, best_path)
        print(f"[Checkpoint] BEST updated → {best_path}")

    def run(self):
        """Train for ``args.epochs`` epochs, logging metrics and keeping the best checkpoint."""
        total_epochs = self.args.epochs
        try:
            for epoch in range(1, total_epochs + 1):
                print_epoch_header(epoch, total_epochs)
                t0 = time.time()

                train_loss, train_acc = self.train_one_epoch(epoch)
                val_loss, val_acc = self.evaluate(epoch)

                # TensorBoard
                self.writer.add_scalar("train/loss", train_loss, epoch)
                self.writer.add_scalar("train/acc", train_acc, epoch)
                self.writer.add_scalar("valid/loss", val_loss, epoch)
                self.writer.add_scalar("valid/acc", val_acc, epoch)
                self.writer.add_scalar("train/lr", get_lr(self.optimizer), epoch)

                # Scheduler step AFTER logging current LR
                self.scheduler.step()
                cur_lr = get_lr(self.optimizer)

                # Best & CKPT
                is_best = val_acc > self.best_acc
                if is_best:
                    self.best_acc = val_acc
                    self.save_checkpoint(epoch)

                elapsed = time.time() - t0
                print_epoch_summary(
                    epoch=epoch,
                    train_loss=train_loss, train_acc=train_acc,
                    val_loss=val_loss, val_acc=val_acc,
                    best_acc=self.best_acc, lr=cur_lr,
                    elapsed=elapsed, ckpt_best=is_best,
                    csv_path=self.metrics_csv
                )
        finally:
            # Close the TensorBoard writer so buffered events reach disk even
            # when training is interrupted.
            self.writer.close()

        print("[INFO] Training finished.")

---

### 테스터

In [None]:
def load_label_map(exp_dir: str) -> Dict[str, int]:
    """Read ``label_map.json`` from the experiment directory.

    Raises:
        FileNotFoundError: the file is not present in ``exp_dir``.
    """
    lm_path = os.path.join(exp_dir, "label_map.json")
    if os.path.exists(lm_path):
        with open(lm_path, "r") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"label_map.json not found at {lm_path}")


def load_checkpoint(exp_dir: str, device: torch.device):
    """Load ``checkpoints/best.pth`` from the experiment directory onto ``device``.

    Returns:
        Whatever object was stored in the checkpoint file.

    Raises:
        FileNotFoundError: the checkpoint file does not exist.
    """
    ckpt_path = os.path.join(exp_dir, "checkpoints", "best.pth")
    if os.path.exists(ckpt_path):
        return torch.load(ckpt_path, map_location=device)
    raise FileNotFoundError(f"best.pth not found at {ckpt_path}")


def infer(args):
    """Predict speaker ids for the test CSV with the experiment's best checkpoint.

    Reads ``label_map.json`` and ``checkpoints/best.pth`` from
    ``<args.out_dir>/<args.exp_name>/`` and writes ``test_predictions.csv``
    (columns ``filename``, ``speaker_id``) into the same directory.

    Args:
        args: Namespace with the fields defined in ``build_args``.

    Raises:
        FileNotFoundError: the label map or the checkpoint is missing.
    """
    # Resolve experiment dirs
    logs_root = os.path.abspath(args.out_dir)
    exp_dir = os.path.join(logs_root, args.exp_name)
    ensure_dir(exp_dir)

    # Load label map and inverse map (class index -> speaker id)
    label_map = load_label_map(exp_dir)
    inv_label_map = {v: k for k, v in label_map.items()}
    num_classes = len(inv_label_map)

    # Device
    device = torch.device("cuda" if torch.cuda.is_available() and not args.cpu else "cpu")
    print(f"[INFO] Device: {device}")
    print(f"[INFO] Experiment dir: {exp_dir}")

    # Dataset & loader
    dataset = TestDataset(
        csv_path=args.test_csv,
        dataset_root=args.dataset_root,
        target_sr=args.sr,
        duration_sec=args.duration,
        n_mels=args.n_mels,
        n_fft=args.n_fft,
        hop_length=args.hop_length,
        apply_cmvn=True,
    )
    loader = DataLoader(
        dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=args.num_workers,
        pin_memory=device.type == "cuda",  # pinned memory only helps CUDA transfers
        drop_last=False,
    )

    # Model: must be the same architecture the Trainer saved. The Trainer builds
    # Conformer(n_classes, input_dim=args.n_mels); the former SimpleCNN reference
    # here was undefined and could not load that state dict.
    model = Conformer(n_classes=num_classes, input_dim=args.n_mels).to(device)
    state = load_checkpoint(exp_dir, device)
    model.load_state_dict(state["model_state"], strict=True)
    model.eval()

    # Inference loop
    filenames = []
    preds = []

    with torch.no_grad():
        pbar = tqdm(loader, desc="Inference", ncols=116)
        for mel, filename in pbar:
            mel = mel.to(device)  # [B, 1, F, T]
            logits = model(mel)   # [B, C]
            pred_idx = logits.argmax(dim=1).cpu().numpy().tolist()
            pred_spk = [inv_label_map[int(i)] for i in pred_idx]

            filenames.extend(filename)
            preds.extend(pred_spk)

    # Save CSV (filename, pred_speaker_id); this is the file to submit.
    out_csv = os.path.join(exp_dir, "test_predictions.csv")
    df = pd.DataFrame({"filename": filenames, "speaker_id": preds})
    df.to_csv(out_csv, index=False)
    print(f"[INFO] Saved predictions → {out_csv}")


---

### 모델 학습 실행하기

In [None]:
# Copy the dataset from Drive to Colab-local storage (/content/local_dataset,
# which is the default --dataset_root).
!mkdir -p /content/local_dataset
!cp -r /content/drive/MyDrive/dsp/dataset/* /content/local_dataset/
# build_args() returns (namespace, unknown_args); only the namespace goes to the Trainer.
args = build_args()
trainer = Trainer(args[0])
trainer.run()

[INFO] Using GPU: NVIDIA A100-SXM4-80GB
[INFO] #speakers (classes) = 40
[INFO] Device: cuda
[INFO] Experiment dir: /content/drive/MyDrive/dsp/logs/EXP1

[Epoch 001/030] START


Train 001 | lr=2.00e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 001:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   3.6645 |   5.39% |
| Valid |   3.6411 |   2.80% |
------------------------------------------------------------------
| Best Acc:   2.80% | LR: 5.600e-04 | Time: 12.1s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 002/030] START


Train 002 | lr=5.60e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 002:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   3.2409 |  14.65% |
| Valid |   3.2549 |  10.97% |
------------------------------------------------------------------
| Best Acc:  10.97% | LR: 9.200e-04 | Time: 11.2s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 003/030] START


Train 003 | lr=9.20e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 003:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   2.8719 |  25.06% |
| Valid |   2.7358 |  31.18% |
------------------------------------------------------------------
| Best Acc:  31.18% | LR: 1.280e-03 | Time: 11.6s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 004/030] START


Train 004 | lr=1.28e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 004:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   2.5213 |  36.55% |
| Valid |   2.6094 |  32.90% |
------------------------------------------------------------------
| Best Acc:  32.90% | LR: 1.640e-03 | Time: 11.2s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 005/030] START


Train 005 | lr=1.64e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 005:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   2.1214 |  52.68% |
| Valid |   2.4982 |  32.90% |
------------------------------------------------------------------
| Best Acc:  32.90% | LR: 2.000e-03 | Time: 11.4s |
------------------------------------------------------------------

[Epoch 006/030] START




Train 006 | lr=2.00e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 006:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   1.6893 |  66.82% |
| Valid |   2.4319 |  36.99% |
------------------------------------------------------------------
| Best Acc:  36.99% | LR: 1.995e-03 | Time: 11.1s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 007/030] START


Train 007 | lr=1.99e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 007:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   1.3030 |  81.70% |
| Valid |   2.0505 |  54.62% |
------------------------------------------------------------------
| Best Acc:  54.62% | LR: 1.978e-03 | Time: 11.6s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 008/030] START


Train 008 | lr=1.98e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 008:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   1.0810 |  89.48% |
| Valid |   2.0441 |  56.13% |
------------------------------------------------------------------
| Best Acc:  56.13% | LR: 1.951e-03 | Time: 11.6s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 009/030] START


Train 009 | lr=1.95e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 009:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.9392 |  95.06% |
| Valid |   2.1012 |  53.76% |
------------------------------------------------------------------
| Best Acc:  56.13% | LR: 1.914e-03 | Time: 11.1s |
------------------------------------------------------------------

[Epoch 010/030] START


Train 010 | lr=1.91e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 010:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.8588 |  97.57% |
| Valid |   2.0359 |  53.76% |
------------------------------------------------------------------
| Best Acc:  56.13% | LR: 1.867e-03 | Time: 11.2s |
------------------------------------------------------------------

[Epoch 011/030] START


Train 011 | lr=1.87e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 011:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.8112 |  98.80% |
| Valid |   1.7085 |  62.37% |
------------------------------------------------------------------
| Best Acc:  62.37% | LR: 1.810e-03 | Time: 11.6s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 012/030] START


Train 012 | lr=1.81e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 012:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7741 |  99.64% |
| Valid |   1.5174 |  72.47% |
------------------------------------------------------------------
| Best Acc:  72.47% | LR: 1.744e-03 | Time: 11.4s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 013/030] START


Train 013 | lr=1.74e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 013:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7533 |  99.80% |
| Valid |   1.5104 |  72.90% |
------------------------------------------------------------------
| Best Acc:  72.90% | LR: 1.671e-03 | Time: 11.5s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 014/030] START


Train 014 | lr=1.67e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 014:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7365 |  99.92% |
| Valid |   1.5378 |  73.12% |
------------------------------------------------------------------
| Best Acc:  73.12% | LR: 1.590e-03 | Time: 11.7s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 015/030] START


Train 015 | lr=1.59e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 015:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7235 | 100.00% |
| Valid |   1.2864 |  80.22% |
------------------------------------------------------------------
| Best Acc:  80.22% | LR: 1.503e-03 | Time: 11.6s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 016/030] START


Train 016 | lr=1.50e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 016:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7156 | 100.00% |
| Valid |   1.2328 |  82.15% |
------------------------------------------------------------------
| Best Acc:  82.15% | LR: 1.410e-03 | Time: 11.7s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 017/030] START


Train 017 | lr=1.41e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 017:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7102 | 100.00% |
| Valid |   1.2087 |  82.37% |
------------------------------------------------------------------
| Best Acc:  82.37% | LR: 1.312e-03 | Time: 11.1s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 018/030] START


Train 018 | lr=1.31e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 018:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7061 | 100.00% |
| Valid |   1.2007 |  80.86% |
------------------------------------------------------------------
| Best Acc:  82.37% | LR: 1.212e-03 | Time: 11.0s |
------------------------------------------------------------------

[Epoch 019/030] START


Train 019 | lr=1.21e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 019:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7029 | 100.00% |
| Valid |   1.1911 |  81.94% |
------------------------------------------------------------------
| Best Acc:  82.37% | LR: 1.109e-03 | Time: 11.5s |
------------------------------------------------------------------

[Epoch 020/030] START


Train 020 | lr=1.11e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 020:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.7007 | 100.00% |
| Valid |   1.1908 |  82.58% |
------------------------------------------------------------------
| Best Acc:  82.58% | LR: 1.005e-03 | Time: 11.2s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 021/030] START


Train 021 | lr=1.01e-03:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 021:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6988 | 100.00% |
| Valid |   1.1874 |  81.94% |
------------------------------------------------------------------
| Best Acc:  82.58% | LR: 9.010e-04 | Time: 11.5s |
------------------------------------------------------------------

[Epoch 022/030] START


Train 022 | lr=9.01e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 022:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6974 | 100.00% |
| Valid |   1.1838 |  82.37% |
------------------------------------------------------------------
| Best Acc:  82.58% | LR: 7.981e-04 | Time: 11.6s |
------------------------------------------------------------------

[Epoch 023/030] START


Train 023 | lr=7.98e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 023:   0%|                              | 0/1 [00:00<?, ?it/s]

[Checkpoint] BEST updated → /content/drive/MyDrive/dsp/logs/EXP1/checkpoints/best.pth
------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6961 | 100.00% |
| Valid |   1.1776 |  83.01% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 6.975e-04 | Time: 11.5s |
------------------------------------------------------------------
✓ New BEST model saved.

[Epoch 024/030] START


Train 024 | lr=6.98e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 024:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6952 | 100.00% |
| Valid |   1.1770 |  82.58% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 6.003e-04 | Time: 11.5s |
------------------------------------------------------------------

[Epoch 025/030] START


Train 025 | lr=6.00e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 025:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6945 | 100.00% |
| Valid |   1.1823 |  81.51% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 5.075e-04 | Time: 11.3s |
------------------------------------------------------------------

[Epoch 026/030] START


Train 026 | lr=5.08e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 026:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6939 | 100.00% |
| Valid |   1.1778 |  81.72% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 4.202e-04 | Time: 11.1s |
------------------------------------------------------------------

[Epoch 027/030] START


Train 027 | lr=4.20e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 027:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6933 | 100.00% |
| Valid |   1.1768 |  81.94% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 3.392e-04 | Time: 11.0s |
------------------------------------------------------------------

[Epoch 028/030] START


Train 028 | lr=3.39e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 028:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6930 | 100.00% |
| Valid |   1.1783 |  82.15% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 2.656e-04 | Time: 11.0s |
------------------------------------------------------------------

[Epoch 029/030] START


Train 029 | lr=2.66e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 029:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6926 | 100.00% |
| Valid |   1.1806 |  81.29% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 2.000e-04 | Time: 11.1s |
------------------------------------------------------------------

[Epoch 030/030] START


Train 030 | lr=2.00e-04:   0%|                              | 0/7 [00:00<?, ?it/s]

Valid 030:   0%|                              | 0/1 [00:00<?, ?it/s]

------------------------------------------------------------------
| Split |   Loss   |   Acc    |
------------------------------------------------------------------
| Train |   0.6923 | 100.00% |
| Valid |   1.1794 |  81.72% |
------------------------------------------------------------------
| Best Acc:  83.01% | LR: 1.433e-04 | Time: 11.3s |
------------------------------------------------------------------
[INFO] Training finished.


---

### 모델 테스트 실행하기

In [None]:
# Build the run configuration and start inference with it.
# build_args() returns an indexable object; only its first element is handed to infer().
# `args` is kept as a notebook-level name because the TensorBoard cell further down
# reads args[0].exp_name.
# NOTE(review): the recorded run of this cell stopped with
#   ValueError: ./dataset/test.csv must contain columns: {'speaker_id', 'path'}
# Presumably test.csv is missing one of those columns (for example, an unlabeled test
# split would have no speaker_id) — confirm against the CSV header and the column check
# in the data-loading code before re-running.
args = build_args()
infer(args[0])

[INFO] Device: cuda
[INFO] Experiment dir: /content/drive/MyDrive/dsp/logs/EXP1


ValueError: ./dataset/test.csv must contain columns: {'speaker_id', 'path'}

---

#### [결과 분석] 텐서보드 활용하기

In [None]:
# Path of this experiment's log directory: <current working directory>/logs/<exp_name>.
# Nothing is created here; the path is only computed and passed to TensorBoard below.
# Relies on `args` already being defined by an earlier cell (args[0].exp_name is read).
DIR_NAME = os.path.join(os.getcwd(), 'logs', f'{args[0].exp_name}')

# Load the TensorBoard notebook extension, then open TensorBoard on that directory.
# IPython substitutes {DIR_NAME} with the value of the Python variable before the magic runs.
%load_ext tensorboard
%tensorboard --logdir {DIR_NAME}