# CONFIG

In [5]:
!pip install soundfile optuna tqdm pyloudnorm

Collecting pyloudnorm
  Downloading pyloudnorm-0.1.1-py3-none-any.whl.metadata (5.6 kB)
Downloading pyloudnorm-0.1.1-py3-none-any.whl (9.6 kB)
Installing collected packages: pyloudnorm
Successfully installed pyloudnorm-0.1.1


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
import os
import torch

# --- Paths ---
# In Colab, we extract to the local VM disk (/content/data) for speed
BASE_DATA_DIR = "/content/data"
RAW_DIR = os.path.join(BASE_DATA_DIR, "01_raw")
INTERMEDIARY_DIR = os.path.join(BASE_DATA_DIR, "02_intermediary")
MODEL_DIR = "/content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/04_model" # Save models directly to Drive so you don't lose them
EVALUATION_DIR = "/content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/05_evaluation"

# Zip file configuration (Your Drive Path)
ZIP_FILENAME = "LA.zip"
# This points to the file in your Google Drive
ZIP_PATH = "/content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/DS_10283_3336/LA.zip"

# The resulting root directory after unzipping
# Based on ASVspoof structure
DATASET_ROOT = os.path.join(RAW_DIR, "LA")

# --- Protocol Paths ---
PATHS = {
    "train": {
        "audio": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_train/flac"),
        "protocol": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt")
    },
    "dev": {
        "audio": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_dev/flac"),
        "protocol": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.dev.trl.txt")
    },
    "eval": {
        "audio": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_eval/flac"),
        "protocol": os.path.join(DATASET_ROOT, "ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.eval.trl.txt")
    }
}

# --- Audio Parameters ---
SAMPLE_RATE = 16000
DURATION = 4
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
LABEL_MAP = {"bonafide": 0, "spoof": 1}

# --- Training Hyperparameters ---
# Colab T4 GPUs have 16GB VRAM, so we can increase batch size slightly
BATCH_SIZE = 128
GRAD_ACCUM_STEPS = 2 # Effective batch size = 32
EVAL_BATCH_SIZE = 256 # Increased for speed

EPOCHS = 5 # Increased to allow convergence
N_TRIALS = 3 # Keep low for optimization speed

FINAL_TRAINING_EPOCHS = 50
PATIENCE = 10

# --- GPU Configuration ---
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- Data Subsetting ---
SAMPLES_PER_EPOCH = None
EVAL_SAMPLES = None

# DATASET

In [13]:
import os
import torch
import soundfile as sf
import numpy as np
import librosa
import pyloudnorm as pyln
import warnings

from torch.utils.data import Dataset

# Suppress warnings for cleaner logs
warnings.filterwarnings("ignore", category=UserWarning)

class ASVspoofDataset(Dataset):
    def __init__(self, df, target_length=64000, cache=True):
        self.df = df
        self.target_length = target_length
        self.cache = cache

        # Pre-convert labels to tensor
        self.labels = torch.tensor(df['label'].values, dtype=torch.long)
        self.filenames = df['filename'].values
        self.subsets = df['subset'].values

        # Setup Cache Directory
        self.cache_dir = os.path.join(INTERMEDIARY_DIR, "processed_cache")
        if self.cache:
            os.makedirs(self.cache_dir, exist_ok=True)
            print(f"Dataset Cache enabled at: {self.cache_dir}")

        # --- LOAD SILERO VAD (Stage 1) ---
        try:
            # We only need to load VAD if we are actually processing data (not just reading cache)
            # But since we might encounter a missing cache file, we load it anyway.
            print("Loading Silero VAD for robust silence removal...")
            self.vad_model, utils = torch.hub.load(
                repo_or_dir='snakers4/silero-vad',
                model='silero_vad',
                force_reload=False,
                trust_repo=True
            )
            (self.get_speech_timestamps, _, _, _, _) = utils
            self.vad_enabled = True
        except Exception as e:
            print(f"Warning: Failed to load Silero VAD ({e}). Fallback to Librosa trim.")
            self.vad_enabled = False

    def __len__(self):
        return len(self.df)

    def _apply_mu_law(self, x, mu=255):
        """ Stage 3: Mu-Law Companding """
        x_tensor = torch.as_tensor(x, dtype=torch.float32)
        numerator = torch.sign(x_tensor) * torch.log1p(mu * torch.abs(x_tensor))
        denominator = np.log1p(mu).astype(np.float32)
        return numerator / denominator

    def _standardize_loudness(self, audio, sr):
        """ Stage 2: EBU R128 Normalization """
        try:
            meter = pyln.Meter(sr)
            loudness = meter.integrated_loudness(audio)
            if not np.isinf(loudness):
                audio = pyln.normalize.loudness(audio, loudness, -23.0)
        except Exception:
            pass
        return audio.astype(np.float32)

    def _process_temporal_structure(self, audio, sr):
        """ Stage 1: VAD and Audio Folding """
        if self.vad_enabled and len(audio) > 512:
            try:
                wav_tensor = torch.from_numpy(audio).float()
                timestamps = self.get_speech_timestamps(wav_tensor, self.vad_model, sampling_rate=sr)
                if len(timestamps) > 0:
                    speech_chunks = [audio[ts['start']:ts['end']] for ts in timestamps]
                    audio = np.concatenate(speech_chunks)
            except Exception:
                pass

        # Folding / Looping
        if len(audio) < self.target_length:
            repeat_count = (self.target_length // len(audio)) + 1
            audio = np.tile(audio, repeat_count)

        # Cropping
        if len(audio) > self.target_length:
            start = np.random.randint(0, len(audio) - self.target_length)
            audio = audio[start : start + self.target_length]

        return audio.astype(np.float32)

    def extract_robust_features(self, y, sr):
        """ Stage 4: Advanced Bio-Physical Features """
        epsilon = 1e-10

        # Feature 1: TEO
        teo = y[1:-1]**2 - y[:-2] * y[2:]
        teo_log = np.log(np.abs(teo) + epsilon)
        teo_mean = np.mean(teo_log)

        # Feature 2: PCEN Flux
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=64, fmax=8000)
        S_pcen = librosa.pcen(S * (2**31))
        flux = librosa.onset.onset_strength(S=S_pcen, sr=sr)
        pcen_flux_mean = np.mean(flux)

        # Feature 3: Unvoiced Length Proxy
        zcr = librosa.feature.zero_crossing_rate(y=y)[0]
        rms = librosa.feature.rms(y=y)[0]
        rms_norm = (rms - np.min(rms)) / (np.max(rms) - np.min(rms) + epsilon)

        unvoiced_mask = (zcr > 0.1) & (rms_norm < 0.2)
        padded = np.pad(unvoiced_mask, (1, 1), 'constant')
        diff = np.diff(padded.astype(int))
        lengths = np.where(diff == -1)[0] - np.where(diff == 1)[0]
        unvoiced_len = np.mean(lengths) if len(lengths) > 0 else 0.0

        features = np.array([teo_mean, pcen_flux_mean, unvoiced_len], dtype=np.float32)
        features = (features - np.mean(features)) / (np.std(features) + epsilon)
        return torch.from_numpy(features).float()

    def _process_audio(self, filename, subset):
        """Full processing pipeline logic, extracted for caching."""
        folder_name = f"ASVspoof2019_LA_{subset}"
        file_path = os.path.join(DATASET_ROOT, folder_name, "flac", f"{filename}.flac")

        try:
            audio_np, sr = sf.read(file_path, dtype='float32')

            # 1. Temporal (VAD + Folding)
            audio_np = self._process_temporal_structure(audio_np, sr)

            # 2. Standardization (Loudness)
            audio_linear = self._standardize_loudness(audio_np, sr)

            # 3. Features (from linear audio)
            extra_features = self.extract_robust_features(audio_linear, sr)

            # 4. Input Branch (Mu-Law)
            waveform = self._apply_mu_law(audio_linear, mu=255)

            if waveform.ndim == 1:
                waveform = waveform.unsqueeze(0)
            else:
                waveform = waveform.t()

        except Exception as e:
            # Fallback for corrupted files
            waveform = torch.zeros(1, self.target_length, dtype=torch.float32)
            extra_features = torch.zeros(3, dtype=torch.float32)

        # Pad/Truncate (Final Check)
        _, w_len = waveform.shape
        if w_len < self.target_length:
            padding = self.target_length - w_len
            waveform = torch.nn.functional.pad(waveform, (0, padding))
        elif w_len > self.target_length:
            waveform = waveform[:, :self.target_length]

        return waveform, extra_features

    def __getitem__(self, idx):
        filename = self.filenames[idx]
        subset = self.subsets[idx]
        label = self.labels[idx]

        # --- Cache Logic ---
        if self.cache:
            cache_path = os.path.join(self.cache_dir, f"{filename}.pt")
            if os.path.exists(cache_path):
                try:
                    # HIT: Load from disk
                    data = torch.load(cache_path)
                    return data['waveform'], label, data['features']
                except Exception:
                    # If load fails, recompute
                    pass

        # MISS: Compute fresh
        waveform, extra_features = self._process_audio(filename, subset)

        # Save to cache
        if self.cache:
            try:
                # Save as a dict to keep files organized
                torch.save({'waveform': waveform, 'features': extra_features}, cache_path)
            except Exception:
                pass

        return waveform, label, extra_features

# INGESTION

In [8]:
import os
import pandas as pd
import zipfile
import shutil

def prepare_dataset_files():
    """Checks for dataset existence and unzips if necessary."""
    # Check if files already extracted
    if not os.path.exists(PATHS['train']['protocol']):
        print(f"Dataset not found at {DATASET_ROOT}")

        if os.path.exists(ZIP_PATH):
            print(f"Found zip file in Drive at: {ZIP_PATH}")

            # Create local raw directory
            os.makedirs(RAW_DIR, exist_ok=True)

            # Extract directly from Drive to Local VM
            print(f"Extracting to local VM: {RAW_DIR} ...")
            # Using !unzip is often faster in Colab cells, but zipfile is portable python
            !unzip -q -o "{ZIP_PATH}" -d "{RAW_DIR}"
            # with zipfile.ZipFile(ZIP_PATH, 'r') as zip_ref:
            #     zip_ref.extractall(RAW_DIR)
            print("Extraction complete.")
        else:
            raise FileNotFoundError(f"Zip file not found at {ZIP_PATH}. Check your Drive path.")
    else:
        print(f"Raw dataset files found at {DATASET_ROOT}.")

def parse_asvspoof_protocol(protocol_path, audio_dir, subset_name):
    """Parses ASVspoof protocol text files."""
    if not os.path.exists(protocol_path):
        print(f"Warning: Protocol file not found: {protocol_path}")
        return pd.DataFrame()

    data = []
    with open(protocol_path, 'r') as f:
        lines = f.readlines()

    for line in lines:
        parts = line.strip().split(' ')
        if len(parts) < 5: continue
        filename = parts[1]
        label_str = parts[4]
        file_path = os.path.join(audio_dir, f"{filename}.flac")

        data.append({
            "path": file_path,
            "filename": filename,
            "label_str": label_str,
            "label": LABEL_MAP.get(label_str, -1),
            "subset": subset_name
        })
    return pd.DataFrame(data)

def ingest_datasets():
    # Define cache paths
    os.makedirs(INTERMEDIARY_DIR, exist_ok=True)
    cache_train = os.path.join(INTERMEDIARY_DIR, "train.csv")
    cache_dev = os.path.join(INTERMEDIARY_DIR, "dev.csv")
    cache_eval = os.path.join(INTERMEDIARY_DIR, "eval.csv")

    # CHECK: If cached files exist, load them and skip processing
    if os.path.exists(cache_train) and os.path.exists(cache_dev) and os.path.exists(cache_eval):
        print(f"--- Found cached data in {INTERMEDIARY_DIR} ---")
        print("Loading from CSVs...")
        train_df = pd.read_csv(cache_train)
        dev_df = pd.read_csv(cache_dev)
        eval_df = pd.read_csv(cache_eval)
        print("Loaded successfully.")
        return train_df, dev_df, eval_df

    # PROCESS: If no cache, run full ingestion
    print("--- No cache found. Starting raw ingestion ---")

    # 1. Unzip if needed
    prepare_dataset_files()

    # 2. Parse Protocols
    print(f"Parsing Train set...")
    train_df = parse_asvspoof_protocol(PATHS["train"]["protocol"], PATHS["train"]["audio"], "train")

    print(f"Parsing Dev set...")
    dev_df = parse_asvspoof_protocol(PATHS["dev"]["protocol"], PATHS["dev"]["audio"], "dev")

    print(f"Parsing Eval set...")
    eval_df = parse_asvspoof_protocol(PATHS["eval"]["protocol"], PATHS["eval"]["audio"], "eval")

    # 3. Save to Intermediary Folder
    print(f"--- Saving intermediary files to {INTERMEDIARY_DIR} ---")

    train_df.to_csv(cache_train, index=False)
    dev_df.to_csv(cache_dev, index=False)
    eval_df.to_csv(cache_eval, index=False)
    print("Saved train.csv, dev.csv, and eval.csv")

    return train_df, dev_df, eval_df

if __name__ == "__main__":
    ingest_datasets()

--- No cache found. Starting raw ingestion ---
Dataset not found at /content/data/01_raw/LA
Found zip file in Drive at: /content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/DS_10283_3336/LA.zip
Extracting to local VM: /content/data/01_raw ...
Extraction complete.
Parsing Train set...
Parsing Dev set...
Parsing Eval set...
--- Saving intermediary files to /content/data/02_intermediary ---
Saved train.csv, dev.csv, and eval.csv


# MODEL

In [9]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# --- One-Class Learning Loss: OC-Softmax ---
class OCSoftmax(nn.Module):
    """
    One-Class Softmax Loss (from ASVspoof 2019 baseline strategies).
    Encourages real speech (class 0) to be compact, and spoof (class 1) to be distant.
    """
    def __init__(self, feat_dim=2, r_real=0.9, r_fake=0.5, alpha=20.0):
        super(OCSoftmax, self).__init__()
        self.feat_dim = feat_dim
        self.r_real = r_real
        self.r_fake = r_fake
        self.alpha = alpha

        # Center for the "Real" class in the embedding space
        self.center = nn.Parameter(torch.randn(1, self.feat_dim))
        nn.init.kaiming_uniform_(self.center, 0.25)
        self.softplus = nn.Softplus()

    def forward(self, embeddings, labels):
        """
        embeddings: (Batch, feat_dim) - Output from the bottleneck layer
        labels: (Batch,) - 0 for Real, 1 for Spoof
        """
        # Normalize embeddings and center to hypersphere
        w = F.normalize(self.center, p=2, dim=1)
        x = F.normalize(embeddings, p=2, dim=1)

        # Cosine similarity between embeddings and center
        scores = x.mm(w.t()).squeeze() # (Batch,)

        # Bias the scores:
        # For Real (0): we want score > r_real
        # For Spoof (1): we want score < r_fake
        # We construct a margin-based loss

        # Target scores based on label
        # If label=0 (Real), margin = r_real. If label=1 (Spoof), margin = r_fake
        margins = torch.where(labels == 0, self.r_real, self.r_fake)

        # Logit calculation for OC-Softmax
        # Real: alpha * (r_real - score) -> Minimize this (make score large)
        # Fake: alpha * (score - r_fake) -> Minimize this (make score small)

        # Note: Original OC-Softmax formulation varies. This is a simplified metric learning version.
        # Ideally, we return the Cross Entropy of the modified logits.

        # Standard implementation creates 2-class logits from the similarity score
        # Class 0 Logit (Realness): -|score - center|
        # Class 1 Logit (Spoofness): |score - center|

        # Let's stick to the simplest effective implementation for this timeframe:
        # 1-Class Objective: Minimize distance to center for Real, Maximize for Fake

        dist = 1.0 - scores # Cosine distance (0 to 2)

        # Hinge Loss equivalent for OCL
        loss_real = self.softplus(self.alpha * (dist - (1 - self.r_real))) # Penalize if dist > (1-r_real)
        loss_fake = self.softplus(self.alpha * ((1 - self.r_fake) - dist)) # Penalize if dist < (1-r_fake)

        loss = torch.where(labels == 0, loss_real, loss_fake).mean()

        return loss, scores

# --- Model Components ---

class SincConv_fast(nn.Module):
    def __init__(self, out_channels, kernel_size):
        super().__init__()
        if kernel_size % 2 == 0: kernel_size += 1
        self.conv = nn.Conv1d(1, out_channels, kernel_size=kernel_size, stride=1, padding=kernel_size//2, bias=False)
        self.bn = nn.BatchNorm1d(out_channels)
        self.leaky_relu = nn.LeakyReLU(0.2, inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.leaky_relu(x)
        return x

class SEBlock(nn.Module):
    def __init__(self, channels, reduction=16):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool1d(1)
        self.fc = nn.Sequential(
            nn.Linear(channels, channels // reduction, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(channels // reduction, channels, bias=False),
            nn.Sigmoid()
        )

    def forward(self, x):
        b, c, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1)
        return x * y.expand_as(x)

class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.se = SEBlock(out_channels)
        self.leaky_relu = nn.LeakyReLU(0.2, inplace=True)

        self.downsample = None
        if in_channels != out_channels:
            self.downsample = nn.Conv1d(in_channels, out_channels, kernel_size=1, bias=False)

    def forward(self, x):
        residual = x
        out = self.leaky_relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out = self.se(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.leaky_relu(out)
        return out

class RawNetOCL(nn.Module):
    """
    RawNet modified to accept Extra Features and output Embedding for OCL.
    """
    def __init__(self, d_args):
        super().__init__()

        # Raw Audio Branch
        self.sinc_layer = SincConv_fast(out_channels=128, kernel_size=251)
        self.pool_sinc = nn.MaxPool1d(3)
        self.block0 = ResidualBlock(128, 128)
        self.pool0 = nn.MaxPool1d(3)
        self.block1 = ResidualBlock(128, 256)
        self.pool1 = nn.MaxPool1d(3)
        self.block2 = ResidualBlock(256, 512)
        self.pool2 = nn.MaxPool1d(3)
        self.block3 = ResidualBlock(512, 512)
        self.pool3 = nn.MaxPool1d(3)
        self.avg_pool = nn.AdaptiveAvgPool1d(1)

        # Feature Branch (Shimmer, ZCR, Centroid)
        # Simple MLP to upscale features to match embedding space
        self.feature_mlp = nn.Sequential(
            nn.Linear(3, 32),
            nn.ReLU(),
            nn.Linear(32, 128),
            nn.ReLU()
        )

        # Fusion & Bottleneck
        # 256 (from RawNet fc1) + 128 (from Features) = 384
        self.fc1 = nn.Linear(512, 256)
        self.bn_fc = nn.BatchNorm1d(256)
        self.act_fc = nn.LeakyReLU(0.2)

        # Final Embedding Layer (dimension 64 for OCL)
        self.bottleneck = nn.Linear(256 + 128, 64)

    def forward(self, x_raw, x_feat):
        # 1. Raw Audio Path
        if x_raw.dim() == 2: x_raw = x_raw.unsqueeze(1)
        x = self.pool_sinc(self.sinc_layer(x_raw))
        x = self.pool0(self.block0(x))
        x = self.pool1(self.block1(x))
        x = self.pool2(self.block2(x))
        x = self.pool3(self.block3(x))
        x = self.avg_pool(x).flatten(1)
        x_raw_emb = self.act_fc(self.bn_fc(self.fc1(x))) # 256 dim

        # 2. Handcrafted Feature Path
        x_feat_emb = self.feature_mlp(x_feat) # 128 dim

        # 3. Concatenate
        combined = torch.cat((x_raw_emb, x_feat_emb), dim=1) # 384 dim

        # 4. Bottleneck (Embedding for OCL)
        embedding = self.bottleneck(combined) # 64 dim

        return embedding

# TRAINING

In [14]:
import os
import time
import json
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, Subset, WeightedRandomSampler
import optuna
import gc
from tqdm import tqdm

# from ingestion import ingest_datasets
# from dataset import ASVspoofDataset
# from model import RawNet, FocalLoss

# Define path for saving parameters
BEST_PARAMS_PATH = os.path.join(MODEL_DIR, "best_params_ocl.json")
BEST_MODEL_PATH = os.path.join(MODEL_DIR, "oc_loss_best.pth")

def get_balanced_loader(dataset, batch_size):
    targets = dataset.labels
    class_counts = torch.bincount(targets)
    class_weights = 1. / class_counts.float()
    sample_weights = class_weights[targets]
    sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights), replacement=True)
    return DataLoader(dataset, batch_size=batch_size, shuffle=False, sampler=sampler, num_workers=0, pin_memory=True)

def train_epoch(model, loader, optimizer, criterion, device, scaler, accum_steps):
    model.train()
    running_loss = 0.0

    pbar = tqdm(loader, desc="Training", unit="batch", leave=False)
    optimizer.zero_grad(set_to_none=True)

    for i, (inputs, labels, extra_feats) in enumerate(pbar):
        inputs = inputs.to(device, non_blocking=True)
        extra_feats = extra_feats.to(device, non_blocking=True) # Send features to GPU
        labels = labels.to(device, non_blocking=True)

        with torch.amp.autocast('cuda'):
            # Forward pass now takes two inputs
            embeddings = model(inputs, extra_feats)

            # OCL Loss returns (loss, scores)
            loss, _ = criterion(embeddings, labels)

            loss = loss / accum_steps

        scaler.scale(loss).backward()

        if (i + 1) % accum_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad(set_to_none=True)

        loss_val = loss.item() * accum_steps
        running_loss += loss_val
        pbar.set_postfix(loss=f"{loss_val:.4f}")

    if (i + 1) % accum_steps != 0:
        scaler.step(optimizer)
        scaler.update()
        optimizer.zero_grad(set_to_none=True)

    return running_loss / len(loader)

def validate_epoch(model, loader, criterion, device):
    model.eval()
    running_loss = 0.0
    pbar = tqdm(loader, desc="Validating", unit="batch", leave=False)

    with torch.no_grad():
        for inputs, labels, extra_feats in pbar:
            inputs = inputs.to(device, non_blocking=True)
            extra_feats = extra_feats.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            with torch.amp.autocast('cuda'):
                embeddings = model(inputs, extra_feats)
                loss, _ = criterion(embeddings, labels)

            loss_val = loss.item()
            running_loss += loss_val
            pbar.set_postfix(loss=f"{loss_val:.4f}")

    return running_loss / len(loader)

def run_final_training(train_dataset, dev_dataset, params):
    print("\n" + "="*40)
    print(f"FINAL TRAINING STARTED (OCL + Features)")
    print(f"Params: {params}")
    print("="*40)

    gc.collect()
    if torch.cuda.is_available(): torch.cuda.empty_cache()

    model = RawNetOCL(d_args={}).to(DEVICE)
    # OC-Softmax has trainble parameters (centers), add them to optimizer!
    criterion = OCSoftmax(feat_dim=64).to(DEVICE)

    # Combine parameters from model AND loss function
    all_params = list(model.parameters()) + list(criterion.parameters())
    optimizer = optim.Adam(all_params, lr=params['lr'], weight_decay=params['weight_decay'])

    scaler = torch.amp.GradScaler('cuda')

    train_loader = get_balanced_loader(train_dataset, BATCH_SIZE)
    dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

    best_val_loss = float('inf')
    early_stop_counter = 0
    patience = PATIENCE

    for epoch in range(FINAL_TRAINING_EPOCHS):
        start = time.time()
        t_loss = train_epoch(model, train_loader, optimizer, criterion, DEVICE, scaler, GRAD_ACCUM_STEPS)
        v_loss = validate_epoch(model, dev_loader, criterion, DEVICE)
        duration = time.time() - start

        print(f"Final Ep {epoch+1}/{FINAL_TRAINING_EPOCHS} | Train Loss: {t_loss:.4f} | Val Loss: {v_loss:.4f} | Time: {duration:.1f}s")

        if v_loss < best_val_loss:
            best_val_loss = v_loss
            early_stop_counter = 0
            torch.save(model.state_dict(), BEST_MODEL_PATH)
            # Also save the loss function state (it has learned centers!)
            torch.save(criterion.state_dict(), os.path.join(MODEL_DIR, "oc_loss_best.pth"))
            print(f"  -> Model & Centers Saved")
        else:
            early_stop_counter += 1
            if early_stop_counter >= patience:
                print("Early stopping.")
                break

    print("Final training complete.")

def main():
    print(f"Running on: {DEVICE}")
    os.makedirs(MODEL_DIR, exist_ok=True)

    try:
        df_train, df_dev, df_eval = ingest_datasets()
    except Exception as e:
        print(f"Ingestion failed: {e}")
        return

    print("Initializing Datasets (with Feature Extraction)...")
    train_dataset = ASVspoofDataset(df_train, target_length=SAMPLES_PER_TRACK)
    dev_dataset = ASVspoofDataset(df_dev, target_length=SAMPLES_PER_TRACK)

    best_params = None
    if os.path.exists(BEST_PARAMS_PATH):
        print(f"\n--- Found existing best params ---")
        with open(BEST_PARAMS_PATH, 'r') as f:
            best_params = json.load(f)
    else:
        print("\n--- Starting Optuna Optimization ---")
        def objective(trial):
            gc.collect()
            if torch.cuda.is_available(): torch.cuda.empty_cache()

            lr = trial.suggest_float("lr", 1e-4, 1e-3, log=True)
            weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-3, log=True)

            model = RawNetOCL(d_args={}).to(DEVICE)
            criterion = OCSoftmax(feat_dim=64).to(DEVICE)

            all_params = list(model.parameters()) + list(criterion.parameters())
            optimizer = optim.Adam(all_params, lr=lr, weight_decay=weight_decay)
            scaler = torch.amp.GradScaler('cuda')

            train_loader = get_balanced_loader(train_dataset, BATCH_SIZE)
            dev_loader = DataLoader(dev_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

            for epoch in range(EPOCHS):
                t_loss = train_epoch(model, train_loader, optimizer, criterion, DEVICE, scaler, GRAD_ACCUM_STEPS)
                v_loss = validate_epoch(model, dev_loader, criterion, DEVICE)
                trial.report(v_loss, epoch)
                if trial.should_prune(): raise optuna.TrialPruned()
            return v_loss

        study = optuna.create_study(direction="minimize")
        study.optimize(objective, n_trials=N_TRIALS)
        best_params = study.best_params
        with open(BEST_PARAMS_PATH, 'w') as f: json.dump(best_params, f)

    if best_params:
        run_final_training(train_dataset, dev_dataset, best_params)

if __name__ == "__main__":
    main()

Running on: cuda
--- Found cached data in /content/data/02_intermediary ---
Loading from CSVs...
Loaded successfully.
Initializing Datasets (with Feature Extraction)...
Dataset Cache enabled at: /content/data/02_intermediary/processed_cache
Loading Silero VAD for robust silence removal...


Using cache found in /root/.cache/torch/hub/snakers4_silero-vad_master
Using cache found in /root/.cache/torch/hub/snakers4_silero-vad_master
[I 2025-12-05 14:22:01,242] A new study created in memory with name: no-name-5976b888-3c6b-4e85-be2d-81f559086675


Dataset Cache enabled at: /content/data/02_intermediary/processed_cache
Loading Silero VAD for robust silence removal...

--- Starting Optuna Optimization ---


[I 2025-12-05 15:31:49,978] Trial 0 finished with value: 0.8296503675222786 and parameters: {'lr': 0.0004515299023889027, 'weight_decay': 0.0006908754638688816}. Best is trial 0 with value: 0.8296503675222786.
[I 2025-12-05 15:42:06,247] Trial 1 finished with value: 0.7134209003038122 and parameters: {'lr': 0.000254676824060447, 'weight_decay': 0.0003000728262968881}. Best is trial 1 with value: 0.7134209003038122.
[I 2025-12-05 15:50:49,091] Trial 2 finished with value: 0.6999015957023027 and parameters: {'lr': 0.00022221315468268574, 'weight_decay': 5.50455069986499e-05}. Best is trial 2 with value: 0.6999015957023027.



FINAL TRAINING STARTED (OCL + Features)
Params: {'lr': 0.00022221315468268574, 'weight_decay': 5.50455069986499e-05}




Final Ep 1/50 | Train Loss: 1.8675 | Val Loss: 2.1463 | Time: 103.7s
  -> Model & Centers Saved




Final Ep 2/50 | Train Loss: 0.9467 | Val Loss: 1.3449 | Time: 103.4s
  -> Model & Centers Saved




Final Ep 3/50 | Train Loss: 0.7143 | Val Loss: 1.0867 | Time: 103.9s
  -> Model & Centers Saved




Final Ep 4/50 | Train Loss: 0.4092 | Val Loss: 1.5310 | Time: 103.6s




Final Ep 5/50 | Train Loss: 0.3049 | Val Loss: 2.3571 | Time: 103.4s




Final Ep 6/50 | Train Loss: 0.2673 | Val Loss: 0.7442 | Time: 103.7s
  -> Model & Centers Saved




Final Ep 7/50 | Train Loss: 0.2106 | Val Loss: 0.5715 | Time: 103.4s
  -> Model & Centers Saved




Final Ep 8/50 | Train Loss: 0.1968 | Val Loss: 0.3561 | Time: 103.5s
  -> Model & Centers Saved




Final Ep 9/50 | Train Loss: 0.1427 | Val Loss: 1.1288 | Time: 103.6s




Final Ep 10/50 | Train Loss: 0.1481 | Val Loss: 1.0792 | Time: 103.4s




Final Ep 11/50 | Train Loss: 0.1409 | Val Loss: 0.9539 | Time: 103.4s




Final Ep 12/50 | Train Loss: 0.1543 | Val Loss: 0.9401 | Time: 103.7s




Final Ep 13/50 | Train Loss: 0.1212 | Val Loss: 0.4731 | Time: 103.5s




Final Ep 14/50 | Train Loss: 0.1419 | Val Loss: 0.8466 | Time: 103.5s




Final Ep 15/50 | Train Loss: 0.0958 | Val Loss: 0.4215 | Time: 103.6s




Final Ep 16/50 | Train Loss: 0.1204 | Val Loss: 0.7621 | Time: 103.4s




Final Ep 17/50 | Train Loss: 0.1477 | Val Loss: 0.5525 | Time: 103.3s




Final Ep 18/50 | Train Loss: 0.0831 | Val Loss: 0.3489 | Time: 103.6s
  -> Model & Centers Saved




Final Ep 19/50 | Train Loss: 0.0958 | Val Loss: 0.3388 | Time: 103.4s
  -> Model & Centers Saved




Final Ep 20/50 | Train Loss: 0.1032 | Val Loss: 0.6013 | Time: 103.4s




Final Ep 21/50 | Train Loss: 0.0914 | Val Loss: 0.4299 | Time: 103.8s




Final Ep 22/50 | Train Loss: 0.0910 | Val Loss: 1.0005 | Time: 103.5s




Final Ep 23/50 | Train Loss: 0.1164 | Val Loss: 0.4083 | Time: 103.6s




Final Ep 24/50 | Train Loss: 0.0932 | Val Loss: 0.4427 | Time: 103.4s




Final Ep 25/50 | Train Loss: 0.0969 | Val Loss: 0.3468 | Time: 103.6s




Final Ep 26/50 | Train Loss: 0.1011 | Val Loss: 0.3373 | Time: 103.9s
  -> Model & Centers Saved




Final Ep 27/50 | Train Loss: 0.0891 | Val Loss: 0.4420 | Time: 103.8s




Final Ep 28/50 | Train Loss: 0.0847 | Val Loss: 0.2781 | Time: 103.9s
  -> Model & Centers Saved




Final Ep 29/50 | Train Loss: 0.0962 | Val Loss: 0.1963 | Time: 103.9s
  -> Model & Centers Saved




Final Ep 30/50 | Train Loss: 0.0939 | Val Loss: 0.2725 | Time: 103.6s




Final Ep 31/50 | Train Loss: 0.0864 | Val Loss: 0.4077 | Time: 103.6s




Final Ep 32/50 | Train Loss: 0.0861 | Val Loss: 0.3446 | Time: 103.7s




Final Ep 33/50 | Train Loss: 0.0863 | Val Loss: 0.3718 | Time: 103.4s




Final Ep 34/50 | Train Loss: 0.1064 | Val Loss: 0.3335 | Time: 103.5s




Final Ep 35/50 | Train Loss: 0.0876 | Val Loss: 0.8637 | Time: 103.9s




Final Ep 36/50 | Train Loss: 0.0792 | Val Loss: 0.3055 | Time: 103.5s




Final Ep 37/50 | Train Loss: 0.0873 | Val Loss: 0.3465 | Time: 103.6s




Final Ep 38/50 | Train Loss: 0.0826 | Val Loss: 0.2468 | Time: 103.8s




Final Ep 39/50 | Train Loss: 0.0840 | Val Loss: 0.1910 | Time: 103.5s
  -> Model & Centers Saved




Final Ep 40/50 | Train Loss: 0.0774 | Val Loss: 0.2530 | Time: 103.8s




Final Ep 41/50 | Train Loss: 0.0680 | Val Loss: 0.1979 | Time: 103.4s




Final Ep 42/50 | Train Loss: 0.0810 | Val Loss: 0.5140 | Time: 103.5s




Final Ep 43/50 | Train Loss: 0.0999 | Val Loss: 0.2179 | Time: 103.8s




Final Ep 44/50 | Train Loss: 0.0790 | Val Loss: 0.3879 | Time: 103.7s




Final Ep 45/50 | Train Loss: 0.0923 | Val Loss: 0.2023 | Time: 103.5s




Final Ep 46/50 | Train Loss: 0.0825 | Val Loss: 0.1898 | Time: 103.6s
  -> Model & Centers Saved




Final Ep 47/50 | Train Loss: 0.0749 | Val Loss: 0.3722 | Time: 103.6s




Final Ep 48/50 | Train Loss: 0.1048 | Val Loss: 0.3428 | Time: 103.6s




Final Ep 49/50 | Train Loss: 0.0797 | Val Loss: 0.2936 | Time: 103.7s




Final Ep 50/50 | Train Loss: 0.0733 | Val Loss: 0.2666 | Time: 103.5s
Final training complete.


In [17]:
import os
import time
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, confusion_matrix
from torch.utils.data import DataLoader, Subset
from tqdm import tqdm

os.makedirs(EVALUATION_DIR, exist_ok=True)

def compute_eer(bonafide_scores, spoof_scores):
    """
    Computes EER given scores where:
    High Score = Bonafide (Similarity to Real Center)
    Low Score = Spoof
    """
    # y_true: 1 for Bonafide, 0 for Spoof (Standard for similarity-based ROC)
    y_true = np.concatenate([np.ones(len(bonafide_scores)), np.zeros(len(spoof_scores))])
    y_scores = np.concatenate([bonafide_scores, spoof_scores])

    fpr, tpr, thresholds = roc_curve(y_true, y_scores, pos_label=1)

    # EER is where False Rejection Rate (1-TPR) equals False Acceptance Rate (FPR)
    fnr = 1 - tpr
    eer_threshold_idx = np.nanargmin(np.absolute((fnr - fpr)))
    eer = fpr[eer_threshold_idx]
    threshold = thresholds[eer_threshold_idx]

    return eer, threshold

def plot_confusion_matrix(y_true, y_pred, save_path):
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=['Bonafide (0)', 'Spoof (1)'],
                yticklabels=['Bonafide (0)', 'Spoof (1)'])
    plt.title('Confusion Matrix - ASVspoof OCL Evaluation')
    plt.ylabel('Actual Label')
    plt.xlabel('Predicted Label')
    plt.savefig(save_path)
    plt.close()

def evaluate_model():
    print(f"--- Starting Evaluation on {DEVICE} (OCL + Robust Features) ---")

    # 1. Ensure Data Exists
    csv_path = os.path.join(BASE_DATA_DIR, "02_intermediary", "eval.csv")
    if not os.path.exists(csv_path):
        print("Eval CSV not found. Running ingestion...")
        _, _, df_eval = ingest_datasets()
    else:
        df_eval = pd.read_csv(csv_path)

    # 2. Initialize Dataset (Automatically uses VAD/R128/Mu-Law/Features)
    print("Initializing Dataset pipeline...")
    full_eval_dataset = ASVspoofDataset(df_eval, target_length=SAMPLES_PER_TRACK, cache=True)

    if EVAL_SAMPLES is not None and EVAL_SAMPLES < len(full_eval_dataset):
        indices = list(range(EVAL_SAMPLES))
        eval_dataset = Subset(full_eval_dataset, indices)
        print(f"Subset selected: {len(eval_dataset)} samples.")
    else:
        eval_dataset = full_eval_dataset
        print(f"Full evaluation set: {len(eval_dataset)} samples.")

    # 3. DataLoader (Batching for speed)
    eval_loader = DataLoader(eval_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False, num_workers=0)

    # 4. Load Model & Loss Centers
    model = RawNetOCL(d_args={}).to(DEVICE)
    loss_fn = OCSoftmax(feat_dim=64).to(DEVICE)

    model_path = os.path.join(MODEL_DIR, "raw_tf_net_best.pth")
    loss_path = os.path.join(MODEL_DIR, "oc_loss_best.pth")

    if not os.path.exists(model_path):
        print(f"CRITICAL: Model weights not found at {model_path}")
        return

    print(f"Loading weights from {model_path}...")
    model.load_state_dict(torch.load(model_path, map_location=DEVICE))

    if os.path.exists(loss_path):
        loss_fn.load_state_dict(torch.load(loss_path, map_location=DEVICE))
        print("Loaded OCL Centers (Crucial for scoring).")
    else:
        print("WARNING: Loss centers not found. Scores will be based on random centers (Results will be garbage).")

    model.eval()

    bonafide_scores = []
    spoof_scores = []
    inference_times = []

    pbar = tqdm(eval_loader, desc="Evaluating", unit="batch")

    with torch.no_grad():
        for inputs, labels, extra_feats in pbar:
            inputs = inputs.to(DEVICE)
            extra_feats = extra_feats.to(DEVICE)

            # Measure Inference Time
            start_time = time.time()
            embeddings = model(inputs, extra_feats)
            loss, scores = loss_fn(embeddings, labels.to(DEVICE)) # Scores = Similarity to Bonafide Center
            end_time = time.time()

            batch_time = (end_time - start_time) * 1000
            inference_times.extend([batch_time / len(inputs)] * len(inputs)) # Avg per sample

            # Separate scores by label
            scores_np = scores.cpu().numpy()
            labels_np = labels.cpu().numpy()

            # Label 0 = Bonafide, 1 = Spoof
            bonafide_scores.extend(scores_np[labels_np == 0])
            spoof_scores.extend(scores_np[labels_np == 1])

    if len(bonafide_scores) == 0 or len(spoof_scores) == 0:
        print("Error: Dataset missing one of the classes (Bonafide or Spoof). Cannot compute EER.")
        return

    bonafide_scores = np.array(bonafide_scores)
    spoof_scores = np.array(spoof_scores)

    # 5. Compute Metrics
    eer, threshold = compute_eer(bonafide_scores, spoof_scores)

    # Confusion Matrix Logic
    # 0 = Bonafide, 1 = Spoof
    y_true_cm = np.concatenate([np.zeros(len(bonafide_scores)), np.ones(len(spoof_scores))])
    y_scores_cm = np.concatenate([bonafide_scores, spoof_scores])

    # Decision: Score >= Threshold means "Similar to Real" -> Predict Bonafide (0)
    y_pred_cm = np.where(y_scores_cm >= threshold, 0, 1)

    avg_inference_time = np.mean(inference_times)

    # 6. Report Generation
    report_lines = []
    report_lines.append("\n" + "="*40)
    report_lines.append("   ROBUST PIPELINE EVALUATION REPORT   ")
    report_lines.append("="*40)

    report_lines.append(f"Model: RawNetOCL + TEO/PCEN Features")
    report_lines.append(f"Total Samples: {len(eval_dataset)}")
    report_lines.append(f"Bonafide Samples: {len(bonafide_scores)}")
    report_lines.append(f"Spoof Samples: {len(spoof_scores)}")
    report_lines.append("-" * 30)

    tn, fp, fn, tp = confusion_matrix(y_true_cm, y_pred_cm).ravel()

    # Metrics Definitions:
    # False Reject (FRR): Bonafide (0) classified as Spoof (1) -> FP in this CM setup
    frr = fp / (tn + fp) if (tn + fp) > 0 else 0.0

    # False Accept (FAR): Spoof (1) classified as Bonafide (0) -> FN in this CM setup
    far = fn / (fn + tp) if (fn + tp) > 0 else 0.0

    report_lines.append(f"[Metric] Equal Error Rate (EER): {eer:.4%}")
    report_lines.append(f"[Metric] FAR (False Accept Rate): {far:.4%}")
    report_lines.append(f"[Metric] FRR (False Reject Rate): {frr:.4%}")
    report_lines.append(f"[Metric] Avg Inference Latency: {avg_inference_time:.2f} ms")
    report_lines.append(f"[Info] Optimal Threshold (Cosine Sim): {threshold:.4f}")

    report_lines.append("-" * 30)
    report_lines.append("Performance Targets Check:")
    report_lines.append(f"[*] EER <= 5%? {'PASSED' if eer <= 0.05 else 'FAILED'}")
    report_lines.append(f"[*] FAR < 1%? {'PASSED' if far < 0.01 else 'FAILED'}")
    report_lines.append(f"[*] Latency < 100ms? {'PASSED' if avg_inference_time < 100 else 'FAILED'}")

    final_report = "\n".join(report_lines)

    # Output
    print(final_report)

    report_path = os.path.join(EVALUATION_DIR, "evaluation_report.txt")
    with open(report_path, "w") as f:
        f.write(final_report)
    print(f"\n[Saved] Report saved to: {report_path}")

    cm_path = os.path.join(EVALUATION_DIR, "confusion_matrix.png")
    plot_confusion_matrix(y_true_cm, y_pred_cm, cm_path)
    print(f"[Saved] Confusion Matrix saved to: {cm_path}")

if __name__ == "__main__":
    evaluate_model()

--- Starting Evaluation on cuda (OCL + Robust Features) ---
Initializing Dataset pipeline...
Dataset Cache enabled at: /content/data/02_intermediary/processed_cache
Loading Silero VAD for robust silence removal...


Using cache found in /root/.cache/torch/hub/snakers4_silero-vad_master


Full evaluation set: 71237 samples.
Loading weights from /content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/04_model/raw_tf_net_best.pth...
Loaded OCL Centers (Crucial for scoring).


Evaluating: 100%|██████████| 279/279 [1:24:46<00:00, 18.23s/batch]



   ROBUST PIPELINE EVALUATION REPORT   
Model: RawNetOCL + TEO/PCEN Features
Total Samples: 71237
Bonafide Samples: 7355
Spoof Samples: 63882
------------------------------
[Metric] Equal Error Rate (EER): 50.3663%
[Metric] FAR (False Accept Rate): 50.3663%
[Metric] FRR (False Reject Rate): 50.3603%
[Metric] Avg Inference Latency: 1.17 ms
[Info] Optimal Threshold (Cosine Sim): -0.0090
------------------------------
Performance Targets Check:
[*] EER <= 5%? FAILED
[*] FAR < 1%? FAILED
[*] Latency < 100ms? PASSED

[Saved] Report saved to: /content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/05_evaluation/evaluation_report.txt
[Saved] Confusion Matrix saved to: /content/drive/MyDrive/UFSC/topicos_especiais_aplicacoes/data/05_evaluation/confusion_matrix.png
