### Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
# from einops import rearrange # Not needed for CNNs
# from einops.layers.torch import Rearrange # Not needed for CNNs
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
import wandb 
from dataclasses import dataclass, field  # Import field for default_factory

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)

# WandB login (assuming you have your API key set up)
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()
try:
    secret_value_0 = user_secrets.get_secret("wandb_api_key")
    wandb.login(key=secret_value_0)
    print("WandB login successful using wandb_api_key.")
except Exception as e:
    print(f"Failed to login to WandB: {e}. Please ensure WANDB_API_KEY is set.")
    raise


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mnekloyh[0m ([33mnekloyh-none[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


WandB login successful using wandb_api_key.


### Configurations

In [2]:
@dataclass
class Config:
    # Data processing parameters
    SEED: int = 42
    SR: int = 16000
    N_FFT: int = 2048
    HOP_LENGTH: int = 512
    N_MELS: int = 128
    FMIN: float = 0.0
    FMAX: float = 8000.0
    NUM_TIME_MASKS: int = 2
    NUM_FREQ_MASKS: int = 2
    TIME_MASK_MAX_WIDTH: int = 30
    FREQ_MASK_MAX_WIDTH: int = 15
    MASK_REPLACEMENT_VALUE: float = -80.0
    NORM_EPSILON: float = 1e-6
    LOUDNESS_LUFS: float = -23.0
    USE_GLOBAL_NORMALIZATION: bool = True
    USE_RANDOM_CROPPING: bool = True
    CACHE_DIR_BASE: str = "/kaggle/input/cnn-3s-dataset"
    DATASET_SUBDIR: str = "cnn_3s_dataset"
    train_dir: str = "train"
    val_dir: str = "val"
    test_dir: str = "test"
    metadata_file: str = "kaggle_metadata.csv"

    # Model architecture
    img_size: int = 224
    num_classes: int = 2
    in_channels: int = 1
    dropout: float = 0.1

    # CNN specific architecture parameters
    cnn_conv_channels: list[int] = field(default_factory=list)
    cnn_pool_after_conv: list[bool] = field(default_factory=list)
    linear_output_units_1st_fc: int = 512  # Fixed: Added missing attribute

    # Training parameters
    learning_rate: float = 1e-4
    batch_size: int = 32
    epochs: int = 20
    weight_decay: float = 1e-4
    num_workers: int = 4

    # Data augmentation
    apply_augmentation: bool = True
    augmentation_prob: float = 0.5
    audio_length_seconds: float = 3.0
    overlap_ratio: float = 0.5

    model_size: str = ""
    dataset_name: str = ""

    def validate(self):
        assert self.learning_rate > 0, "learning_rate must be positive"
        assert self.batch_size > 0, "batch_size must be positive"
        assert self.epochs > 0, "epochs must be positive"
        assert self.num_workers >= 0, "num_workers must be non-negative"
        assert len(self.cnn_conv_channels) == len(self.cnn_pool_after_conv), (
            "cnn_conv_channels and cnn_pool_after_conv must have the same length"
        )

    def get_full_cache_dir(self):
        return os.path.join(self.CACHE_DIR_BASE, self.DATASET_SUBDIR)


In [3]:
BASE_CONFIG = Config()

# L·∫•y t·∫•t c·∫£ c√°c tham s·ªë t·ª´ BASE_CONFIG ngo·∫°i tr·ª´ c√°c tham s·ªë m√† ch√∫ng ta mu·ªën ghi ƒë√® ri√™ng cho t·ª´ng lo·∫°i model
base_params = {
    f.name: getattr(BASE_CONFIG, f.name)
    for f in BASE_CONFIG.__dataclass_fields__.values()
    if f.init and f.name not in ["model_size", "dataset_name", "cnn_conv_channels", 
                                 "cnn_pool_after_conv", "linear_output_units_1st_fc"]
}

ALL_MODEL_CONFIGS = {
    "CNN_Small": Config(
        **base_params,
        model_size="CNN_Small",
        dataset_name="cnn_3s_dataset",
        cnn_conv_channels=[32, 64, 128],  # Reduced channels
        cnn_pool_after_conv=[True, True, True],
        linear_output_units_1st_fc=192,  # Reduced FC units
    ),
    # CNN Large: Target 5-7M parameters
    "CNN_Large": Config(
        **base_params,
        model_size="CNN_Large",
        dataset_name="cnn_3s_dataset",
        cnn_conv_channels=[64, 128, 256, 512, 512],  # More layers and channels
        cnn_pool_after_conv=[True, True, True, True, False],
        linear_output_units_1st_fc=192,  # Larger FC units
    ),
}

### Model Definition

In [4]:
class CNN_Audio(nn.Module):
    def __init__(self, img_size: int, in_channels: int, num_classes: int,
                 linear_output_units_1st_fc: int,
                 cnn_conv_channels: list[int], cnn_pool_after_conv: list[bool], 
                 dropout: float = 0.3):
        super(CNN_Audio, self).__init__()
        self.in_channels = in_channels
        self.cnn_conv_channels = cnn_conv_channels
        self.cnn_pool_after_conv = cnn_pool_after_conv
        self.img_size = img_size
        self.dropout = dropout
        self.num_classes = num_classes

        # Build convolutional layers with proper architecture
        layers = []
        in_dim = self.in_channels
        
        for i, out_dim in enumerate(self.cnn_conv_channels):
            # Convolutional block
            layers.append(nn.Conv2d(in_dim, out_dim, kernel_size=3, padding=1, bias=False))
            layers.append(nn.BatchNorm2d(out_dim))
            layers.append(nn.ReLU(inplace=True))
            
            # Optional pooling
            if self.cnn_pool_after_conv[i]:
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            
            # Dropout for regularization
            layers.append(nn.Dropout2d(self.dropout))
            in_dim = out_dim
        
        self.conv_layers = nn.Sequential(*layers)

        # Calculate flattened size dynamically
        with torch.no_grad():
            dummy_input = torch.randn(1, self.in_channels, self.img_size, self.img_size)
            dummy_output = self.conv_layers(dummy_input)
            self.flattened_size = dummy_output.view(1, -1).size(1)

        # Adaptive average pooling to reduce feature map size
        self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))
        
        # Calculate size after adaptive pooling
        with torch.no_grad():
            dummy_pooled = self.adaptive_pool(dummy_output)
            self.pooled_size = dummy_pooled.view(1, -1).size(1)

        # Classifier with proper architecture
        self.classifier = nn.Sequential(
            nn.Linear(self.pooled_size, linear_output_units_1st_fc),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(linear_output_units_1st_fc, linear_output_units_1st_fc // 2),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout),
            nn.Linear(linear_output_units_1st_fc // 2, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.adaptive_pool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

### Dataset

In [5]:
class AudioDataset(Dataset):
    def __init__(self, cache_dir: str, set_type: str, n_mels: int, config: Config):
        self.cache_path = os.path.join(cache_dir, getattr(config, f"{set_type}_dir"))
        self.metadata_path = os.path.join(self.cache_path, config.metadata_file)
        self.n_mels = n_mels
        self.training = set_type == "train"
        self.config = config

        if not os.path.exists(self.metadata_path):
            raise FileNotFoundError(f"Metadata file not found: {self.metadata_path}")
        self.metadata = pd.read_csv(self.metadata_path)

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        row = self.metadata.iloc[idx]
        npy_path = os.path.join(self.cache_path, row["npy_path"])
        label = int(row["label"])

        try:
            if not os.path.exists(npy_path):
                raise FileNotFoundError(f"Spectrogram file not found: {npy_path}")
            spectrogram = np.load(npy_path)
            spectrogram = self._preprocess_spectrogram(spectrogram)
        except Exception as e:
            print(f"Error loading {npy_path}: {e}")
            return None

        return spectrogram, torch.tensor(label).long()

    def _preprocess_spectrogram(self, spec):
        if isinstance(spec, np.ndarray):
            spec = torch.from_numpy(spec).float()

        if spec.ndim == 2:
            spec = spec.unsqueeze(0)
        elif spec.ndim == 4:
            spec = spec.squeeze(0)

        if spec.shape[-2:] != (self.config.img_size, self.config.img_size):
            spec = F.interpolate(
                spec.unsqueeze(0),
                size=(self.config.img_size, self.config.img_size),
                mode="bilinear",
                align_corners=False,
            ).squeeze(0)

        if self.config.USE_GLOBAL_NORMALIZATION:
            mean = spec.mean()
            std = spec.std() + self.config.NORM_EPSILON
            spec = (spec - mean) / std

        # Apply augmentation during training
        if self.training and self.config.apply_augmentation:
            # Frequency masking
            for _ in range(self.config.NUM_FREQ_MASKS):
                freq_mask_width = torch.randint(
                    0, self.config.FREQ_MASK_MAX_WIDTH, (1,)
                ).item()
                freq_start = torch.randint(
                    0, max(1, spec.shape[-2] - freq_mask_width), (1,)
                ).item()
                spec[:, freq_start : freq_start + freq_mask_width, :] = (
                    self.config.MASK_REPLACEMENT_VALUE
                )
            
            # Time masking
            for _ in range(self.config.NUM_TIME_MASKS):
                time_mask_width = torch.randint(
                    0, self.config.TIME_MASK_MAX_WIDTH, (1,)
                ).item()
                time_start = torch.randint(
                    0, max(1, spec.shape[-1] - time_mask_width), (1,)
                ).item()
                spec[:, :, time_start : time_start + time_mask_width] = (
                    self.config.MASK_REPLACEMENT_VALUE
                )

        return spec


def custom_collate_fn(batch):
    """Custom collate function to handle None values and invalid shapes"""
    valid_batch = [item for item in batch if item is not None]

    if not valid_batch:
        print("Warning: Empty batch after filtering")
        return torch.empty(0, 1, 224, 224), torch.empty(0, dtype=torch.long)

    data_list, label_list = zip(*valid_batch)
    expected_shape = (1, 224, 224)
    valid_data = []
    valid_labels = []

    for data, label in zip(data_list, label_list):
        if isinstance(data, torch.Tensor) and data.shape == expected_shape:
            valid_data.append(data)
            valid_labels.append(label)
        else:
            print(
                f"Warning: Skipping invalid shape {data.shape if hasattr(data, 'shape') else type(data)} for data with label {label}"
            )

    if not valid_data:
        print("Warning: No valid data in batch")
        return torch.empty(0, 1, 224, 224), torch.empty(0, dtype=torch.long)

    return torch.stack(valid_data, dim=0), torch.stack(valid_labels, dim=0)

### Training

In [6]:
def validate_dataset(dataset, name):
    invalid_count = 0
    invalid_files = []
    for idx in range(len(dataset)):
        row = dataset.metadata.iloc[idx]
        npy_path = os.path.join(dataset.cache_path, row["npy_path"])
        if not os.path.exists(npy_path):
            invalid_count += 1
            invalid_files.append(npy_path)
    if invalid_count > 0:
        print(f"Warning: {invalid_count} invalid samples found in {name} dataset")
        for f in invalid_files[:5]:
            print(f" - Missing file: {f}")
        if len(invalid_files) > 5:
            print(f" ... and {len(invalid_files) - 5} more")
    return invalid_count


def train_model(
    model, train_loader, val_loader, optimizer, criterion, device, num_epochs, run_name
):
    model.to(device)
    best_val_f1 = -1
    patience = 5
    patience_counter = 0
    warmup_epochs = 3

    # ƒê√£ c·∫≠p nh·∫≠t: ƒê·∫£m b·∫£o T_max ƒë∆∞·ª£c t√≠nh to√°n ch√≠nh x√°c
    # L·ªãch tr√¨nh Cosine Annealing cho ph·∫ßn sau c·ªßa qu√° tr√¨nh ƒë√†o t·∫°o
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=num_epochs - warmup_epochs, eta_min=1e-6
    )
    # L·ªãch tr√¨nh Warmup cho c√°c epoch ƒë·∫ßu ti√™n
    warmup_scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer,
        lr_lambda=lambda epoch: (epoch + 1) / warmup_epochs
        if epoch < warmup_epochs
        else 1.0,
    )

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        batch_count = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for batch_idx, batch in enumerate(pbar):
            if batch is None or len(batch[0]) == 0:
                print(f"Warning: Skipping empty batch at index {batch_idx}")
                continue

            data, labels = batch
            data, labels = data.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, labels)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            total_loss += loss.item()
            batch_count += 1

            pbar.set_postfix({"loss": f"{loss.item():.4f}"})

        if batch_count == 0:
            print(f"Error: No valid batches in epoch {epoch + 1}")
            continue

        if epoch < warmup_epochs:
            warmup_scheduler.step()
        else:
            scheduler.step()

        val_loss, val_preds, val_labels, val_probs = evaluate_model(
            model, val_loader, criterion, device
        )
        val_acc = accuracy_score(val_labels, val_preds)
        val_f1 = f1_score(val_labels, val_preds, average="binary")
        val_roc_auc = roc_auc_score(val_labels, val_probs[:, 1])

        print(
            f"Epoch {epoch + 1}: Train Loss: {total_loss / batch_count:.4f}, "
            f"Val Loss: {val_loss:.4f}, Val F1: {val_f1:.4f}"
        )

        wandb.log(
            {
                "epoch": epoch,
                "train_loss": total_loss / batch_count,
                "val_loss": val_loss,
                "val_f1": val_f1,
                "val_accuracy": val_acc,
                "val_roc_auc": val_roc_auc,
                "learning_rate": optimizer.param_groups[0]["lr"],
                "warmup_phase": epoch < warmup_epochs,
            }
        )

        if val_f1 > best_val_f1:
            best_val_f1 = val_f1
            patience_counter = 0
            model_save_path = f"best_model_{run_name}.pth"
            torch.save(
                {
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "scheduler_state_dict": scheduler.state_dict(),
                    "epoch": epoch,
                    "best_val_f1": best_val_f1,
                },
                model_save_path,
            )
            print(f"Saved best model with F1: {best_val_f1:.4f}")
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch + 1}")
            break

    return model


def evaluate_model(model, loader, criterion, device, return_cm=False):
    """Enhanced evaluation function"""
    model.eval()
    total_loss = 0
    all_preds = []
    all_labels = []
    all_probs = []

    with torch.no_grad():
        pbar = tqdm(loader, desc="Evaluating", leave=False)
        for batch in pbar:
            if batch is None or len(batch[0]) == 0:
                continue
            
            data, labels = batch
            if -1 in labels.cpu().numpy():
                continue

            data, labels = data.to(device), labels.to(device)
            outputs = model(data)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            probs = torch.softmax(outputs, dim=1)
            _, preds = torch.max(outputs, 1)

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())

    if len(all_labels) < 2:
        print("Warning: Too few samples for reliable evaluation")
        return (
            float("inf"),
            [],
            [],
            np.array([]),
            np.zeros((2, 2)) if return_cm else None,
        )

    avg_loss = total_loss / len(loader) if len(loader) > 0 else 0.0
    
    if return_cm:
        cm = confusion_matrix(all_labels, all_preds)
        return avg_loss, all_preds, all_labels, np.array(all_probs), cm
    
    return avg_loss, all_preds, all_labels, np.array(all_probs)


def plot_confusion_matrix(cm, run_name, save_dir="results"):
    """Plot and save confusion matrix"""
    os.makedirs(save_dir, exist_ok=True)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=["Real", "Fake"],
        yticklabels=["Real", "Fake"],
        ax=ax,
    )
    ax.set_xlabel("Predicted")
    ax.set_ylabel("True")
    ax.set_title(f"Confusion Matrix - {run_name}")
    cm_plot_path = os.path.join(save_dir, f"cm_{run_name}.png")
    fig.savefig(cm_plot_path, dpi=150, bbox_inches="tight")
    plt.close(fig)
    return cm_plot_path


def run_training(training_params):
    """Main training function"""
    torch.manual_seed(Config.SEED)
    np.random.seed(Config.SEED)

    model_size = training_params["model_size"]
    epochs = training_params["epochs"]
    learning_rate = training_params["learning_rate"]
    batch_size = training_params["batch_size"]
    num_workers = training_params["num_workers"]

    if model_size not in ALL_MODEL_CONFIGS:
        print(f"Error: Model size '{model_size}' not found in ALL_MODEL_CONFIGS.")
        return

    config = ALL_MODEL_CONFIGS[model_size]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # Initialize improved model
    model = CNN_Audio(
        img_size=config.img_size,
        in_channels=config.in_channels,
        num_classes=config.num_classes,
        linear_output_units_1st_fc=config.linear_output_units_1st_fc,
        cnn_conv_channels=config.cnn_conv_channels,
        cnn_pool_after_conv=config.cnn_pool_after_conv,
        dropout=config.dropout,
    )
    model = model.to(device)

    param_count = sum(p.numel() for p in model.parameters())
    print(f"Configuring {model_size} model with {param_count:,} parameters...")

    # S·ª≠ d·ª•ng h√†m get_full_cache_dir ƒë·ªÉ t·∫°o ƒë∆∞·ªùng d·∫´n dataset
    model_cache_dir = config.get_full_cache_dir()
    print(f"Loading data from: {model_cache_dir}")

    # Kh·ªüi t·∫°o dataset v·ªõi 'config' ho√†n ch·ªânh
    train_dataset = AudioDataset(model_cache_dir, "train", config.N_MELS, config)
    val_dataset = AudioDataset(model_cache_dir, "val", config.N_MELS, config)
    test_dataset = AudioDataset(model_cache_dir, "test", config.N_MELS, config)

    for dataset, name in [
        (train_dataset, "train"),
        (val_dataset, "val"),
        (test_dataset, "test"),
    ]:
        invalid_count = validate_dataset(dataset, name)
        if invalid_count == len(dataset):
            print(f"Error: All samples in {name} dataset are invalid")
            return

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        collate_fn=custom_collate_fn,
        pin_memory=True,
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=custom_collate_fn,
        pin_memory=True,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=custom_collate_fn,
        pin_memory=True,
    )

    print(f"Train samples: {len(train_dataset)}")
    print(f"Validation samples: {len(val_dataset)}")
    print(f"Test samples: {len(test_dataset)}")
    print(f"Using Batch size: {batch_size}")

    class_counts = np.bincount(
        [
            train_dataset[i][1].item()
            for i in range(len(train_dataset))
            if train_dataset[i] is not None
        ]
    )
    if 0 in class_counts:
        print(
            f"Error: Class {np.argwhere(class_counts == 0).flatten()} has no samples in training dataset"
        )
        return
    class_weights = torch.tensor(
        [1.0 / max(count, 1e-6) for count in class_counts], dtype=torch.float
    ).to(device)
    criterion = nn.CrossEntropyLoss(weight=class_weights)
    optimizer = optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=config.weight_decay
    )

    run_name = f"{model_size}_{config.dataset_name}_{datetime.now().strftime('%H%M%S')}"
    wandb.init(project="audio-deepfake-detection", name=run_name, config=training_params) # Logging training_params

    trained_model = train_model(
        model,
        train_loader,
        val_loader,
        optimizer,
        criterion,
        device,
        epochs,
        run_name
    )
    
    print(f"\n--- Evaluating {model_size} on Test Set ({config.dataset_name}) ---")
    test_loss, test_preds, test_labels, test_probs, test_cm = evaluate_model(
        trained_model, test_loader, criterion, device, return_cm=True
    )

    test_acc = accuracy_score(test_labels, test_preds)
    test_f1 = f1_score(test_labels, test_preds, average="binary")
    test_roc_auc = roc_auc_score(test_labels, test_probs[:, 1])

    print(f"Test Loss: {test_loss:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Test F1-score: {test_f1:.4f}")
    print(f"Test ROC AUC: {test_roc_auc:.4f}")

    cm_plot_path = plot_confusion_matrix(test_cm, run_name=run_name, save_dir="results")
    wandb.log(
        {
            "test_loss": test_loss,
            "test_accuracy": test_acc,
            "test_f1_score": test_f1,
            "test_roc_auc": test_roc_auc,
            "confusion_matrix": wandb.Image(cm_plot_path),
        }
    )
    
    wandb.finish()
    
    return trained_model

### Define training parameters for CNN models

In [7]:
training_params_small = {
    "model_size": "CNN_Small",
    "epochs": 20,
    "learning_rate": 1e-4,
    "batch_size": 32,
    "num_workers": 4,
}

training_params_large = {
    "model_size": "CNN_Large",
    "epochs": 20,
    "learning_rate": 1e-4,
    "batch_size": 32,
    "num_workers": 4,
}

print("=== Training CNN_Small ===")
trained_model_small = run_training(training_params_small)

print("\n=== Training CNN_Large ===")
trained_model_large = run_training(training_params_large)


=== Training CNN_Small ===
Using device: cuda
Configuring CNN_Small model with 505,026 parameters...
Loading data from: /kaggle/input/cnn-3s-dataset/cnn_3s_dataset
Train samples: 102896
Validation samples: 6996
Test samples: 14066
Using Batch size: 32


[34m[1mwandb[0m: Tracking run with wandb version 0.19.9
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250609_102208-7lprh2ra[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mCNN_Small_cnn_3s_dataset_102208[0m
[34m[1mwandb[0m: ‚≠êÔ∏è View project at [34m[4mhttps://wandb.ai/nekloyh-none/audio-deepfake-detection[0m
[34m[1mwandb[0m: üöÄ View run at [34m[4mhttps://wandb.ai/nekloyh-none/audio-deepfake-detection/runs/7lprh2ra[0m
Epoch 1/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:25<00:00, 15.62it/s, loss=0.4595]
                                                             

Epoch 1: Train Loss: 0.6231, Val Loss: 0.6531, Val F1: 0.7382
Saved best model with F1: 0.7382


Epoch 2/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:24<00:00, 15.72it/s, loss=0.6416]
                                                             

Epoch 2: Train Loss: 0.4416, Val Loss: 0.4796, Val F1: 0.8204
Saved best model with F1: 0.8204


Epoch 3/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:26<00:00, 15.54it/s, loss=0.3731]
                                                             

Epoch 3: Train Loss: 0.3627, Val Loss: 0.2643, Val F1: 0.8773
Saved best model with F1: 0.8773


Epoch 4/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:24<00:00, 15.70it/s, loss=0.6315]
                                                             

Epoch 4: Train Loss: 0.3278, Val Loss: 0.2423, Val F1: 0.8835
Saved best model with F1: 0.8835


Epoch 5/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.83it/s, loss=0.2182]
                                                             

Epoch 5: Train Loss: 0.3095, Val Loss: 0.2426, Val F1: 0.8873
Saved best model with F1: 0.8873


Epoch 6/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:22<00:00, 15.89it/s, loss=0.0841]
                                                             

Epoch 6: Train Loss: 0.2973, Val Loss: 0.2358, Val F1: 0.8870


Epoch 7/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.78it/s, loss=0.2155]
                                                             

Epoch 7: Train Loss: 0.2881, Val Loss: 0.1974, Val F1: 0.9079
Saved best model with F1: 0.9079


Epoch 8/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.80it/s, loss=0.3576]
                                                             

Epoch 8: Train Loss: 0.2801, Val Loss: 0.2108, Val F1: 0.9183
Saved best model with F1: 0.9183


Epoch 9/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:24<00:00, 15.71it/s, loss=0.2135]
                                                             

Epoch 9: Train Loss: 0.2750, Val Loss: 0.1869, Val F1: 0.9121


Epoch 10/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.77it/s, loss=0.3305]
                                                             

Epoch 10: Train Loss: 0.2683, Val Loss: 0.1867, Val F1: 0.9148


Epoch 11/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.81it/s, loss=0.1450]
                                                             

Epoch 11: Train Loss: 0.2633, Val Loss: 0.1681, Val F1: 0.9324
Saved best model with F1: 0.9324


Epoch 12/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.82it/s, loss=0.1498]
                                                             

Epoch 12: Train Loss: 0.2580, Val Loss: 0.1668, Val F1: 0.9346
Saved best model with F1: 0.9346


Epoch 13/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:22<00:00, 15.85it/s, loss=0.1038]
                                                             

Epoch 13: Train Loss: 0.2559, Val Loss: 0.1667, Val F1: 0.9319


Epoch 14/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.84it/s, loss=0.1342]
                                                             

Epoch 14: Train Loss: 0.2507, Val Loss: 0.1921, Val F1: 0.9093


Epoch 15/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:24<00:00, 15.72it/s, loss=0.1116]
                                                             

Epoch 15: Train Loss: 0.2482, Val Loss: 0.1529, Val F1: 0.9425
Saved best model with F1: 0.9425


Epoch 16/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:24<00:00, 15.71it/s, loss=0.1580]
                                                             

Epoch 16: Train Loss: 0.2445, Val Loss: 0.1591, Val F1: 0.9332


Epoch 17/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.83it/s, loss=0.1096]
                                                             

Epoch 17: Train Loss: 0.2416, Val Loss: 0.1495, Val F1: 0.9370


Epoch 18/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.83it/s, loss=0.3040]
                                                             

Epoch 18: Train Loss: 0.2397, Val Loss: 0.1545, Val F1: 0.9332


Epoch 19/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:25<00:00, 15.67it/s, loss=0.1141]
                                                             

Epoch 19: Train Loss: 0.2392, Val Loss: 0.1524, Val F1: 0.9345


Epoch 20/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [03:23<00:00, 15.79it/s, loss=0.2878]
                                                             

Epoch 20: Train Loss: 0.2383, Val Loss: 0.1524, Val F1: 0.9350
Early stopping at epoch 20

--- Evaluating CNN_Small on Test Set (cnn_3s_dataset) ---


                                                             

Test Loss: 0.1573
Test Accuracy: 0.9291
Test F1-score: 0.9317
Test ROC AUC: 0.9876


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà
[34m[1mwandb[0m: learning_rate ‚ñÜ‚ñà‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m: test_accuracy ‚ñÅ
[34m[1mwandb[0m: test_f1_score ‚ñÅ
[34m[1mwandb[0m:     test_loss ‚ñÅ
[34m[1mwandb[0m:  test_roc_auc ‚ñÅ
[34m[1mwandb[0m:    train_loss ‚ñà‚ñÖ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m:  val_accuracy ‚ñÅ‚ñÑ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m:        val_f1 ‚ñÅ‚ñÑ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m:      val_loss ‚ñà‚ñÜ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m:   val_roc_auc ‚ñÅ‚ñÖ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m: 
[34m[1


=== Training CNN_Large ===
Using device: cuda
Configuring CNN_Large model with 5,502,882 parameters...
Loading data from: /kaggle/input/cnn-3s-dataset/cnn_3s_dataset
Train samples: 102896
Validation samples: 6996
Test samples: 14066
Using Batch size: 32


[34m[1mwandb[0m: Tracking run with wandb version 0.19.9
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250609_114040-ccw9piht[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mCNN_Large_cnn_3s_dataset_114040[0m
[34m[1mwandb[0m: ‚≠êÔ∏è View project at [34m[4mhttps://wandb.ai/nekloyh-none/audio-deepfake-detection[0m
[34m[1mwandb[0m: üöÄ View run at [34m[4mhttps://wandb.ai/nekloyh-none/audio-deepfake-detection/runs/ccw9piht[0m
Epoch 1/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.3611]
                                                             

Epoch 1: Train Loss: 0.5333, Val Loss: 0.2403, Val F1: 0.8871
Saved best model with F1: 0.8871


Epoch 2/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:12<00:00,  5.82it/s, loss=0.1481]
                                                             

Epoch 2: Train Loss: 0.3268, Val Loss: 0.2099, Val F1: 0.9034
Saved best model with F1: 0.9034


Epoch 3/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:11<00:00,  5.83it/s, loss=0.3027]
                                                             

Epoch 3: Train Loss: 0.2978, Val Loss: 0.1809, Val F1: 0.9280
Saved best model with F1: 0.9280


Epoch 4/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.2794]
                                                             

Epoch 4: Train Loss: 0.2700, Val Loss: 0.1526, Val F1: 0.9361
Saved best model with F1: 0.9361


Epoch 5/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.2580]
                                                             

Epoch 5: Train Loss: 0.2434, Val Loss: 0.1310, Val F1: 0.9448
Saved best model with F1: 0.9448


Epoch 6/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.4455]
                                                             

Epoch 6: Train Loss: 0.2202, Val Loss: 0.1052, Val F1: 0.9651
Saved best model with F1: 0.9651


Epoch 7/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:15<00:00,  5.79it/s, loss=0.0738]
                                                             

Epoch 7: Train Loss: 0.2016, Val Loss: 0.0809, Val F1: 0.9720
Saved best model with F1: 0.9720


Epoch 8/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.1662]
                                                             

Epoch 8: Train Loss: 0.1866, Val Loss: 0.0704, Val F1: 0.9716


Epoch 9/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.4478]
                                                             

Epoch 9: Train Loss: 0.1743, Val Loss: 0.0688, Val F1: 0.9742
Saved best model with F1: 0.9742


Epoch 10/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.0468]
                                                             

Epoch 10: Train Loss: 0.1600, Val Loss: 0.0545, Val F1: 0.9790
Saved best model with F1: 0.9790


Epoch 11/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.0082]
                                                             

Epoch 11: Train Loss: 0.1500, Val Loss: 0.0604, Val F1: 0.9769


Epoch 12/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:17<00:00,  5.77it/s, loss=0.0778]
                                                             

Epoch 12: Train Loss: 0.1406, Val Loss: 0.0590, Val F1: 0.9772


Epoch 13/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:17<00:00,  5.77it/s, loss=0.2913]
                                                             

Epoch 13: Train Loss: 0.1323, Val Loss: 0.0668, Val F1: 0.9729


Epoch 14/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.1066]
                                                             

Epoch 14: Train Loss: 0.1195, Val Loss: 0.0552, Val F1: 0.9782


Epoch 15/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.2846]
                                                             

Epoch 15: Train Loss: 0.1134, Val Loss: 0.0392, Val F1: 0.9859
Saved best model with F1: 0.9859


Epoch 16/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:13<00:00,  5.81it/s, loss=0.4624]
                                                             

Epoch 16: Train Loss: 0.1056, Val Loss: 0.0552, Val F1: 0.9784


Epoch 17/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:14<00:00,  5.80it/s, loss=0.0568]
                                                             

Epoch 17: Train Loss: 0.0985, Val Loss: 0.0449, Val F1: 0.9825


Epoch 18/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:16<00:00,  5.78it/s, loss=0.0232]
                                                             

Epoch 18: Train Loss: 0.0930, Val Loss: 0.0316, Val F1: 0.9889
Saved best model with F1: 0.9889


Epoch 19/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:15<00:00,  5.79it/s, loss=0.0126]
                                                             

Epoch 19: Train Loss: 0.0891, Val Loss: 0.0295, Val F1: 0.9894
Saved best model with F1: 0.9894


Epoch 20/20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3216/3216 [09:16<00:00,  5.78it/s, loss=0.1459]
                                                             

Epoch 20: Train Loss: 0.0869, Val Loss: 0.0287, Val F1: 0.9899
Saved best model with F1: 0.9899

--- Evaluating CNN_Large on Test Set (cnn_3s_dataset) ---


                                                             

Test Loss: 0.0311
Test Accuracy: 0.9886
Test F1-score: 0.9886
Test ROC AUC: 0.9994


[34m[1mwandb[0m:                                                                                
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:         epoch ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà
[34m[1mwandb[0m: learning_rate ‚ñÜ‚ñà‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñÜ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m: test_accuracy ‚ñÅ
[34m[1mwandb[0m: test_f1_score ‚ñÅ
[34m[1mwandb[0m:     test_loss ‚ñÅ
[34m[1mwandb[0m:  test_roc_auc ‚ñÅ
[34m[1mwandb[0m:    train_loss ‚ñà‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m:  val_accuracy ‚ñÅ‚ñÇ‚ñÑ‚ñÑ‚ñÖ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m:        val_f1 ‚ñÅ‚ñÇ‚ñÑ‚ñÑ‚ñÖ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñá‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m:      val_loss ‚ñà‚ñá‚ñÜ‚ñÖ‚ñÑ‚ñÑ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÅ
[34m[1mwandb[0m:   val_roc_auc ‚ñÅ‚ñÉ‚ñÖ‚ñÜ‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà
[34m[1mwandb[0m: 
[34m[1