In [4]:
import os
import time
import numpy as np
import pandas as pd
from dataclasses import dataclass
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR
from tqdm import tqdm
import timm
import torchvision

In [5]:
@dataclass
class Config:
    train_csv: str = '/kaggle/input/birdclef-2025/train.csv'
    taxonomy_csv: str = '/kaggle/input/birdclef-2025/taxonomy.csv'
    spectrogram_npy: str = '/kaggle/input/birdclef-mel-128hop/falcon_birdclef_cnn_preprocessed_128hop.npy'
    img_size: int = 256
    in_channels: int = 1
    model_name: str = "tf_efficientnetv2_s.in21k_ft_in1k"
    pretrained: bool = True
    epochs: int = 15
    batch_size: int = 32
    num_workers: int = 4
    lr: float = 3e-4
    weight_decay: float = 1e-5
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
    T_max: int = 15
    min_lr: float = 1e-6
    seed: int = 42
    val_split_ratio: float = 0.2
cfg = Config()

In [3]:
def set_seed(seed):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
set_seed(cfg.seed)

In [6]:
# -------------------- LOAD DATA -------------------- #
train_df = pd.read_csv(cfg.train_csv)
taxonomy = pd.read_csv(cfg.taxonomy_csv)
spec_dict = np.load(cfg.spectrogram_npy, allow_pickle=True).item()
num_classes = len(taxonomy['primary_label'].unique())

In [7]:
# -------------------- DATASET -------------------- #
class MelSpecDataset(Dataset):
    def __init__(self, df, spec_dict, cfg):
        self.df = df.reset_index(drop=True)
        self.spec_dict = spec_dict
        self.cfg = cfg
        self.num_classes = num_classes
        self.df["sample_id"] = self.df["filename"].apply(lambda f: f.split('/')[0] + '-' + f.split('/')[-1].split('.')[0])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        sample_id = row["sample_id"]
        item = self.spec_dict.get(sample_id, None)

        if item is None:
            mel = np.zeros((self.cfg.img_size, self.cfg.img_size), dtype=np.float32)
            label_id = 0
        else:
            mel = item["mel_spec"]
            label_id = item["label_id"]

        if mel.ndim == 2:
            mel = np.expand_dims(mel, axis=0)

        mel_tensor = torch.tensor(mel, dtype=torch.float32)
        target = torch.zeros(self.num_classes)
        target[label_id] = 1.0
        return mel_tensor, target

In [8]:
_, val_df = train_test_split(train_df, test_size=cfg.val_split_ratio, stratify=train_df["primary_label"], random_state=cfg.seed)
train_ds = MelSpecDataset(train_df, spec_dict, cfg)
val_ds = MelSpecDataset(val_df, spec_dict, cfg)
train_loader = DataLoader(train_ds, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.num_workers)
val_loader = DataLoader(val_ds, batch_size=cfg.batch_size, shuffle=False, num_workers=cfg.num_workers)

In [10]:
class BirdCLEFModel(nn.Module):
    def __init__(self, cfg, num_classes):
        super().__init__()
        self.cfg = cfg
        
        self.backbone = timm.create_model(
            cfg.model_name,
            pretrained=False,  
            in_chans=cfg.in_channels,
            drop_rate=0.0,    
            drop_path_rate=0.0
        )
        
        backbone_out = self.backbone.classifier.in_features
        self.backbone.classifier = nn.Identity()
        self.pooling = nn.AdaptiveAvgPool2d(1)
        self.feat_dim = backbone_out
        self.classifier = nn.Linear(backbone_out, num_classes)
        
    def forward(self, x):
        features = self.backbone(x)
        if isinstance(features, dict):
            features = features['features']
        if len(features.shape) == 4:
            features = self.pooling(features)
            features = features.view(features.size(0), -1)

        logits = self.classifier(features)
        return logits

In [11]:
class FocalLossBCE(nn.Module):
    def __init__(self, alpha=0.25, gamma=2, reduction="mean", bce_weight=0.6, focal_weight=1.4):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.bce = nn.BCEWithLogitsLoss(reduction=reduction)
        self.bce_weight = bce_weight
        self.focal_weight = focal_weight

    def forward(self, logits, targets):
        focal = torchvision.ops.focal_loss.sigmoid_focal_loss(
            inputs=logits,
            targets=targets,
            alpha=self.alpha,
            gamma=self.gamma,
            reduction=self.reduction,
        )
        bce = self.bce(logits, targets)
        return self.bce_weight * bce + self.focal_weight * focal

def get_criterion():
    return FocalLossBCE()

In [14]:
def train():
    model = BirdCLEFModel(cfg, num_classes)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)
    model = model.to(cfg.device)

    optimizer = AdamW(model.parameters(), lr=cfg.lr, weight_decay=cfg.weight_decay)
    scheduler = CosineAnnealingLR(optimizer, T_max=cfg.T_max, eta_min=cfg.min_lr)
    criterion = get_criterion()
    best_val_auc = 0.0

    for epoch in range(cfg.epochs):
        model.train()
        total_loss = 0
        tk = tqdm(train_loader, desc=f"Epoch {epoch+1}/{cfg.epochs}")
        for x, y in tk:
            x, y = x.to(cfg.device), y.to(cfg.device)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            tk.set_postfix(loss=loss.item())
        scheduler.step()

        # Validation
        model.eval()
        all_preds, all_targets = [], []
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(cfg.device), y.to(cfg.device)
                logits = model(x)
                preds = torch.sigmoid(logits).cpu().numpy()
                targets = y.cpu().numpy()
                all_preds.append(preds)
                all_targets.append(targets)
        all_preds = np.vstack(all_preds)
        all_targets = np.vstack(all_targets)
        aucs = []
        for i in range(num_classes):
            if np.sum(all_targets[:, i]) > 0:
                try:
                    auc = roc_auc_score(all_targets[:, i], all_preds[:, i])
                    aucs.append(auc)
                except:
                    aucs.append(np.nan)
        mean_auc = np.nanmean(aucs)
        print(f"Epoch {epoch+1}: Train Loss = {total_loss / len(train_loader):.4f}, Val AUC = {mean_auc:.4f}")

        if mean_auc > best_val_auc:
            best_val_auc = mean_auc
            torch.save(model.state_dict(), "birdclef_final_model.pt")
            print("Saved best model (AUC improved)")

    print("Saving final model")
    torch.save(model.state_dict(), "final_model.pt")

In [15]:
if __name__ == "__main__":
    train()

Epoch 1/15: 100%|██████████| 893/893 [04:40<00:00,  3.18it/s, loss=0.0202]


Epoch 1: Train Loss = 0.0322, Val AUC = 0.8077
Saved best model (AUC improved)


Epoch 2/15: 100%|██████████| 893/893 [04:51<00:00,  3.06it/s, loss=0.0159]


Epoch 2: Train Loss = 0.0180, Val AUC = 0.9144
Saved best model (AUC improved)


Epoch 3/15: 100%|██████████| 893/893 [04:49<00:00,  3.08it/s, loss=0.0152]


Epoch 3: Train Loss = 0.0153, Val AUC = 0.9446
Saved best model (AUC improved)


Epoch 4/15: 100%|██████████| 893/893 [04:47<00:00,  3.11it/s, loss=0.0179] 


Epoch 4: Train Loss = 0.0132, Val AUC = 0.9656
Saved best model (AUC improved)


Epoch 5/15: 100%|██████████| 893/893 [05:07<00:00,  2.90it/s, loss=0.0102] 


Epoch 5: Train Loss = 0.0114, Val AUC = 0.9793
Saved best model (AUC improved)


Epoch 6/15: 100%|██████████| 893/893 [04:56<00:00,  3.01it/s, loss=0.00857]


Epoch 6: Train Loss = 0.0095, Val AUC = 0.9907
Saved best model (AUC improved)


Epoch 7/15: 100%|██████████| 893/893 [04:54<00:00,  3.03it/s, loss=0.00593]


Epoch 7: Train Loss = 0.0073, Val AUC = 0.9975
Saved best model (AUC improved)


Epoch 8/15: 100%|██████████| 893/893 [04:49<00:00,  3.08it/s, loss=0.00524]


Epoch 8: Train Loss = 0.0049, Val AUC = 0.9995
Saved best model (AUC improved)


Epoch 9/15: 100%|██████████| 893/893 [04:49<00:00,  3.08it/s, loss=0.00247] 


Epoch 9: Train Loss = 0.0028, Val AUC = 0.9999
Saved best model (AUC improved)


Epoch 10/15: 100%|██████████| 893/893 [04:49<00:00,  3.08it/s, loss=0.0017]  


Epoch 10: Train Loss = 0.0015, Val AUC = 1.0000
Saved best model (AUC improved)


Epoch 11/15: 100%|██████████| 893/893 [04:49<00:00,  3.08it/s, loss=0.00283] 


Epoch 11: Train Loss = 0.0008, Val AUC = 1.0000


Epoch 12/15: 100%|██████████| 893/893 [04:50<00:00,  3.08it/s, loss=0.000971]


Epoch 12: Train Loss = 0.0005, Val AUC = 1.0000
Saved best model (AUC improved)


Epoch 13/15: 100%|██████████| 893/893 [04:50<00:00,  3.07it/s, loss=0.00229] 


Epoch 13: Train Loss = 0.0003, Val AUC = 1.0000


Epoch 14/15: 100%|██████████| 893/893 [04:49<00:00,  3.09it/s, loss=0.000163]


Epoch 14: Train Loss = 0.0003, Val AUC = 1.0000
Saved best model (AUC improved)


Epoch 15/15: 100%|██████████| 893/893 [04:48<00:00,  3.10it/s, loss=0.000712]


Epoch 15: Train Loss = 0.0002, Val AUC = 1.0000
Saved best model (AUC improved)
Saving final model
