In [1]:
!pip install -q torchlibrosa


In [2]:
import os
import sys
import random
import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm.notebook import tqdm

# Reproducibility: give the stdlib, NumPy and PyTorch RNGs the same fixed seed
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Experiment settings read by the dataset, data-loader, model and optimizer cells
CFG = dict(
    sample_rate=32000,   # rate passed to librosa.load as `sr`
    duration=5,          # seconds of audio kept per clip (multiplied by the sample rate)
    n_mels=128,          # number of mel bands in the spectrogram
    batch_size=32,
    num_workers=4,
    epochs=100,
    learning_rate=1e-4,
    in_chans=3,  # ✅ 3 for ViT, 1 for HTSAT
)


In [3]:
# Competition metadata: one row per recording, plus the species taxonomy table
train_df = pd.read_csv('/kaggle/input/birdclef-2025/train.csv')
taxonomy_df = pd.read_csv('/kaggle/input/birdclef-2025/taxonomy.csv')

# Resolve every filename to its location under the train_audio folder
train_df['full_path'] = [
    os.path.join('/kaggle/input/birdclef-2025/train_audio', name)
    for name in train_df['filename']
]

# Keep only the rows whose audio file is actually present on disk
train_df = train_df.loc[train_df['full_path'].map(os.path.exists)].reset_index(drop=True)
print(f"✅ Total files: {len(train_df)}")

# Map each primary_label string to an integer class index
le = LabelEncoder()
le.fit(train_df['primary_label'])
train_df['label_idx'] = le.transform(train_df['primary_label'])
CFG['num_classes'] = len(le.classes_)
print(f"✅ Encoded {CFG['num_classes']} classes")


✅ Total files: 28564
✅ Encoded 206 classes


In [4]:
# Hold out 20% of the recordings for validation, keeping the per-species proportions.
# NOTE(review): this rebinds train_df to the training portion only, so running the
# cell a second time splits the already-reduced frame again.
train_df, val_df = train_test_split(
    train_df,
    stratify=train_df['primary_label'],
    random_state=42,
    test_size=0.2,
)

print(f"✅ Train: {len(train_df)}, Val: {len(val_df)}")


✅ Train: 22851, Val: 5713


In [5]:
class BirdCLEFDataset(Dataset):
    """Map-style dataset turning each row of ``df`` into a spectrogram image and label.

    For every item the first ``cfg['duration']`` seconds of the audio file at
    ``row['full_path']`` are loaded at ``cfg['sample_rate']``, zero-padded if the
    recording is shorter, converted to a dB-scaled mel spectrogram with
    ``cfg['n_mels']`` bands, min-max scaled to [-1, 1] and bilinearly resized to
    ``IMAGE_SIZE``.

    Args:
        df: DataFrame with at least ``full_path`` and ``label_idx`` columns.
            Its index is reset so positional access matches ``__len__``.
        cfg: Mapping providing ``sample_rate``, ``duration``, ``n_mels`` and
            ``in_chans``.
        augment: When True, the waveform is circularly shifted by a random
            amount of up to ±10% of its length before the spectrogram is computed.
    """

    # Spatial size every spectrogram is resized to before being returned
    IMAGE_SIZE = (224, 224)

    def __init__(self, df, cfg, augment=False):
        self.df = df.reset_index(drop=True)
        self.cfg = cfg
        self.augment = augment

    def __len__(self):
        """Return the number of rows (recordings) in the dataset."""
        return len(self.df)

    @staticmethod
    def _normalize(mel_db):
        """Min-max scale ``mel_db`` to [-1, 1].

        A constant input (e.g. a silent or fully padded clip) has zero range;
        dividing by it would fill the array with NaN, so that case maps to the
        lower bound (-1) everywhere instead.
        """
        lo = mel_db.min()
        span = mel_db.max() - lo
        if span == 0:
            scaled = np.zeros_like(mel_db)
        else:
            scaled = (mel_db - lo) / span
        return 2 * scaled - 1

    def __getitem__(self, idx):
        """Return ``(image, label_idx)`` for the row at position ``idx``.

        ``image`` is a float tensor of shape ``(1, 224, 224)``, or
        ``(3, 224, 224)`` with the single channel repeated when
        ``cfg['in_chans'] == 3``.
        """
        row = self.df.iloc[idx]

        # Only the first `duration` seconds are used below, so ask librosa to
        # decode just that much instead of the whole recording.
        y, sr = librosa.load(
            row['full_path'],
            sr=self.cfg['sample_rate'],
            duration=self.cfg['duration'],
        )

        # Pad/truncate to exactly `duration` seconds worth of samples
        length = int(sr * self.cfg['duration'])
        if len(y) < length:
            y = np.pad(y, (0, length - len(y)))
        else:
            y = y[:length]

        # Augmentation: random circular time shift
        if self.augment:
            shift = int(random.uniform(-0.1, 0.1) * len(y))
            y = np.roll(y, shift)

        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=self.cfg['n_mels'])
        mel_db = librosa.power_to_db(mel).astype(np.float32)

        # Normalize -1 to 1 (NaN-safe for constant spectrograms)
        mel_db = self._normalize(mel_db)

        # F.interpolate expects a batch dimension: (1, 1, H, W) in, (1, H', W') out
        image = torch.tensor(mel_db).unsqueeze(0)
        image = F.interpolate(
            image.unsqueeze(0), size=self.IMAGE_SIZE, mode='bilinear', align_corners=False
        ).squeeze(0)

        if self.cfg['in_chans'] == 3:
            image = image.repeat(3, 1, 1)

        return image, row['label_idx']


In [6]:
# Datasets: the random time-shift augmentation is enabled for the training split only
val_ds = BirdCLEFDataset(val_df, CFG, augment=False)
train_ds = BirdCLEFDataset(train_df, CFG, augment=True)

# Loaders: training batches are reshuffled each epoch, validation order is fixed
train_dl = DataLoader(
    train_ds,
    batch_size=CFG['batch_size'],
    num_workers=CFG['num_workers'],
    shuffle=True,
    pin_memory=True,
)
val_dl = DataLoader(
    val_ds,
    batch_size=CFG['batch_size'],
    num_workers=CFG['num_workers'],
    shuffle=False,
    pin_memory=True,
)

# Pull a single training batch to confirm the tensor shape and label values
xb, yb = next(iter(train_dl))
print(f"✅ Batch shape: {xb.shape}, labels: {yb[:5]}")


✅ Batch shape: torch.Size([32, 3, 224, 224]), labels: tensor([ 75,  97, 179,  91, 144])


In [7]:
# Uncomment if using HTSAT
# sys.path.append('/kaggle/input/htsat-package-zip')
# from htsat.htsat import HTSAT_Swin_Transformer

# class DummyConfig:
#     sample_rate = 32000
#     window_size = 1024
#     hop_size = 320
#     mel_bins = 128
#     fmin = 50
#     fmax = 14000
#     enable_tscam = False

# model = HTSAT_Swin_Transformer(
#     spec_size=224,
#     patch_size=4,
#     in_chans=1,
#     num_classes=CFG['num_classes'],
#     window_size=7,
#     embed_dim=96,
#     depths=[2, 2, 6, 2],
#     num_heads=[3, 6, 12, 24],
#     ffn_expansion_factor=4,
#     qkv_bias=True,
#     config=DummyConfig()
# )

# ✅ Active model: timm ViT-B/16 (224px) with pretrained weights, built with
# CFG['in_chans'] input channels and CFG['num_classes'] outputs, then moved to the GPU
import timm

model = timm.create_model(
    "vit_base_patch16_224",
    in_chans=CFG['in_chans'],
    num_classes=CFG['num_classes'],
    pretrained=True,
).cuda()
print("✅ Model loaded")


model.safetensors:   0%|          | 0.00/346M [00:00<?, ?B/s]

✅ Model loaded


In [8]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# AdamW over all model parameters at the configured learning rate
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=CFG['learning_rate'])

# Halve the learning rate after 3 epochs without a decrease in the monitored value.
# NOTE(review): the training cell rebinds both `criterion` and `scheduler`, so only
# `optimizer` from this cell is what that loop ends up using.
scheduler = ReduceLROnPlateau(
    optimizer,
    mode='min',
    patience=3,
    factor=0.5,
    verbose=True,
)

print("✅ Optimizer, loss, scheduler ready")


✅ Optimizer, loss, scheduler ready




In [9]:
# Inspect the first three training items: tensor shape, value range, moments and label
debug_ds = Subset(train_ds, range(3))
for i in range(len(debug_ds)):
    mel_db, label = debug_ds[i]
    print(
        f"✅ Shape: {mel_db.shape}",
        f"✅ Min: {mel_db.min():.4f} Max: {mel_db.max():.4f}",
        f"✅ Mean: {mel_db.mean():.4f} Std: {mel_db.std():.4f}",
        f"✅ Label idx: {label}",
        sep="\n",
    )


✅ Shape: torch.Size([3, 224, 224])
✅ Min: -1.0000 Max: 0.9843
✅ Mean: -0.3045 Std: 0.2426
✅ Label idx: 203
✅ Shape: torch.Size([3, 224, 224])
✅ Min: -1.0000 Max: 0.9845
✅ Mean: -0.0543 Std: 0.3728
✅ Label idx: 129
✅ Shape: torch.Size([3, 224, 224])
✅ Min: -1.0000 Max: 0.9584
✅ Mean: -0.1276 Std: 0.2466
✅ Label idx: 107


In [10]:
# Directory the training cell writes model weights into; created if absent
CHECKPOINT_DIR = "./checkpoints"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Early-stopping bookkeeping.
# NOTE(review): the training cell shown in this notebook does not read these three names.
best_val_loss = float("inf")
early_stop_patience, epochs_no_improve = 10, 0

print(f"✅ Checkpoint dir: {CHECKPOINT_DIR}")


✅ Checkpoint dir: ./checkpoints


In [11]:
# ✅ Fixed-epoch training loop (40 epochs) for the `model` built above 🔒
#
# Each epoch: one mixed-precision pass over train_dl, one no-grad pass over val_dl,
# one cosine LR step, then checkpointing. `last.pth` is overwritten every epoch and
# `best_model.pth` whenever validation accuracy improves.
# Reads from earlier cells: model, optimizer, train_dl, val_dl, CHECKPOINT_DIR.

import time
import torch.nn.functional as F
# Retained so these names stay importable for any other cell; the loop itself uses the
# torch.amp API because torch.cuda.amp.GradScaler()/autocast() emit deprecation warnings.
from torch.cuda.amp import GradScaler, autocast
from sklearn.metrics import accuracy_score

# Fixed config
EPOCHS = 40  # ⬅️ Fixed to 40 epochs
best_val_acc = 0.0

# Label smoothing loss (replaces the criterion defined in the optimizer cell)
criterion = nn.CrossEntropyLoss(label_smoothing=0.1)

# Cosine learning rate scheduler, stepped once per epoch (replaces the earlier scheduler)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)

# Mixed precision: scales the loss so float16 gradients do not underflow
scaler = torch.amp.GradScaler("cuda")

print(f"✅ Training for exactly {EPOCHS} epochs...")

for epoch in range(1, EPOCHS + 1):
    start = time.time()

    # --- TRAIN ---
    model.train()
    train_loss, correct, total = 0.0, 0, 0

    for xb, yb in tqdm(train_dl, desc=f"Train Ep{epoch}"):
        # The loaders use pin_memory=True, so the host-to-device copies can be asynchronous
        xb, yb = xb.cuda(non_blocking=True), yb.cuda(non_blocking=True)

        optimizer.zero_grad()
        with torch.amp.autocast(device_type="cuda"):
            logits = model(xb)
            loss = criterion(logits, yb)

        # Backward on the scaled loss; step() unscales and skips the update on inf/NaN grads
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Accumulate the sample-weighted loss so the epoch mean is exact with a short last batch
        train_loss += loss.item() * xb.size(0)
        preds = logits.argmax(1)
        correct += (preds == yb).sum().item()
        total += yb.size(0)

    train_acc = correct / total
    train_loss /= total

    # --- VALIDATION ---
    model.eval()
    val_loss, val_correct, val_total = 0.0, 0, 0

    with torch.no_grad():
        for xb, yb in tqdm(val_dl, desc=f"Val Ep{epoch}"):
            xb, yb = xb.cuda(non_blocking=True), yb.cuda(non_blocking=True)
            logits = model(xb)
            # Same label-smoothed criterion as training, so the two losses are comparable
            loss = criterion(logits, yb)

            val_loss += loss.item() * xb.size(0)
            preds = logits.argmax(1)
            val_correct += (preds == yb).sum().item()
            val_total += yb.size(0)

    val_acc = val_correct / val_total
    val_loss /= val_total

    scheduler.step()

    print(f"✅ Epoch [{epoch}/{EPOCHS}] "
          f"Train Loss: {train_loss:.4f} Acc: {train_acc:.4f} | "
          f"Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}")

    # Save checkpoints: always the latest weights, plus the best-by-val-accuracy weights
    torch.save(model.state_dict(), f"{CHECKPOINT_DIR}/last.pth")
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), f"{CHECKPOINT_DIR}/best_model.pth")
        print("✅ Saved NEW BEST model")

    end = time.time()
    print(f"⏱️ Epoch time: {(end - start)/60:.2f} min")


✅ Training for exactly 40 epochs...


  scaler = GradScaler()


Train Ep1:   0%|          | 0/715 [00:00<?, ?it/s]

  with autocast():


Val Ep1:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [1/40] Train Loss: 3.6773 Acc: 0.2988 | Val Loss: 2.6170 Acc: 0.5463
✅ Saved NEW BEST model
⏱️ Epoch time: 12.86 min


Train Ep2:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep2:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [2/40] Train Loss: 2.2384 Acc: 0.6347 | Val Loss: 2.1961 Acc: 0.6532
✅ Saved NEW BEST model
⏱️ Epoch time: 12.05 min


Train Ep3:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep3:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [3/40] Train Loss: 1.8306 Acc: 0.7410 | Val Loss: 2.0664 Acc: 0.6874
✅ Saved NEW BEST model
⏱️ Epoch time: 11.94 min


Train Ep4:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep4:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [4/40] Train Loss: 1.5534 Acc: 0.8115 | Val Loss: 2.0344 Acc: 0.7010
✅ Saved NEW BEST model
⏱️ Epoch time: 11.79 min


Train Ep5:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep5:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [5/40] Train Loss: 1.3325 Acc: 0.8752 | Val Loss: 2.0296 Acc: 0.7136
✅ Saved NEW BEST model
⏱️ Epoch time: 11.76 min


Train Ep6:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep6:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [6/40] Train Loss: 1.1527 Acc: 0.9325 | Val Loss: 2.0916 Acc: 0.7089
⏱️ Epoch time: 11.63 min


Train Ep7:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep7:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [7/40] Train Loss: 1.0716 Acc: 0.9560 | Val Loss: 2.0869 Acc: 0.7124
⏱️ Epoch time: 11.70 min


Train Ep8:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep8:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [8/40] Train Loss: 1.0348 Acc: 0.9670 | Val Loss: 2.0837 Acc: 0.7205
✅ Saved NEW BEST model
⏱️ Epoch time: 11.76 min


Train Ep9:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep9:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [9/40] Train Loss: 1.0105 Acc: 0.9729 | Val Loss: 2.0737 Acc: 0.7163
⏱️ Epoch time: 11.80 min


Train Ep10:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep10:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [10/40] Train Loss: 0.9846 Acc: 0.9802 | Val Loss: 2.0508 Acc: 0.7229
✅ Saved NEW BEST model
⏱️ Epoch time: 11.74 min


Train Ep11:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep11:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [11/40] Train Loss: 0.9696 Acc: 0.9814 | Val Loss: 2.0545 Acc: 0.7254
✅ Saved NEW BEST model
⏱️ Epoch time: 11.72 min


Train Ep12:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep12:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [12/40] Train Loss: 0.9701 Acc: 0.9806 | Val Loss: 2.0623 Acc: 0.7224
⏱️ Epoch time: 11.93 min


Train Ep13:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep13:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [13/40] Train Loss: 0.9569 Acc: 0.9838 | Val Loss: 2.0781 Acc: 0.7170
⏱️ Epoch time: 11.84 min


Train Ep14:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep14:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [14/40] Train Loss: 0.9316 Acc: 0.9895 | Val Loss: 2.0498 Acc: 0.7285
✅ Saved NEW BEST model
⏱️ Epoch time: 11.84 min


Train Ep15:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep15:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [15/40] Train Loss: 0.9244 Acc: 0.9905 | Val Loss: 2.1044 Acc: 0.7157
⏱️ Epoch time: 11.93 min


Train Ep16:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep16:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [16/40] Train Loss: 0.9397 Acc: 0.9854 | Val Loss: 2.0371 Acc: 0.7310
✅ Saved NEW BEST model
⏱️ Epoch time: 11.83 min


Train Ep17:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep17:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [17/40] Train Loss: 0.9018 Acc: 0.9946 | Val Loss: 2.0414 Acc: 0.7273
⏱️ Epoch time: 11.86 min


Train Ep18:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep18:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [18/40] Train Loss: 0.9003 Acc: 0.9940 | Val Loss: 2.0546 Acc: 0.7212
⏱️ Epoch time: 11.78 min


Train Ep19:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep19:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [19/40] Train Loss: 0.9088 Acc: 0.9917 | Val Loss: 2.0362 Acc: 0.7287
⏱️ Epoch time: 12.03 min


Train Ep20:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep20:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [20/40] Train Loss: 0.8949 Acc: 0.9948 | Val Loss: 2.0351 Acc: 0.7294
⏱️ Epoch time: 11.81 min


Train Ep21:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep21:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [21/40] Train Loss: 0.8887 Acc: 0.9955 | Val Loss: 2.0641 Acc: 0.7199
⏱️ Epoch time: 11.84 min


Train Ep22:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep22:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [22/40] Train Loss: 0.8837 Acc: 0.9956 | Val Loss: 1.9996 Acc: 0.7364
✅ Saved NEW BEST model
⏱️ Epoch time: 11.83 min


Train Ep23:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep23:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [23/40] Train Loss: 0.8783 Acc: 0.9968 | Val Loss: 1.9700 Acc: 0.7430
✅ Saved NEW BEST model
⏱️ Epoch time: 11.79 min


Train Ep24:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep24:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [24/40] Train Loss: 0.8716 Acc: 0.9978 | Val Loss: 1.9519 Acc: 0.7490
✅ Saved NEW BEST model
⏱️ Epoch time: 11.88 min


Train Ep25:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep25:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [25/40] Train Loss: 0.8727 Acc: 0.9976 | Val Loss: 1.9424 Acc: 0.7504
✅ Saved NEW BEST model
⏱️ Epoch time: 11.91 min


Train Ep26:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep26:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [26/40] Train Loss: 0.8663 Acc: 0.9988 | Val Loss: 1.9320 Acc: 0.7535
✅ Saved NEW BEST model
⏱️ Epoch time: 11.92 min


Train Ep27:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep27:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [27/40] Train Loss: 0.8643 Acc: 0.9989 | Val Loss: 1.9436 Acc: 0.7523
⏱️ Epoch time: 11.85 min


Train Ep28:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep28:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [28/40] Train Loss: 0.8638 Acc: 0.9988 | Val Loss: 1.9450 Acc: 0.7528
⏱️ Epoch time: 11.87 min


Train Ep29:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep29:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [29/40] Train Loss: 0.8617 Acc: 0.9989 | Val Loss: 1.9261 Acc: 0.7605
✅ Saved NEW BEST model
⏱️ Epoch time: 11.72 min


Train Ep30:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep30:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [30/40] Train Loss: 0.8608 Acc: 0.9989 | Val Loss: 1.9352 Acc: 0.7576
⏱️ Epoch time: 11.84 min


Train Ep31:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep31:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [31/40] Train Loss: 0.8602 Acc: 0.9988 | Val Loss: 1.9223 Acc: 0.7614
✅ Saved NEW BEST model
⏱️ Epoch time: 11.97 min


Train Ep32:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep32:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [32/40] Train Loss: 0.8597 Acc: 0.9988 | Val Loss: 1.9185 Acc: 0.7653
✅ Saved NEW BEST model
⏱️ Epoch time: 11.87 min


Train Ep33:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep33:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [33/40] Train Loss: 0.8594 Acc: 0.9987 | Val Loss: 1.9178 Acc: 0.7616
⏱️ Epoch time: 12.20 min


Train Ep34:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep34:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [34/40] Train Loss: 0.8586 Acc: 0.9989 | Val Loss: 1.9231 Acc: 0.7619
⏱️ Epoch time: 12.31 min


Train Ep35:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep35:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [35/40] Train Loss: 0.8580 Acc: 0.9989 | Val Loss: 1.9219 Acc: 0.7653
⏱️ Epoch time: 12.24 min


Train Ep36:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep36:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [36/40] Train Loss: 0.8579 Acc: 0.9989 | Val Loss: 1.9225 Acc: 0.7628
⏱️ Epoch time: 11.97 min


Train Ep37:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep37:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [37/40] Train Loss: 0.8575 Acc: 0.9990 | Val Loss: 1.9222 Acc: 0.7628
⏱️ Epoch time: 11.90 min


Train Ep38:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep38:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [38/40] Train Loss: 0.8573 Acc: 0.9990 | Val Loss: 1.9237 Acc: 0.7625
⏱️ Epoch time: 11.81 min


Train Ep39:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep39:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [39/40] Train Loss: 0.8572 Acc: 0.9991 | Val Loss: 1.9238 Acc: 0.7628
⏱️ Epoch time: 12.11 min


Train Ep40:   0%|          | 0/715 [00:00<?, ?it/s]

Val Ep40:   0%|          | 0/179 [00:00<?, ?it/s]

✅ Epoch [40/40] Train Loss: 0.8572 Acc: 0.9991 | Val Loss: 1.9237 Acc: 0.7630
⏱️ Epoch time: 12.00 min
