# BirdCLEF+ 2025 Submission Baseline

## Import libraries

In [1]:
import configparser
import os
import warnings
from pathlib import Path
import time
from tqdm import tqdm
import concurrent.futures

import numpy as np
import pandas as pd
import librosa
import soundfile
import torch
import torchaudio
import torchaudio.transforms as at
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torchvision import models
import torch.optim as optim

import pytorch_lightning as pl

warnings.filterwarnings('ignore')

## Config

In [2]:
# Notebook-wide settings container; its sections are filled in by the next cell.
config = configparser.ConfigParser()

In [3]:
# NOTE: ConfigParser stores every option as a string (non-string values are
# passed through str() on assignment), so numeric settings have to be converted
# back with int()/float() wherever they are read.
config['project'] = {
    'name': 'birdclef_2025',
    'project_path': '/mnt/d/Projects_D/BirdCLEF_2025',
}

_project_path = config['project']['project_path']

config['data'] = {
    'data_path': _project_path + '/data',
    # Kept with its trailing slash for backward compatibility. Paths derived
    # below are built from 'data_path' instead so they do not contain '//'.
    'workspace_path': _project_path + '/data/',
    'birdclef_2025': _project_path + '/data/birdclef_2025',
    'processed_audio': _project_path + '/data/audio_processed',
}

_data_path = config['data']['data_path']

config['audio_params'] = {
    'wav_sec': 5,
    'sample_rate': 32000,
}

_wav_sec = int(config['audio_params']['wav_sec'])
_sample_rate = int(config['audio_params']['sample_rate'])

config['audio_preprocessing'] = {
    # Samples per segment; derived from the audio parameters so the two
    # settings cannot drift apart (32000 * 5 = 160000).
    'min_segment': _sample_rate * _wav_sec,
    'backend': 'soundfile',
}

config['mel_spectrogram'] = {
    'n_fft': 1024,
    'win_length': 1024,
    'hop_length': 512,
    'n_mels': 80,
    'f_min': 20,
    'f_max': 15000,
    'mel_scale': 'htk',
}

config['model'] = {
    'model_backbone': 'resnet34',
    'model_desc': 'resnet34',
    'model_ver': 'baseline',
    'model_path': _project_path + '/models/resnet34-baseline',
}

config['testing'] = {
    'debug': 0,
    'batch_size': 60,
    'num_workers': 4,
    'test_audio': _data_path + '/test_audio/segments',
    # Stored as the plain string 'cuda' or 'cpu'.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    # Number of segments cut from each soundscape.
    # Presumably 60 is the soundscape length in seconds - TODO confirm.
    'chunks_size': int(60 / _wav_sec),
    'submission_path': _data_path + '/submissions',
}

In [4]:
# Create the output folders up front; already existing folders are left as they are.
for _dir_option in ('test_audio', 'submission_path'):
    os.makedirs(config['testing'][_dir_option], exist_ok=True)

In [5]:
# Show the dataset directory, the model directory and the segment output directory.
print(config['data']['birdclef_2025'], config['model']['model_path'], config['testing']['test_audio'])

/mnt/d/Projects_D/BirdCLEF_2025/data/birdclef_2025 /mnt/d/Projects_D/BirdCLEF_2025/models/resnet34-baseline /mnt/d/Projects_D/BirdCLEF_2025/data//test_audio/segments


## Load test data

In [6]:
def _list_ogg_files(directory, missing_ok=False):
    """Return the sorted names of the ``.ogg`` files in ``directory``.

    Sorting makes the result independent of the arbitrary order in which
    ``os.listdir`` reports entries.

    Args:
        directory: Path of the folder to scan.
        missing_ok: When true, a non-existent folder yields an empty list
            instead of raising.

    Raises:
        FileNotFoundError: If ``directory`` does not exist and ``missing_ok``
            is false.
    """
    if missing_ok and not os.path.isdir(directory):
        return []
    return sorted(name for name in os.listdir(directory) if name.endswith('.ogg'))


test_dir = '/test_soundscapes'
test_files = _list_ogg_files(config['data']['birdclef_2025'] + test_dir, missing_ok=True)

if not test_files:
    # No test soundscapes available: fall back to a fixed subset of the train
    # soundscapes and flag the run as a debug run.
    test_dir = '/train_soundscapes'
    test_files = _list_ogg_files(config['data']['birdclef_2025'] + test_dir)[:700]
    config['testing']['debug'] = '1'

In [7]:
# 'debug' is held as the string '0'/'1' in the config, hence int() before bool().
print(f"Is Debug: {bool(int(config['testing']['debug']))}")

Is Debug: True


## Dataset

In [8]:
class BirdclefTestDataset(torch.utils.data.Dataset):
    """Serve saved test segments as standardised log-mel spectrograms.

    Each row of ``df`` names one audio segment (column ``row_id``) stored in
    the configured ``testing/test_audio`` directory. Items are returned as
    ``(mel_spec, filename)`` pairs.

    Args:
        df: DataFrame with a ``row_id`` column holding segment file names.
        cfg: Optional ConfigParser-like mapping providing the ``testing``,
            ``audio_preprocessing``, ``audio_params`` and ``mel_spectrogram``
            sections. Defaults to the notebook-level ``config``.
    """

    # Amplitude of the uniform noise added to every segment (about 10 ** -4.8).
    NOISE_SCALE = 1.5849e-05

    def __init__(self, df, cfg=None):
        self.df = df
        # Fall back to the notebook-level settings so existing callers that
        # pass only ``df`` keep working.
        self.config = config if cfg is None else cfg
        self.input_path = Path(self.config['testing']['test_audio'])
        self.min_segment = int(self.config['audio_preprocessing']['min_segment'])
        # Built once and reused; the transform only depends on the config.
        self.mel_transform = self._build_mel_transform()

    @classmethod
    def normalize_std(cls, spec, eps=1e-23):
        """Standardise ``spec`` to zero mean and unit standard deviation.

        ``eps`` is added to the standard deviation so a constant input does
        not divide by zero.
        """
        mean = torch.mean(spec)
        std = torch.std(spec)
        return (spec - mean) / (std + eps)

    def _build_mel_transform(self):
        """Create the MelSpectrogram transform described by the config."""
        params = self.config['mel_spectrogram']
        return at.MelSpectrogram(
            sample_rate=int(self.config['audio_params']['sample_rate']),
            n_fft=int(params['n_fft']),
            win_length=int(params['win_length']),
            hop_length=int(params['hop_length']),
            n_mels=int(params['n_mels']),
            f_min=float(params['f_min']),
            f_max=float(params['f_max']),
            mel_scale=params['mel_scale'],
        )

    def get_mel_spectrogram(self, audio_signal):
        """Return the natural-log mel spectrogram of ``audio_signal``."""
        return torch.log(self.mel_transform(audio_signal))

    def _prepare_signal(self, sig):
        """Fit ``sig`` to ``min_segment`` samples, peak-normalise it, add noise.

        Shorter signals are zero-padded at the end and longer ones truncated,
        so every item has the same length and batches can be collated. A
        signal that is entirely zero is left unscaled instead of being divided
        by zero (which would turn the whole segment into NaN).
        """
        n_samples = sig.shape[-1]
        if n_samples > self.min_segment:
            sig = sig[..., :self.min_segment]
        elif n_samples < self.min_segment:
            padding = sig.new_zeros(*sig.shape[:-1], self.min_segment - n_samples)
            sig = torch.cat((sig, padding), dim=-1)

        peak = torch.max(torch.abs(sig))
        if peak > 0:
            sig = sig / peak

        # Low-level uniform noise in [-NOISE_SCALE / 2, NOISE_SCALE / 2);
        # presumably there to keep the later log() away from exact zeros.
        return sig + self.NOISE_SCALE * (torch.rand(1, self.min_segment) - 0.5)

    def __getitem__(self, index):
        """Load segment ``index`` and return ``(mel_spec, filename)``."""
        filename = self.df.iloc[index].row_id
        sig, _ = torchaudio.load(
            self.input_path / filename,
            backend=self.config['audio_preprocessing']['backend'],
        )
        sig = self._prepare_signal(sig)

        mel_spec = self.get_mel_spectrogram(sig)
        mel_spec = self.normalize_std(mel_spec)

        return mel_spec, filename

    def __len__(self):
        """Return the number of segments listed in ``df``."""
        return len(self.df)

## Define model

In [9]:
class BirdclefModel(pl.LightningModule):
    """ResNet-34 classifier with one output logit per class label.

    Args:
        class_labels: Sequence of class labels; its length sets the size of
            the final linear layer.
    """

    def __init__(self, class_labels):
        super().__init__()
        # Records the constructor arguments in the module's hyperparameters.
        self.save_hyperparameters()
        self.class_labels = class_labels
        self.num_classes = len(self.class_labels)
        # Stored for reference only; get_model() always builds a ResNet-34.
        self.model_backbone = config['model']['model_backbone']
        self.model = self.get_model()

    def get_model(self):
        """Build an uninitialised ResNet-34 whose head matches ``num_classes``."""
        # ``weights=None`` is the current spelling of the deprecated
        # ``pretrained=False``: no pretrained weights are downloaded, the
        # parameters are expected to come from a checkpoint.
        model = models.resnet34(weights=None)
        num_features = model.fc.in_features
        model.fc = torch.nn.Linear(num_features, self.num_classes)
        return model

    def forward(self, x):
        """Return class logits for a batch of spectrograms.

        The input is repeated three times along dim 1 to match the three input
        channels of the backbone's first convolution.
        Assumes ``x`` is (batch, 1, n_mels, frames) - TODO confirm.
        """
        x = torch.cat((x, x, x), 1)
        return self.model(x)

## Labels

In [10]:
# Class labels are the entry names found under train_audio.
# NOTE(review): os.listdir returns entries in arbitrary order, and the position
# of a label in this list is used as the model output index further down -
# confirm the order matches the one used when the checkpoint was trained.
class_labels = os.listdir(config['data']['birdclef_2025'] + '/train_audio')
num_classes = len(class_labels)

In [11]:
# Display the number of class labels.
num_classes

206

## Process test dataset

In [12]:
chunks_size = int(config['testing']['chunks_size'])

# One list of segment file names per source file, keyed by the file's position
# in ``test_files`` so the original file order can be restored afterwards.
test_segment_files_dict = {index: [] for index in range(len(test_files))}


def process_file(file, index):
    """Cut one soundscape into ``chunks_size`` segments and save them as .ogg.

    Segment files are named ``<stem>_<end second>.ogg`` and their names are
    appended to ``test_segment_files_dict[index]``.

    Args:
        file: File name of the soundscape inside the current ``test_dir``.
        index: Position of ``file`` in ``test_files``.
    """
    test_filename = file.split('.')[0]
    backend = config['audio_preprocessing']['backend']
    wav_sec = int(config['audio_params']['wav_sec'])
    output_dir = config['testing']['test_audio']

    sig, sr = torchaudio.load(config['data']['birdclef_2025'] + test_dir + '/' + file, backend=backend)
    # NOTE(review): torch.chunk splits into a fixed number of pieces, so the
    # pieces are wav_sec seconds long only when the recording lasts
    # chunks_size * wav_sec seconds - confirm for the input data.
    for i, chunk in enumerate(torch.chunk(sig, chunks_size, dim=1)):
        segment_name = test_filename + '_' + str((i + 1) * wav_sec) + '.ogg'
        torchaudio.save(output_dir + '/' + segment_name, chunk, sr, backend=backend)
        test_segment_files_dict[index].append(segment_name)


# Use ThreadPoolExecutor to parallelize the processing
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # The results must be consumed: Executor.map only re-raises a worker's
    # exception when its result is retrieved, so without list() a failed file
    # would silently produce no segments.
    list(executor.map(process_file, test_files, range(len(test_files))))

In [13]:
# Flatten the per-file segment lists into one list, keeping the order of test_files.
test_segment_files = [
    segment_name
    for file_index in range(len(test_files))
    for segment_name in test_segment_files_dict[file_index]
]

In [14]:
# Sanity check: reload the first saved segment and show its tensor shape and sample rate.
if test_segment_files:
    first_segment_path = config['testing']['test_audio'] + '/' + test_segment_files[0]
    sig, sr = torchaudio.load(first_segment_path)
    print(sig.shape, sr)

torch.Size([1, 160000]) 32000


In [15]:
# One row per saved segment; 'row_id' holds the segment file name (extension included).
test_df = pd.DataFrame(test_segment_files, columns=['row_id'])

In [16]:
# Display the number of segment rows.
test_df.count()

row_id    8400
dtype: int64

In [17]:
# Preview the first segment rows.
test_df.head()

Unnamed: 0,row_id
0,H02_20230420_074000_5.ogg
1,H02_20230420_074000_10.ogg
2,H02_20230420_074000_15.ogg
3,H02_20230420_074000_20.ogg
4,H02_20230420_074000_25.ogg


## Load model

In [18]:
# Checkpoint file: <model_path>/<model_desc>-<model_ver>-best.ckpt
checkpoint_path = (
    f"{config['model']['model_path']}/"
    f"{config['model']['model_desc']}-{config['model']['model_ver']}-best.ckpt"
)
# map_location='cpu' loads the stored tensors straight onto the CPU, so a
# checkpoint written on a GPU machine can be opened where no GPU is available.
model = BirdclefModel.load_from_checkpoint(
    checkpoint_path,
    map_location='cpu',
    class_labels=class_labels,
)
model = model.to(torch.float32)
model = model.to('cpu')
# Inference mode for dropout/batch-norm layers; as the last expression of the
# cell this also displays the model structure.
model.eval()

BirdclefModel(
  (model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track

## Make predictions

In [28]:
# Batches are served in table order (no shuffling) and the final partial batch is kept.
test_dataset = BirdclefTestDataset(test_df)
test_loader = DataLoader(
    test_dataset,
    batch_size=int(config['testing']['batch_size']),
    num_workers=int(config['testing']['num_workers']),
    shuffle=False,
    drop_last=False,
)

In [29]:
# Column-wise accumulator: the segment id plus one score list per class label.
pred = {'row_id': [], **{species_code: [] for species_code in class_labels}}

with torch.no_grad():
    for mel_batch, segment_names in tqdm(test_loader):
        # Softmax over the class dimension, then hand the scores to NumPy.
        scores = torch.softmax(model(mel_batch), dim=1).cpu().numpy()

        # The row id is the segment file name up to its first '.'.
        pred['row_id'].extend(name.split('.')[0] for name in segment_names)

        # Column k of the score matrix belongs to class_labels[k].
        for column, label in enumerate(class_labels):
            pred[label].extend(scores[:, column])

100%|██████████| 140/140 [02:29<00:00,  1.07s/it]


## Prepare submission file

In [30]:
# Columns: 'row_id' followed by one score column per class label, in class_labels order.
submission = pd.DataFrame(pred)

In [31]:
# Display (number of segments, 1 + number of class labels).
submission.shape

(8400, 207)

In [32]:
# Preview the first submission rows.
submission.head()

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,0.002915,1.5e-05,2.4e-05,0.011482,0.001628,0.000603,0.001692,9.5e-05,0.000517,...,0.001842,0.00649,0.000541,0.001558,3.2e-05,0.000701,0.000251,0.002829,0.000311,0.001703
1,H02_20230420_074000_10,0.001721,7e-06,2.3e-05,0.024109,0.000795,0.000323,0.000913,3.4e-05,0.000196,...,0.000775,0.003544,0.000221,0.000571,1.5e-05,0.000481,9.5e-05,0.001118,0.000153,0.001015
2,H02_20230420_074000_15,0.000651,3e-06,2.7e-05,0.009436,0.00049,0.000255,0.001705,3.5e-05,0.000242,...,0.001407,0.003517,0.000491,0.000705,4.3e-05,0.001211,0.000504,0.002381,0.000453,0.002101
3,H02_20230420_074000_20,0.001349,7e-06,6e-06,0.001191,0.000207,6.4e-05,0.002938,5.2e-05,0.000156,...,0.001517,0.003161,0.000129,0.000762,4e-06,0.000623,0.000148,0.002991,0.000374,0.001109
4,H02_20230420_074000_25,0.001924,1.3e-05,1.8e-05,0.002331,0.00055,0.000882,0.002103,0.000157,0.000638,...,0.003094,0.009516,0.000388,0.001889,2.5e-05,0.000863,0.000297,0.003366,0.000499,0.002599


In [27]:
# Output file: <submission_path>/<model_desc>-<model_ver>-submission.csv, written without the index column.
submission_file = (
    f"{config['testing']['submission_path']}/"
    f"{config['model']['model_desc']}-{config['model']['model_ver']}-submission.csv"
)
submission.to_csv(submission_file, index=False)