In [32]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)

In [33]:
# 
class CFG:
    """Notebook configuration: paths, hardware, model, audio and inference settings.

    Args:
        mode: Either "train" or "inference"; any other value fails the assertion.
            "inference" additionally sets the TTA, threshold, fold-selection,
            debug_count and seed attributes.
        kaggle_notebook: When truthy, use the /kaggle/input paths with CPU-sized
            settings; otherwise use the local ../data and ../models paths.
        debug: Stored on the instance and read by update_debug_settings().
    """

    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ("train", "inference"), "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        # ===== Path Settings =====
        if not self.KAGGLE_NOTEBOOK:
            # Local run: data and models live next to the notebook directory.
            self.OUTPUT_DIR = '../data/result/'
            self.train_datadir = '../data/raw/train_audio/'
            self.train_csv = '../data/raw/train.csv'
            self.test_soundscapes = '../data/raw/train_soundscapes/'
            self.submission_csv = '../data/raw/sample_submission.csv'
            self.taxonomy_csv = '../data/raw/taxonomy.csv'
            self.spectrogram_npy = '../data/processed/mel-spec_0329/birdclef2025_melspec_5sec_256_256.npy'
            # MODELS_DIR is only defined in this (local) branch.
            self.MODELS_DIR = "../models/"
            self.model_path = "../models/baseline_7sec/"

            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.batch_size = 32
            self.n_jobs = 16
        else:
            # Kaggle run: read-only inputs, outputs go to the working directory ('').
            self.OUTPUT_DIR = ''
            self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
            self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
            self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
            self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
            self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
            self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'
            self.model_path = "/kaggle/input/birdclef-2025-baseline-fold0-0404"

            self.device = "cpu"
            self.batch_size = 8
            self.n_jobs = 2

        # ===== Model Settings =====
        self.model_name = 'efficientnet_b0'
        self.pretrained = mode == "train"  # pretrained weights are requested only in train mode
        self.in_channels = 1

        # ===== Audio Settings =====
        self.FS = 32000
        self.WINDOW_SIZE = 5
        self.TARGET_DURATION = 5
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = 512
        self.N_MELS = 128
        self.FMIN = 50
        self.FMAX = 14000

        # ===== Inference Mode =====
        # These attributes do not exist on a "train" instance.
        if mode == "inference":
            self.use_tta = False
            self.tta_count = 3
            self.threshold = 0.5

            self.use_specific_folds = False
            self.folds = [0, 1, 2, 3, 4]  # Used only if use_specific_folds is True

            self.debug_count = 3
            self.seed = 42

    def update_debug_settings(self):
        """Shrink the run when debug is truthy: 2 epochs and fold 0 only.

        Does nothing when debug is falsy. __init__ does not call this method.
        """
        if not self.debug:
            return
        self.epochs = 2
        self.selected_folds = [0]

In [None]:
"TODO: configを2つにわけるべきかも．柔軟に変える方はnotebook側で，固定したい方はmodule側とか"
"TODO: Debugモードになっていたらsubmissionでエラーになる"
"TODO: 疑似ラベル作成のためなら，事前にデータセットを準備するべき"
# English translation of the three TODO note strings directly above this line:
#   - The config should perhaps be split in two: settings that change often stay in
#     the notebook, settings that should stay fixed move into the module.
#   - If debug mode is enabled, the submission fails with an error.
#   - If this is used for pseudo-label creation, the dataset should be prepared beforehand.
cfg = CFG(mode='inference', kaggle_notebook=False)

# On Kaggle, add the libs dataset directory to sys.path before importing `module`.
if cfg.KAGGLE_NOTEBOOK:
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
from module import models_lib, utils_lib, preprocess_lib, inference_lib

# Set seed (cfg.seed is only defined by CFG when mode == 'inference')
utils_lib.set_seed(cfg.seed)

In [35]:
# Report the device in use, then read the taxonomy table; its primary_label
# column supplies the class list and therefore the number of classes.
print(f"Using device: {cfg.device}")
print("Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df['primary_label'].tolist()
num_classes = len(species_ids)
print(f"Number of classes: {num_classes}")

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [36]:


def process_audio_file(audio_path, cfg):
    """Cut one audio file into consecutive windows and compute a mel spectrogram per window.

    Args:
        audio_path: Path of the audio file; its stem becomes the row_id prefix.
        cfg: Config providing FS (sample rate passed to librosa.load) and
            WINDOW_SIZE (window length in seconds). It is also forwarded to
            preprocess_lib.process_audio_segment.

    Returns:
        A list of {"row_id", "mel_spec"} dicts, one per complete window, where
        row_id is "<stem>_<window end in seconds>". Trailing audio shorter than
        one window is dropped. If anything raises, the error is printed and the
        windows collected so far (possibly none) are returned.
    """
    records = []
    file_stem = Path(audio_path).stem
    try:
        waveform, _ = librosa.load(audio_path, sr=cfg.FS)
        n_windows = int(len(waveform) / (cfg.FS * cfg.WINDOW_SIZE))

        for window_idx in range(n_windows):
            first_sample = int(window_idx * cfg.FS * cfg.WINDOW_SIZE)
            last_sample = int(first_sample + cfg.FS * cfg.WINDOW_SIZE)

            spec = preprocess_lib.process_audio_segment(waveform[first_sample:last_sample], cfg)
            window_end_sec = (window_idx + 1) * cfg.WINDOW_SIZE

            records.append({"row_id": f"{file_stem}_{window_end_sec}", "mel_spec": spec})
    except Exception as exc:
        # Best effort: report the failure and keep whatever was produced before it.
        print(f"Error processing {audio_path}: {exc}")
    return records

def generate_melspec_dataset(cfg):
    """Build mel-spectrogram records for every ``*.ogg`` file in ``cfg.test_soundscapes``.

    Files are processed in sorted path order so the debug subset and the order
    of the returned records are reproducible; ``Path.glob`` itself yields
    entries in arbitrary order.

    Args:
        cfg: Config providing test_soundscapes (directory to scan), debug and
            debug_count (debug_count is read only when debug is truthy) and
            n_jobs (joblib worker count). It is also forwarded to
            process_audio_file.

    Returns:
        A flat list of the per-window records returned by process_audio_file,
        or an empty list when the directory is missing or has no ``*.ogg`` files.
    """
    test_dir = Path(cfg.test_soundscapes)
    if not test_dir.exists():
        print(f"Test directory {test_dir} does not exist.")
        return []

    # Sort: glob order depends on the filesystem and would otherwise make the
    # debug subset and the output row order vary between runs/machines.
    test_files = sorted(test_dir.glob('*.ogg'))
    if not test_files:
        print("No test audio files found.")
        return []

    if cfg.debug:
        print(f"Debug mode enabled, using only {cfg.debug_count} files")
        test_files = test_files[:cfg.debug_count]

    # One task per file; results come back as one list of records per file.
    results = Parallel(n_jobs=cfg.n_jobs)(
        delayed(process_audio_file)(path, cfg) for path in tqdm(test_files, desc="Parallel melspec gen")
    )
    return [item for sublist in results for item in sublist]

In [37]:
def create_dataloader_from_dataset(dataset, cfg):
    """Stack every record's mel spectrogram into one float32 tensor on ``cfg.device``.

    Despite the name, this returns a plain tensor, not a DataLoader.

    Args:
        dataset: Sequence of dicts, each holding an array under "mel_spec";
            all arrays must share a shape so they can be stacked.
        cfg: Config providing ``device``.

    Returns:
        A float32 tensor with a singleton axis inserted at position 1, i.e.
        (N, 1, H, W) when each spectrogram is a 2-D (H, W) array.
    """
    stacked = np.stack([record["mel_spec"] for record in dataset])
    batch = torch.tensor(stacked, dtype=torch.float32)
    batch = batch.unsqueeze(1)  # add the channel axis
    return batch.to(cfg.device)

In [38]:
def run_inference_from_dataset(dataset, models, cfg, species_ids):
    """Run every model over the dataset in mini-batches and average their sigmoid outputs.

    Args:
        dataset: Sequence of dicts with "row_id" and "mel_spec" keys; the
            spectrograms must share a shape so each batch can be stacked.
        models: Sequence of callables. Each is called with the batch tensor
            (float32, singleton axis at position 1, on ``cfg.device``) and its
            output is passed through a sigmoid.
        cfg: Config providing ``batch_size`` and ``device``.
        species_ids: Class identifiers; only used to size the empty result
            when ``dataset`` is empty.

    Returns:
        ``(row_ids, predictions)``: ``row_ids`` lists the records' row ids in
        dataset order and ``predictions`` is a numpy array holding the
        per-record mean over models, concatenated along axis 0. For an empty
        dataset the result is ``([], float32 array of shape
        (0, len(species_ids)))`` rather than the ``ValueError`` that
        ``np.concatenate`` raises on an empty list.
    """
    if len(dataset) == 0:
        return [], np.empty((0, len(species_ids)), dtype=np.float32)

    row_ids = []
    all_preds = []

    # Gradients are never needed here, so disable them for the whole loop.
    with torch.no_grad():
        for start in range(0, len(dataset), cfg.batch_size):
            batch = dataset[start:start + cfg.batch_size]

            mel_batch = np.stack([item["mel_spec"] for item in batch])
            tensors = torch.tensor(mel_batch, dtype=torch.float32).unsqueeze(1).to(cfg.device)

            # One probability array per model, averaged element-wise (ensemble mean).
            preds_per_model = [torch.sigmoid(model(tensors)).cpu().numpy() for model in models]
            all_preds.append(np.mean(preds_per_model, axis=0))

            row_ids.extend(item["row_id"] for item in batch)

    predictions = np.concatenate(all_preds, axis=0)
    return row_ids, predictions

In [39]:
def main():
    """Run the inference pipeline end to end and write ``submission.csv``.

    Reads the notebook-level globals ``cfg``, ``num_classes`` and
    ``species_ids``. Steps: load the models, generate the mel-spectrogram
    dataset, run inference, build the submission frame with
    ``utils_lib.create_submission`` and save it under ``cfg.OUTPUT_DIR``.

    Raises:
        RuntimeError: If ``models_lib.load_models`` returns no models.
    """
    # Load and validate the models first so that a wrong model_path fails
    # immediately instead of after the whole dataset has been generated.
    print("Loading models...")
    models = models_lib.load_models(cfg, num_classes)

    # Abort with an error if no models could be loaded.
    if not models:
        raise RuntimeError("No models found. Please check model_path in CFG or ensure models are available.")

    print("Generating dataset...")
    dataset = generate_melspec_dataset(cfg)

    print("Running inference...")
    if len(dataset) > 0:
        row_ids, predictions = run_inference_from_dataset(dataset, models, cfg, species_ids)
    else:
        print("No test data available, generating empty submission.")
        row_ids = []
        predictions = []

    submission_df = utils_lib.create_submission(row_ids, predictions, species_ids, cfg)

    # OUTPUT_DIR is '' on Kaggle (current directory); os.makedirs('') would
    # raise, so only create the directory when a non-empty path is configured.
    if cfg.OUTPUT_DIR:
        os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    submission_path = os.path.join(cfg.OUTPUT_DIR, 'submission.csv')
    submission_df.to_csv(submission_path, index=False)

    print(f"Submission saved to {submission_path}")

In [None]:
# Takes about 5 minutes in total with cuda and batch_size=32.
if __name__ == "__main__":
    main()

Generating dataset...


Parallel melspec gen:   0%|          | 0/9726 [00:00<?, ?it/s]

Exception ignored in: <function _releaseLock at 0x7f24279303a0>
Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


BrokenProcessPool: A task has failed to un-serialize. Please ensure that the arguments of the function are all picklable.

In [41]:
# Load the submission file from cfg.OUTPUT_DIR and preview its first rows.
submission = pd.read_csv(Path(cfg.OUTPUT_DIR) / 'submission.csv')
submission.head()

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,0.000476,0.000389,0.000693,0.000431,0.005304,0.007696,0.006499,0.000309,0.000228,...,0.010506,0.006806,0.003751,0.003634,0.000177,0.003378,0.00016,0.007449,0.000744,0.003861
1,H02_20230420_074000_10,0.000237,0.000287,0.000352,0.000141,0.00119,0.004709,0.002705,0.00024,0.00013,...,0.004917,0.002516,0.002501,0.00242,8e-05,0.001881,0.000191,0.006686,0.000596,0.00205
2,H02_20230420_074000_15,0.000597,0.000959,0.000936,0.000235,0.003129,0.002791,0.010146,0.000604,0.000558,...,0.008911,0.001423,0.001646,0.001031,0.002807,0.00627,0.001539,0.010027,0.001813,0.000727
3,H02_20230420_074000_20,0.000616,0.000652,0.000425,6.6e-05,0.002437,0.001315,0.007524,0.000494,0.000608,...,0.012083,0.003022,0.002732,0.000945,0.00065,0.006351,0.000171,0.006809,0.000268,0.000462
4,H02_20230420_074000_25,0.000364,0.000292,0.000551,9.9e-05,0.002509,0.004306,0.004866,0.000286,0.000242,...,0.007307,0.003911,0.001425,0.00225,5.8e-05,0.003091,9.1e-05,0.014852,0.000686,0.001296


In [43]:
# Display the whole frame; the rendered output elides the middle rows and columns.
submission

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,0.000476,0.000389,0.000693,0.000431,0.005304,0.007696,0.006499,0.000309,0.000228,...,0.010506,0.006806,0.003751,0.003634,0.000177,0.003378,1.600528e-04,0.007449,0.000744,0.003861
1,H02_20230420_074000_10,0.000237,0.000287,0.000352,0.000141,0.001190,0.004709,0.002705,0.000240,0.000130,...,0.004917,0.002516,0.002501,0.002420,0.000080,0.001881,1.914157e-04,0.006686,0.000596,0.002050
2,H02_20230420_074000_15,0.000597,0.000959,0.000936,0.000235,0.003129,0.002791,0.010146,0.000604,0.000558,...,0.008911,0.001423,0.001646,0.001031,0.002807,0.006270,1.539217e-03,0.010027,0.001813,0.000727
3,H02_20230420_074000_20,0.000616,0.000652,0.000425,0.000066,0.002437,0.001315,0.007524,0.000494,0.000608,...,0.012083,0.003022,0.002732,0.000945,0.000650,0.006351,1.713473e-04,0.006809,0.000268,0.000462
4,H02_20230420_074000_25,0.000364,0.000292,0.000551,0.000099,0.002509,0.004306,0.004866,0.000286,0.000242,...,0.007307,0.003911,0.001425,0.002250,0.000058,0.003091,9.084485e-05,0.014852,0.000686,0.001296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116707,O203_20230526_023000_40,0.000033,0.000023,0.000285,0.000011,0.003540,0.000499,0.001344,0.000020,0.000015,...,0.000245,0.000316,0.000021,0.000236,0.000002,0.000213,4.310795e-07,0.003752,0.000038,0.000071
116708,O203_20230526_023000_45,0.000047,0.000027,0.000733,0.000014,0.002550,0.000976,0.001476,0.000033,0.000026,...,0.000450,0.000710,0.000027,0.000420,0.000002,0.000231,1.392403e-06,0.004766,0.000086,0.000091
116709,O203_20230526_023000_50,0.000035,0.000022,0.000323,0.000010,0.001571,0.000897,0.001441,0.000023,0.000011,...,0.000287,0.000331,0.000040,0.000231,0.000001,0.000360,1.159480e-06,0.002413,0.000164,0.000160
116710,O203_20230526_023000_55,0.000105,0.000066,0.000811,0.000173,0.001929,0.001489,0.001584,0.000071,0.000061,...,0.000643,0.001187,0.000114,0.000574,0.000016,0.000556,5.901956e-06,0.001756,0.000119,0.000102


In [42]:
# Sanity-check the submission: dimensions, column names, dtypes and total null count.
for label, value in (
    ("✅ Shape:", submission.shape),
    ("✅ Columns:", submission.columns.tolist()),
    ("✅ Dtypes:\n", submission.dtypes),
    ("✅ Nulls:\n", submission.isna().sum().sum()),
):
    print(label, value)

✅ Shape: (116712, 207)
✅ Columns: ['row_id', '1139490', '1192948', '1194042', '126247', '1346504', '134933', '135045', '1462711', '1462737', '1564122', '21038', '21116', '21211', '22333', '22973', '22976', '24272', '24292', '24322', '41663', '41778', '41970', '42007', '42087', '42113', '46010', '47067', '476537', '476538', '48124', '50186', '517119', '523060', '528041', '52884', '548639', '555086', '555142', '566513', '64862', '65336', '65344', '65349', '65373', '65419', '65448', '65547', '65962', '66016', '66531', '66578', '66893', '67082', '67252', '714022', '715170', '787625', '81930', '868458', '963335', 'amakin1', 'amekes', 'ampkin1', 'anhing', 'babwar', 'bafibi1', 'banana', 'baymac', 'bbwduc', 'bicwre1', 'bkcdon', 'bkmtou1', 'blbgra1', 'blbwre1', 'blcant4', 'blchaw1', 'blcjay1', 'blctit1', 'blhpar1', 'blkvul', 'bobfly1', 'bobher1', 'brtpar1', 'bubcur1', 'bubwre1', 'bucmot3', 'bugtan', 'butsal1', 'cargra1', 'cattyr', 'chbant1', 'chfmac1', 'cinbec1', 'cocher1', 'cocwoo1', 'colara1'