In [64]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)


import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [65]:
# 
class CFG:
    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ["train", "inference"], "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        # ===== Path Settings =====
        if self.KAGGLE_NOTEBOOK:
            self.OUTPUT_DIR = ''
            self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
            self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
            self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
            self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
            self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
            self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'
            
            # kaggle notebookならここを変更
            self.model_path = "/kaggle/input/birdclef-2025-baseline-fold0-0404"
            
            self.device = "cpu"
            self.batch_size = 8
            self.n_jobs = 3
            
        else:
            self.OUTPUT_DIR = '../data/result/'
            self.train_datadir = '../data/raw/train_audio/'
            self.train_csv = '../data/raw/train.csv'
            self.test_soundscapes = '../data/raw/test_soundscapes_small/'
            self.submission_csv = '../data/raw/sample_submission.csv'
            self.taxonomy_csv = '../data/raw/taxonomy.csv'
            self.spectrogram_npy = '../data/processed/mel-spec_0329/birdclef2025_melspec_5sec_256_256.npy'
            self.MODELS_DIR = "../models/"
            
            # ローカルならここを変更
            self.model_path =  "../models/fold0_cleaned_10sec0424_vino/"
            
            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.batch_size = 32
            self.n_jobs = 3

        # ===== Model Settings =====
        self.model_name = 'efficientnet_b0'
        self.pretrained = True if mode == "train" else False
        self.in_channels = 1

        # ===== Audio Settings =====
        self.FS = 32000
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = 64
        self.N_MELS = 148
        self.FMIN = 20
        self.FMAX = 16000
        
        self.seed = 42
        
        self.WINDOW_SIZE = 10 # 切り取る音声の秒数
        
        # smoothingの係数
        self.smooth_center_weight = 0.6
        self.smooth_neighbor_weight = 0.2
        
        # ===== Inference Mode =====
        if mode == "inference":
            self.use_tta = False
            self.tta_count = 3
            self.threshold = 0.5

            self.use_specific_folds = False
            self.folds = [0, 1, 2, 3, 4]  # Used only if use_specific_folds is True

            self.debug_count = 3

            
            
    def update_debug_settings(self):
        if self.debug:
            self.epochs = 2
            self.selected_folds = [0]

In [66]:
cfg = CFG(mode='inference', kaggle_notebook=False)

if cfg.KAGGLE_NOTEBOOK:
    !pip install -U openvino-telemetry  --no-index --find-links /kaggle/input/pip-hub
    !pip install -U openvino  --no-index --find-links /kaggle/input/pip-hub
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
    
from openvino.runtime import Core
from module import models_lib, utils_lib, preprocess_lib, inference_lib

# Set seed
utils_lib.set_seed(cfg.seed)

In [67]:
print(f"Using device: {cfg.device}")
print(f"Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df['primary_label'].tolist()
num_classes = len(species_ids)
print(f"Number of classes: {num_classes}")

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [68]:
# mel変換
def process_audio_file(audio_path, cfg):
    """
    row_id = 5〜60（12個）に対する10秒ウィンドウの start/end を明示的に定義して処理。
    """
    dataset = []
    soundscape_id = Path(audio_path).stem
    audio_data, _ = librosa.load(audio_path, sr=cfg.FS)
    fs = cfg.FS

    # === 明示的なウィンドウ定義 ===　←頭いい
    segments = [
        (5, 0.0, 10.0),
        (10, 2.5, 12.5),
        (15, 7.5, 17.5),
        (20, 12.5, 22.5),
        (25, 17.5, 27.5),
        (30, 22.5, 32.5),
        (35, 27.5, 37.5),
        (40, 32.5, 42.5),
        (45, 37.5, 47.5),
        (50, 42.5, 52.5),
        (55, 47.5, 57.5),
        (60, 50.0, 60.0),
    ]

    for row_id_time, start_sec, end_sec in segments:
        start_idx = int(start_sec * fs)
        end_idx = int(end_sec * fs)
        segment = audio_data[start_idx:end_idx]

        mel_spec = preprocess_lib.process_audio_segment(segment, cfg)
        row_id = f"{soundscape_id}_{row_id_time}"
        dataset.append({
            "row_id": row_id,
            "mel_spec": mel_spec
        })

    return dataset





# 並列化してmelspecを生成
def generate_melspec_dataset(cfg):
    test_dir = Path(cfg.test_soundscapes)
    if not test_dir.exists():
        print(f"Test directory {test_dir} does not exist.")
        return []

    test_files = list(test_dir.glob('*.ogg'))
    if len(test_files) == 0:
        print("No test audio files found.")
        return []

    if cfg.debug:
        print(f"Debug mode enabled, using only {cfg.debug_count} files")
        test_files = test_files[:cfg.debug_count]

    results = Parallel(n_jobs=cfg.n_jobs)(
        delayed(process_audio_file)(path, cfg) for path in tqdm(test_files, desc="Parallel melspec gen")
    )
    dataset = [item for sublist in results for item in sublist]
    return dataset

In [69]:

# openvinoモデルの読み込み
def load_openvino_models(vino_dir, cfg):
    models = []
    vino_dir = Path(vino_dir)

    if cfg.use_specific_folds:
        fold_ids = cfg.folds
        xml_files = [vino_dir / f"model_fold{f}.xml" for f in fold_ids]
    else:
        xml_files = sorted(vino_dir.glob("model_fold*.xml"))

    for xml_path in xml_files:
        bin_path = xml_path.with_suffix(".bin")

        if not xml_path.exists() or not bin_path.exists():
            print(f"⚠️ Warning: Missing files for {xml_path.stem}")
            continue

        core = Core()
        model_ir = core.read_model(xml_path)
        compiled_model = core.compile_model(model_ir, device_name="CPU")
        models.append(compiled_model)

        # 🔍 モデルのファイル名（fold情報）をログに出す
        print(f"✅ Loaded model: {xml_path.name}")

    print(f"🎉 Total {len(models)} OpenVINO model(s) loaded from {vino_dir}")
    return models

# openvinoモデルによる推論
def run_inference_openvino(dataset, models_ir, cfg, species_ids):
    row_ids = []
    all_preds = []

    for i in range(0, len(dataset), cfg.batch_size):
        batch = dataset[i:i+cfg.batch_size]

        mel_list = [item["mel_spec"] for item in batch]
        input_tensor = np.stack(mel_list).astype(np.float32)  # (B, H, W)
        input_tensor = np.expand_dims(input_tensor, axis=1)  # (B, 1, H, W)

        preds_per_model = []
        for model in models_ir:
            input_layer = model.input(0)
            output_layer = model.output(0)
            result = model([input_tensor])[output_layer]
            probs = 1 / (1 + np.exp(-result))  # sigmoid
            preds_per_model.append(probs)

        avg_preds = np.mean(preds_per_model, axis=0)
        all_preds.append(avg_preds)
        row_ids.extend([item["row_id"] for item in batch])

    predictions = np.concatenate(all_preds, axis=0)
    return row_ids, predictions

In [70]:
# 予測値をsmoothingする．
def smooth_submission_df(submission_df, cfg):
    """
    Smooth predictions in a submission DataFrame and return the modified DataFrame.

    Each row's prediction is averaged with its neighbors using weights from cfg.

    :param submission_df: DataFrame containing submission data (row_id + prediction columns)
    :param cfg: Configuration object containing smoothing weights
    :return: Smoothed submission DataFrame
    """
    print("Smoothing submission predictions...")
    sub = submission_df.copy()
    cols = sub.columns[1:]
    groups = sub['row_id'].astype(str).str.rsplit('_', n=1).str[0].values
    unique_groups = np.unique(groups)

    for group in unique_groups:
        idx = np.where(groups == group)[0]
        sub_group = sub.iloc[idx].copy()
        predictions = sub_group[cols].values
        new_predictions = predictions.copy()

        if predictions.shape[0] > 1:
            w_c = cfg.smooth_center_weight
            w_n = cfg.smooth_neighbor_weight
            new_predictions[0] = predictions[0] * (1 - w_n) + predictions[1] * w_n
            new_predictions[-1] = predictions[-1] * (1 - w_n) + predictions[-2] * w_n
            for i in range(1, predictions.shape[0] - 1):
                new_predictions[i] = (
                    predictions[i - 1] * w_n +
                    predictions[i] * w_c +
                    predictions[i + 1] * w_n
                )
        sub.iloc[idx, 1:] = new_predictions

    return sub

In [71]:
print("Generating dataset...")
dataset = generate_melspec_dataset(cfg)

print("Loading OpenVINO models...")
vino_dir = Path(cfg.model_path).with_name(Path(cfg.model_path).name)
models_ir = load_openvino_models(vino_dir, cfg)

if not models_ir:
    raise RuntimeError("No OpenVINO models found.")

print("Running OpenVINO inference...")
if len(dataset) > 0:
    row_ids, predictions = run_inference_openvino(dataset, models_ir, cfg, species_ids)
else:
    print("No test data available, generating empty submission.")
    row_ids = []
    predictions = []

# smoothing前の予測値を保存
submission_df = utils_lib.create_submission(row_ids, predictions, species_ids, cfg)
submission_path = os.path.join(cfg.OUTPUT_DIR, 'submission_before_smoothing.csv')
submission_df.to_csv(submission_path, index=False)

print(f"Submission saved to {submission_path}")


Generating dataset...


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/fold0_cleaned_10sec0424_vino
Running OpenVINO inference...
Creating submission dataframe...
Submission saved to ../data/result/submission_before_smoothing.csv


In [72]:
# smoothingして再度保存．
smoothed_df = smooth_submission_df(submission_df, cfg)
smoothed_submission_path = os.path.join(cfg.OUTPUT_DIR, 'submission.csv')
smoothed_df.to_csv(smoothed_submission_path, index=False)
print(f"Smoothed submission saved to {smoothed_submission_path}")

Smoothing submission predictions...
Smoothed submission saved to ../data/result/submission.csv


In [73]:
# 提出用ファイルを読み込む
submission = pd.read_csv(os.path.join(cfg.OUTPUT_DIR, 'submission.csv'))
submission.head(12)

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,7.796637e-05,1.722976e-05,0.0003932335,1.975739e-05,0.0001556526,8e-06,0.0008489688,5.652367e-05,1.949767e-05,...,0.006206,0.001608,0.004829,0.010371,0.000553,0.000285,0.001455,0.001459,0.005801,0.000114
1,H02_20230502_080500_10,2.258119e-05,4.547491e-06,0.0001067456,1.340135e-05,4.698053e-05,3e-06,0.0002761642,1.494612e-05,5.0341e-06,...,0.002881,0.000474,0.002629,0.003126,0.000584,9.7e-05,0.000691,0.0004,0.001742,3.6e-05
2,H02_20230502_080500_15,2.42531e-06,4.769964e-07,6.096673e-06,1.560526e-05,6.151394e-06,6e-06,2.394036e-05,1.075632e-06,2.508138e-07,...,0.006484,0.000434,0.001637,0.000843,0.000604,5.2e-05,0.002493,5.5e-05,0.000273,2.7e-05
3,H02_20230502_080500_20,3.181127e-06,9.177555e-07,6.9081e-06,2.857231e-05,6.564919e-06,1.7e-05,9.420888e-07,1.926924e-06,5.71409e-07,...,0.015233,0.0012,0.000865,0.000463,0.000872,0.000107,0.007069,9.3e-05,0.00057,6.9e-05
4,H02_20230502_080500_25,1.081958e-06,3.096293e-07,2.539968e-06,9.160517e-06,2.154536e-06,7e-06,2.706282e-07,6.623202e-07,2.038381e-07,...,0.005431,0.00086,0.000182,0.000203,0.000307,4.2e-05,0.003444,4e-05,0.00049,2.9e-05
5,H02_20230502_080500_30,9.707059e-08,2.855509e-08,6.656915e-07,8.951988e-08,2.534996e-07,1e-06,2.843061e-08,7.638785e-08,1.80193e-08,...,0.000591,0.000719,9e-06,0.000257,7.6e-05,5e-06,0.000609,1.5e-05,0.000149,3e-06
6,H02_20230502_080500_35,5.056424e-07,1.499641e-07,5.751105e-06,7.744919e-07,4.493389e-06,6e-06,6.737179e-08,5.015582e-07,1.535333e-07,...,0.000702,0.000747,7e-06,0.000486,0.000203,1e-05,0.001104,1.9e-05,6.2e-05,3e-06
7,H02_20230502_080500_40,1.292302e-06,3.874038e-07,1.438227e-05,2.10502e-06,1.260895e-05,1.3e-05,9.48235e-08,1.340858e-06,4.465842e-07,...,0.00029,0.000516,1.6e-05,0.000839,0.000388,3.8e-05,0.002176,1.6e-05,9.5e-05,7e-06
8,H02_20230502_080500_45,1.29219e-06,4.38368e-07,6.582141e-06,1.511583e-06,5.505532e-06,6e-06,1.372583e-07,1.236607e-06,4.033519e-07,...,0.000385,0.001003,0.00024,0.002477,0.000249,0.000131,0.002468,1.4e-05,0.002146,1.8e-05
9,H02_20230502_080500_50,1.363495e-06,4.833872e-07,3.764634e-06,1.091697e-06,1.545281e-06,2e-06,2.423513e-07,1.214006e-06,4.27783e-07,...,0.00117,0.00155,0.000632,0.002623,0.000218,0.000239,0.004848,2.5e-05,0.005907,2.9e-05


In [74]:
submission = pd.read_csv(os.path.join(cfg.OUTPUT_DIR, 'submission_before_smoothing.csv'))
submission.head()

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,9.610241e-05,2.144495e-05,0.0004878571,2.144495e-05,0.0001911043,1.012999e-05,0.001032231,7.031184e-05,2.430024e-05,...,0.007346,0.001988,0.005555,0.012821,0.000519,0.000346,0.001701,0.00181,0.007121,0.00014
1,H02_20230502_080500_10,5.422219e-06,3.689885e-07,1.473898e-05,1.300713e-05,1.3846e-05,1.067703e-06,0.0001159193,1.370957e-06,2.873685e-07,...,0.001648,9e-05,0.001927,0.00057,0.000688,4e-05,0.000473,5.5e-05,0.000519,1.2e-05
2,H02_20230502_080500_15,5.36875e-07,1.855391e-07,1.65369e-06,6.540437e-06,2.260324e-06,4.181187e-07,8.31528e-07,3.059023e-07,8.152021e-09,...,0.002116,0.000109,0.00181,0.001099,0.000335,1.8e-05,0.000335,2.8e-05,2.9e-05,5e-06
3,H02_20230502_080500_20,5.093706e-06,1.459376e-06,1.078331e-05,4.539787e-05,1.012999e-05,2.753569e-05,1.287895e-06,3.089494e-06,9.422445e-07,...,0.024423,0.001755,0.00083,0.000346,0.001325,0.000169,0.010987,0.00014,0.000755,0.000109
4,H02_20230502_080500_25,8.764247e-08,2.510999e-08,5.36875e-07,1.27519e-07,1.742979e-07,1.873876e-06,1.522998e-08,6.023574e-08,2.215949e-08,...,0.000779,0.000626,2.6e-05,0.00018,4.8e-05,1.2e-05,0.002051,1.7e-05,0.000553,1.1e-05


In [75]:
submission

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,9.610241e-05,2.144495e-05,0.0004878571,2.144495e-05,0.0001911043,1.012999e-05,0.001032231,7.031184e-05,2.430024e-05,...,0.007346,0.001988,0.005555,0.012821,0.000519,0.000346,0.001701,0.001810211,0.007121,0.0001398221
1,H02_20230502_080500_10,5.422219e-06,3.689885e-07,1.473898e-05,1.300713e-05,1.3846e-05,1.067703e-06,0.0001159193,1.370957e-06,2.873685e-07,...,0.001648,9e-05,0.001927,0.00057,0.000688,4e-05,0.000473,5.475977e-05,0.000519,1.221908e-05
2,H02_20230502_080500_15,5.36875e-07,1.855391e-07,1.65369e-06,6.540437e-06,2.260324e-06,4.181187e-07,8.31528e-07,3.059023e-07,8.152021e-09,...,0.002116,0.000109,0.00181,0.001099,0.000335,1.8e-05,0.000335,2.753569e-05,2.9e-05,4.785095e-06
3,H02_20230502_080500_20,5.093706e-06,1.459376e-06,1.078331e-05,4.539787e-05,1.012999e-05,2.753569e-05,1.287895e-06,3.089494e-06,9.422445e-07,...,0.024423,0.001755,0.00083,0.000346,0.001325,0.000169,0.010987,0.0001398221,0.000755,0.0001088969
4,H02_20230502_080500_25,8.764247e-08,2.510999e-08,5.36875e-07,1.27519e-07,1.742979e-07,1.873876e-06,1.522998e-08,6.023574e-08,2.215949e-08,...,0.000779,0.000626,2.6e-05,0.00018,4.8e-05,1.2e-05,0.002051,1.670142e-05,0.000553,1.147877e-05
5,H02_20230502_080500_30,5.315785e-08,1.344041e-08,3.059023e-07,2.215949e-08,1.19793e-07,4.181187e-07,1.955568e-08,4.139938e-08,1.04674e-08,...,0.000392,0.000667,4e-06,0.000131,6.6e-05,3e-06,8e-05,1.221908e-05,3.5e-05,6.475948e-07
6,H02_20230502_080500_35,2.382369e-07,7.734422e-08,1.873876e-06,2.536019e-07,7.33821e-07,3.966986e-06,6.825603e-08,1.975053e-07,3.653482e-08,...,0.001001,0.00097,8e-06,0.00071,0.000131,5e-06,0.000755,2.2828e-05,8.5e-05,2.726471e-06
7,H02_20230502_080500_40,1.760343e-06,5.043474e-07,2.2828e-05,3.089494e-06,2.014569e-05,1.892515e-05,1.125352e-07,1.873876e-06,6.475948e-07,...,0.000116,0.000158,5e-06,0.000169,0.000553,3.3e-05,0.003173,1.670142e-05,1.8e-05,5.77192e-06
8,H02_20230502_080500_45,9.422445e-07,3.466326e-07,1.553498e-06,1.003014e-06,1.873876e-06,3.089494e-06,6.825603e-08,8.851568e-07,2.536019e-07,...,0.000102,0.001134,5.5e-05,0.002981,0.000149,8.5e-05,0.000607,5.422219e-06,0.000335,1.3846e-05
9,H02_20230502_080500_50,1.873876e-06,6.475948e-07,5.422219e-06,1.459376e-06,1.760343e-06,1.873876e-06,3.689885e-07,1.65369e-06,6.08359e-07,...,0.001501,0.001455,0.001032,0.003273,0.000245,0.000368,0.007346,3.763645e-05,0.009708,4.264747e-05


In [76]:
print("✅ Shape:", submission.shape)
print("✅ Columns:", submission.columns.tolist())
print("✅ Dtypes:\n", submission.dtypes)
print("✅ Nulls:\n", submission.isna().sum().sum())

✅ Shape: (48, 207)
✅ Columns: ['row_id', '1139490', '1192948', '1194042', '126247', '1346504', '134933', '135045', '1462711', '1462737', '1564122', '21038', '21116', '21211', '22333', '22973', '22976', '24272', '24292', '24322', '41663', '41778', '41970', '42007', '42087', '42113', '46010', '47067', '476537', '476538', '48124', '50186', '517119', '523060', '528041', '52884', '548639', '555086', '555142', '566513', '64862', '65336', '65344', '65349', '65373', '65419', '65448', '65547', '65962', '66016', '66531', '66578', '66893', '67082', '67252', '714022', '715170', '787625', '81930', '868458', '963335', 'amakin1', 'amekes', 'ampkin1', 'anhing', 'babwar', 'bafibi1', 'banana', 'baymac', 'bbwduc', 'bicwre1', 'bkcdon', 'bkmtou1', 'blbgra1', 'blbwre1', 'blcant4', 'blchaw1', 'blcjay1', 'blctit1', 'blhpar1', 'blkvul', 'bobfly1', 'bobher1', 'brtpar1', 'bubcur1', 'bubwre1', 'bucmot3', 'bugtan', 'butsal1', 'cargra1', 'cattyr', 'chbant1', 'chfmac1', 'cinbec1', 'cocher1', 'cocwoo1', 'colara1', 'c

In [77]:
if cfg.test_soundscapes != '/kaggle/input/birdclef-2025/test_soundscapes':
    raise RuntimeError("Test directory is not set correctly.")

RuntimeError: Test directory is not set correctly.