In [2]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)


import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [13]:
class CFG:
    """Configuration container for the BirdCLEF 2025 notebook.

    Every setting is stored as a plain instance attribute. Which attributes
    exist depends on the constructor arguments:

    * ``kaggle_notebook`` picks between the ``/kaggle/input/...`` paths
      (CPU, batch size 8) and the repository-relative ``../`` paths
      (CUDA when available, batch size 32, plus ``MODELS_DIR``).
    * ``mode == "inference"`` additionally defines the TTA, threshold, fold
      selection, debug-count and contrast attributes.

    :param mode: ``"train"`` or ``"inference"``; anything else fails the assertion.
    :param kaggle_notebook: truthy to use the Kaggle path/runtime settings.
    :param debug: stored as ``self.debug``; read by ``update_debug_settings``.
    """

    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ("train", "inference"), "mode must be 'train' or 'inference'"
        self.mode, self.KAGGLE_NOTEBOOK, self.debug = mode, kaggle_notebook, debug

        # ----- paths and runtime (environment dependent) -----
        if not self.KAGGLE_NOTEBOOK:
            vars(self).update(
                OUTPUT_DIR='../data/result/',
                train_datadir='../data/raw/train_audio/',
                train_csv='../data/raw/train.csv',
                test_soundscapes='../data/raw/test_soundscapes/',
                submission_csv='../data/raw/sample_submission.csv',
                taxonomy_csv='../data/raw/taxonomy.csv',
                spectrogram_npy='../data/processed/mel-spec_0329/birdclef2025_melspec_5sec_256_256.npy',
                MODELS_DIR="../models/",
                model_path="../models/fold0_cleaned_contrast02_vino/",
                device="cuda" if torch.cuda.is_available() else "cpu",
                batch_size=32,
                n_jobs=3,
            )
        else:
            vars(self).update(
                OUTPUT_DIR='',
                train_datadir='/kaggle/input/birdclef-2025/train_audio',
                train_csv='/kaggle/input/birdclef-2025/train.csv',
                test_soundscapes='/kaggle/input/birdclef-2025/test_soundscapes',
                submission_csv='/kaggle/input/birdclef-2025/sample_submission.csv',
                taxonomy_csv='/kaggle/input/birdclef-2025/taxonomy.csv',
                spectrogram_npy='/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy',
                model_path="/kaggle/input/bc25-models-fold0-mel-0411",
                device="cpu",
                batch_size=8,
                n_jobs=3,
            )

        # ----- model -----
        self.model_name = 'efficientnet_b0'
        self.pretrained = (mode == "train")  # pretrained weights are requested in train mode only
        self.in_channels = 1

        # ----- audio / mel-spectrogram -----
        vars(self).update(
            FS=32000,
            WINDOW_SIZE=5,
            TARGET_DURATION=5,
            TARGET_SHAPE=(256, 256),
            N_FFT=1024,
            HOP_LENGTH=64,
            N_MELS=148,
            FMIN=20,
            FMAX=16000,
        )

        self.seed = 42

        # ----- inference-only attributes -----
        if mode == "inference":
            vars(self).update(
                use_tta=False,
                tta_count=3,
                threshold=0.5,
                use_specific_folds=False,
                folds=list(range(5)),  # consulted only when use_specific_folds is True
                debug_count=3,
                is_contrast=True,
                contrast_factor=0.2,
            )

    def update_debug_settings(self):
        """When ``self.debug`` is truthy, set ``epochs`` to 2 and ``selected_folds`` to ``[0]``."""
        if not self.debug:
            return
        self.epochs, self.selected_folds = 2, [0]

In [14]:
"TODO: configを2つにわけるべきかも．柔軟に変える方はnotebook側で，固定したい方はmodule側とか"
"TODO: Debugモードになっていたらsubmissionでエラーになる"
"TODO: 疑似ラベル作成のためなら，事前にデータセットを準備するべき"
# English rendering of the three TODO string notes that precede this statement:
#   1. Maybe the config should be split in two: the flexible part kept in the
#      notebook, the part that should stay fixed kept in the module.
#   2. If debug mode is left on, the submission will fail with an error.
#   3. If this is for pseudo-label creation, the dataset should be prepared in advance.

# Build the inference configuration using the local (non-Kaggle) settings.
cfg = CFG(mode='inference', kaggle_notebook=False)

# On Kaggle, install OpenVINO from the local wheel directory (--no-index, so no
# network access) and make the attached library dataset importable. This must
# run before the imports below.
if cfg.KAGGLE_NOTEBOOK:
    !pip install -U openvino-telemetry  --no-index --find-links /kaggle/input/pip-hub
    !pip install -U openvino  --no-index --find-links /kaggle/input/pip-hub
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
    
# Imported here rather than at the top of the notebook because, on Kaggle, they
# only become importable after the install / sys.path step above.
from openvino.runtime import Core
from module import models_lib, utils_lib, preprocess_lib, inference_lib

# Set seed
utils_lib.set_seed(cfg.seed)

In [15]:
# Report the configured device, then read the taxonomy table; its
# 'primary_label' column supplies the class list for the submission.
print("Using device:", cfg.device)
print("Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df["primary_label"].tolist()
num_classes = len(species_ids)
print("Number of classes:", num_classes)

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [16]:
def process_audio_file(audio_path, cfg):
    """Return the mel-spectrogram records for a single audio file.

    The file is loaded at ``cfg.FS`` and cut into consecutive, non-overlapping
    windows of ``cfg.WINDOW_SIZE`` seconds; a trailing remainder shorter than
    one window is not processed. Each window is passed to
    ``preprocess_lib.process_audio_segment``.

    :param audio_path: path of the audio file to load.
    :param cfg: configuration object providing ``FS`` and ``WINDOW_SIZE``.
    :return: list of ``{"row_id": ..., "mel_spec": ...}`` dicts, where
        ``row_id`` is ``"<file stem>_<window end in seconds>"``. Any exception
        is printed rather than raised, and whatever was collected before the
        failure (possibly nothing) is returned.
    """
    records = []
    stem = Path(audio_path).stem
    try:
        waveform, _ = librosa.load(audio_path, sr=cfg.FS)
        n_windows = int(len(waveform) / (cfg.FS * cfg.WINDOW_SIZE))

        for idx in range(n_windows):
            lo = int(idx * cfg.FS * cfg.WINDOW_SIZE)
            hi = int(lo + cfg.FS * cfg.WINDOW_SIZE)
            mel = preprocess_lib.process_audio_segment(waveform[lo:hi], cfg)
            # Rows are labelled by the end time of their window: 5, 10, 15, ...
            records.append(dict(
                row_id=f"{stem}_{(idx + 1) * cfg.WINDOW_SIZE}",
                mel_spec=mel,
            ))
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
    return records

def generate_melspec_dataset(cfg):
    """Build the mel-spectrogram dataset for every ``*.ogg`` file in the test directory.

    Files are processed in sorted path order so that the row order of the
    result, and the subset chosen in debug mode, are reproducible across runs
    and filesystems.

    :param cfg: configuration object providing ``test_soundscapes``, ``debug``,
        ``n_jobs`` and, when ``debug`` is truthy, ``debug_count``.
    :return: flat list of the records returned by ``process_audio_file`` for
        each file; an empty list when the directory is missing or contains no
        ``*.ogg`` files.
    """
    test_dir = Path(cfg.test_soundscapes)
    if not test_dir.exists():
        print(f"Test directory {test_dir} does not exist.")
        return []

    # glob() yields entries in a filesystem-dependent order, so sort them.
    test_files = sorted(test_dir.glob('*.ogg'))
    if not test_files:
        print("No test audio files found.")
        return []

    if cfg.debug:
        print(f"Debug mode enabled, using only {cfg.debug_count} files")
        test_files = test_files[:cfg.debug_count]

    # One task per file; each task returns that file's list of records.
    results = Parallel(n_jobs=cfg.n_jobs)(
        delayed(process_audio_file)(path, cfg) for path in tqdm(test_files, desc="Parallel melspec gen")
    )
    return [item for sublist in results for item in sublist]


def apply_contrast_to_dataset(dataset, factor=0.5):
    """Return a new dataset whose 'mel_spec' entries have enhanced contrast.

    Each spectrogram is stretched around its own mean by ``1 + factor`` and
    then clipped to ``[0, 1]``. The input records are not modified; the
    returned records carry only the ``"row_id"`` and ``"mel_spec"`` keys.
    An empty dataset is handled safely.

    :param dataset: ``[{"row_id": ..., "mel_spec": ...}, ...]``
    :param factor: contrast strength (e.g. 0.5)
    :return: new contrast-enhanced dataset, or an empty list when ``dataset`` is empty
    """
    if not dataset:
        print("⚠️ apply_contrast_to_dataset: dataset is empty. Returning empty list.")
        return []
    print(f"Applying contrast with factor {factor} to dataset of length {len(dataset)}")

    def _stretch(spec):
        # Scale the deviation from the mean, then clamp into the [0, 1] range.
        centre = np.mean(spec)
        return np.clip(centre + (spec - centre) * (1 + factor), 0, 1)

    return [
        {"row_id": entry["row_id"], "mel_spec": _stretch(entry["mel_spec"])}
        for entry in dataset
    ]

In [17]:
def load_openvino_models(vino_dir, cfg):
    """Load and compile the OpenVINO IR models found in ``vino_dir`` for CPU.

    When ``cfg.use_specific_folds`` is true, only ``model_fold<f>.xml`` for each
    ``f`` in ``cfg.folds`` is considered; otherwise every ``model_fold*.xml`` in
    the directory is used, in sorted order. A model is skipped with a warning
    when its ``.xml`` or the matching ``.bin`` file is missing.

    :param vino_dir: directory containing the ``.xml`` / ``.bin`` pairs.
    :param cfg: configuration object providing ``use_specific_folds`` and,
        when that flag is true, ``folds``.
    :return: list of compiled models (empty when nothing could be loaded).
    """
    vino_dir = Path(vino_dir)

    if cfg.use_specific_folds:
        xml_files = [vino_dir / f"model_fold{f}.xml" for f in cfg.folds]
    else:
        xml_files = sorted(vino_dir.glob("model_fold*.xml"))

    models = []
    # A single runtime instance is shared by all models. It is created lazily
    # so that a directory without usable models never initialises OpenVINO.
    core = None

    for xml_path in xml_files:
        bin_path = xml_path.with_suffix(".bin")

        if not xml_path.exists() or not bin_path.exists():
            print(f"⚠️ Warning: Missing files for {xml_path.stem}")
            continue

        if core is None:
            core = Core()
        model_ir = core.read_model(xml_path)
        compiled_model = core.compile_model(model_ir, device_name="CPU")
        models.append(compiled_model)

        # Log the model file name (it carries the fold information).
        print(f"✅ Loaded model: {xml_path.name}")

    print(f"🎉 Total {len(models)} OpenVINO model(s) loaded from {vino_dir}")
    return models

# === NEW ===
def run_inference_openvino(dataset, models_ir, cfg, species_ids):
    """Run batched inference over ``dataset`` and average the model probabilities.

    Spectrograms are stacked into batches of ``cfg.batch_size`` with a channel
    axis inserted, giving an input of shape (B, 1, H, W). The first output of
    every model is passed through a sigmoid and the resulting probabilities
    are averaged across models.

    :param dataset: list of ``{"row_id": ..., "mel_spec": ...}`` records.
    :param models_ir: compiled models; each is called as ``model([batch])`` and
        indexed with its ``model.output(0)`` port.
    :param cfg: configuration object providing ``batch_size``.
    :param species_ids: class labels; only their count is used, to shape the
        empty result.
    :return: ``(row_ids, predictions)`` where ``predictions`` is the row-wise
        concatenation of the per-batch averages. For an empty dataset this is
        ``([], array of shape (0, len(species_ids)))``.
    :raises ValueError: if ``models_ir`` is empty.
    """
    if not models_ir:
        raise ValueError("run_inference_openvino: models_ir is empty; nothing to predict with.")
    if not dataset:
        # np.concatenate([]) would raise; return a well-formed empty result instead.
        return [], np.empty((0, len(species_ids)), dtype=np.float32)

    # The output port of a model does not change between batches; look it up once.
    output_layers = [model.output(0) for model in models_ir]

    row_ids = []
    all_preds = []

    for i in range(0, len(dataset), cfg.batch_size):
        batch = dataset[i:i + cfg.batch_size]

        input_tensor = np.stack([item["mel_spec"] for item in batch]).astype(np.float32)  # (B, H, W)
        input_tensor = np.expand_dims(input_tensor, axis=1)  # (B, 1, H, W)

        preds_per_model = []
        for model, output_layer in zip(models_ir, output_layers):
            result = model([input_tensor])[output_layer]
            preds_per_model.append(1 / (1 + np.exp(-result)))  # sigmoid

        all_preds.append(np.mean(preds_per_model, axis=0))
        row_ids.extend(item["row_id"] for item in batch)

    predictions = np.concatenate(all_preds, axis=0)
    return row_ids, predictions

In [None]:
# End-to-end inference: build the mel-spectrogram dataset, optionally enhance
# its contrast, load the OpenVINO models, predict, and write submission.csv.
print("Generating dataset...")
dataset = generate_melspec_dataset(cfg)

if cfg.is_contrast:
    dataset = apply_contrast_to_dataset(dataset, factor=cfg.contrast_factor)
else:
    print("No contrast applied to dataset.")

print("Loading OpenVINO models...")
# The model directory is cfg.model_path itself (Path drops any trailing slash).
vino_dir = Path(cfg.model_path)
models_ir = load_openvino_models(vino_dir, cfg)

if not models_ir:
    raise RuntimeError("No OpenVINO models found.")

print("Running OpenVINO inference...")
if len(dataset) > 0:
    row_ids, predictions = run_inference_openvino(dataset, models_ir, cfg, species_ids)
else:
    # No audio was found: still produce a (row-less) submission file.
    print("No test data available, generating empty submission.")
    row_ids = []
    predictions = []

submission_df = utils_lib.create_submission(row_ids, predictions, species_ids, cfg)

# Create the output directory up front so the write cannot fail on a missing
# folder after all the inference work. OUTPUT_DIR is '' in the Kaggle
# configuration, meaning the current directory, which needs no creation.
if cfg.OUTPUT_DIR:
    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
submission_path = os.path.join(cfg.OUTPUT_DIR, 'submission.csv')
submission_df.to_csv(submission_path, index=False)

print(f"Submission saved to {submission_path}")


Generating dataset...


  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)
  self.comm = Comm(**args)


Parallel melspec gen:   0%|          | 0/71 [00:00<?, ?it/s]

Applying contrast to dataset...
Applying contrast with factor 0.2 to dataset of length 852
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/fold0_cleaned_contrast02_vino
Running OpenVINO inference...
Creating submission dataframe...
Submission saved to ../data/result/submission.csv


In [20]:
# Read the submission file back from the output directory and preview its first rows
submission = pd.read_csv(Path(cfg.OUTPUT_DIR) / 'submission.csv')
submission.head()

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,0.001367,0.000315,0.000646,0.000203,0.000804,0.006904,0.001648,0.001455,0.005911,...,0.007577,0.003707,0.001988,0.002051,0.000191,0.001927,0.00097,0.00629,0.001501,0.001501
1,H02_20230420_074000_10,0.001598,0.000473,0.000646,0.000278,0.001455,0.004905,0.001065,0.00141,0.00506,...,0.004199,0.002397,0.001367,0.001367,0.00018,0.004468,0.000488,0.004468,0.000688,0.000607
2,H02_20230420_074000_15,0.00141,0.000667,0.00083,0.001755,0.002473,0.003273,0.000883,0.001245,0.001284,...,0.005555,0.002183,0.00407,0.000296,0.00094,0.002473,0.001988,0.002632,0.000883,0.000536
3,H02_20230420_074000_20,0.001701,0.000646,0.000626,0.000346,0.001455,0.003594,0.001245,0.001207,0.00522,...,0.002051,0.004199,0.00094,0.00057,0.000278,0.001988,0.000357,0.003273,0.000444,0.000368
4,H02_20230420_074000_25,0.00097,0.000217,0.000417,0.000346,0.001367,0.003594,0.000588,0.000856,0.002183,...,0.001549,0.000911,0.00117,0.000431,8.5e-05,0.001284,0.000335,0.002473,0.000368,0.000261


In [21]:
# Show the reloaded submission frame (the rendered output elides the middle rows and columns).
submission

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,1.367026e-03,3.150387e-04,6.462032e-04,2.034270e-04,8.040859e-04,6.903838e-03,1.648483e-03,1.455063e-03,5.911068e-03,...,0.007577,0.003707,0.001988,0.002051,0.000191,0.001927,0.000970,0.006290,0.001501,0.001501
1,H02_20230420_074000_10,1.597846e-03,4.728544e-04,6.462032e-04,2.780310e-04,1.455063e-03,4.905406e-03,1.064963e-03,1.410358e-03,5.060331e-03,...,0.004199,0.002397,0.001367,0.001367,0.000180,0.004468,0.000488,0.004468,0.000688,0.000607
2,H02_20230420_074000_15,1.410358e-03,6.667023e-04,8.295891e-04,1.754614e-03,2.472623e-03,3.273065e-03,8.830458e-04,1.244843e-03,1.284308e-03,...,0.005555,0.002183,0.004070,0.000296,0.000940,0.002473,0.001988,0.002632,0.000883,0.000536
3,H02_20230420_074000_20,1.700722e-03,6.462032e-04,6.263342e-04,3.459916e-04,1.455063e-03,3.593603e-03,1.244843e-03,1.206590e-03,5.220126e-03,...,0.002051,0.004199,0.000940,0.000570,0.000278,0.001988,0.000357,0.003273,0.000444,0.000368
4,H02_20230420_074000_25,9.697520e-04,2.165441e-04,4.173158e-04,3.459916e-04,1.367026e-03,3.593603e-03,5.884088e-04,8.559006e-04,2.182717e-03,...,0.001549,0.000911,0.001170,0.000431,0.000085,0.001284,0.000335,0.002473,0.000368,0.000261
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
847,H02_20230502_080500_40,1.553498e-06,1.067703e-06,1.553498e-06,2.726471e-06,1.136564e-06,5.093706e-06,1.003014e-06,2.726471e-06,6.083590e-07,...,0.008062,0.000051,0.000140,0.004610,0.000804,0.000045,0.000231,0.000109,0.000090,0.000058
848,H02_20230502_080500_45,7.031184e-05,1.670142e-05,5.829126e-05,2.586743e-05,5.144222e-05,1.488385e-04,1.398221e-04,5.829126e-05,3.535625e-05,...,0.003273,0.003594,0.003594,0.004610,0.002890,0.004199,0.000688,0.001170,0.012821,0.000856
849,H02_20230502_080500_50,1.300713e-05,6.144175e-06,1.568955e-05,1.209866e-06,9.516251e-06,7.484623e-05,3.763645e-05,1.670142e-05,1.221908e-05,...,0.055823,0.000315,0.000245,0.000856,0.000169,0.000261,0.000392,0.001170,0.004070,0.000116
850,H02_20230502_080500_55,1.742979e-07,3.059023e-07,1.357433e-07,2.873685e-07,1.357433e-07,8.851568e-07,2.238029e-07,2.536019e-07,5.315785e-08,...,0.001325,0.000028,0.000008,0.000245,0.000732,0.000016,0.001549,0.000015,0.000075,0.000009


In [22]:
# Sanity checks on the reloaded submission: its dimensions, the full list of
# column names, the dtype of every column, and the total number of missing
# values across the whole frame.
print("✅ Shape:", submission.shape)
print("✅ Columns:", submission.columns.tolist())
print("✅ Dtypes:\n", submission.dtypes)
print("✅ Nulls:\n", submission.isna().sum().sum())

✅ Shape: (852, 207)
✅ Columns: ['row_id', '1139490', '1192948', '1194042', '126247', '1346504', '134933', '135045', '1462711', '1462737', '1564122', '21038', '21116', '21211', '22333', '22973', '22976', '24272', '24292', '24322', '41663', '41778', '41970', '42007', '42087', '42113', '46010', '47067', '476537', '476538', '48124', '50186', '517119', '523060', '528041', '52884', '548639', '555086', '555142', '566513', '64862', '65336', '65344', '65349', '65373', '65419', '65448', '65547', '65962', '66016', '66531', '66578', '66893', '67082', '67252', '714022', '715170', '787625', '81930', '868458', '963335', 'amakin1', 'amekes', 'ampkin1', 'anhing', 'babwar', 'bafibi1', 'banana', 'baymac', 'bbwduc', 'bicwre1', 'bkcdon', 'bkmtou1', 'blbgra1', 'blbwre1', 'blcant4', 'blchaw1', 'blcjay1', 'blctit1', 'blhpar1', 'blkvul', 'bobfly1', 'bobher1', 'brtpar1', 'bubcur1', 'bubwre1', 'bucmot3', 'bugtan', 'butsal1', 'cargra1', 'cattyr', 'chbant1', 'chfmac1', 'cinbec1', 'cocher1', 'cocwoo1', 'colara1', '