In [22]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)
import hashlib
import json
import pickle
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [23]:
# Notebook configuration: paths, model, audio and inference settings.
class CFG:
    """Settings container for the BirdCLEF-2025 notebook.

    Every setting is stored as a plain instance attribute. Which attributes
    exist depends on the constructor arguments:

    * ``kaggle_notebook`` selects the ``/kaggle/input/...`` paths or the
      repository-relative ``../data/...`` paths, and also fixes the device,
      batch size and worker count. ``MODELS_DIR`` is only set for local runs.
    * ``mode == "inference"`` additionally defines the TTA, threshold, fold
      selection and ensemble options.

    ``HOP_LENGTH`` starts out as ``None`` and has to be assigned after
    construction.
    """

    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        """Populate all settings.

        Parameters:
            mode: either "train" or "inference".
            kaggle_notebook: truthy to use the Kaggle paths and CPU settings.
            debug: stored as-is; read by ``update_debug_settings``.
        """
        assert mode in ["train", "inference"], "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        if self.KAGGLE_NOTEBOOK:
            self._use_kaggle_environment()
        else:
            self._use_local_environment()

        self._set_model_and_audio_defaults()

        if mode == "inference":
            self._set_inference_defaults()

    def _use_kaggle_environment(self):
        """Paths and resource limits used inside a Kaggle notebook."""
        self.OUTPUT_DIR = ''
        self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
        self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
        self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
        self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
        self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
        self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'

        # Edit this entry when running on a Kaggle notebook.
        self.model_path = "/kaggle/input/birdclef-2025-baseline-fold0-0404"

        self.device = "cpu"
        self.batch_size = 8
        self.n_jobs = 3

    def _use_local_environment(self):
        """Paths and resource limits used on a local machine."""
        self.OUTPUT_DIR = '../data/result/'
        self.train_datadir = '../data/raw/train_audio/'
        self.train_csv = '../data/raw/train.csv'
        self.test_soundscapes = '../data/raw/test_soundscapes_small'
        self.submission_csv = '../data/raw/sample_submission.csv'
        self.taxonomy_csv = '../data/raw/taxonomy.csv'
        self.spectrogram_npy = None
        self.MODELS_DIR = "../models/"

        # Edit this entry when running locally.
        self.model_path = None

        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"
        self.batch_size = 32
        self.n_jobs = 32

    def _set_model_and_audio_defaults(self):
        """Model, audio and seed settings shared by every environment."""
        # ===== Model Settings =====
        self.model_name = 'efficientnet_b0'
        self.pretrained = (self.mode == "train")
        self.in_channels = 1

        # ===== Audio Settings =====
        self.FS = 32000
        self.WINDOW_SIZE = 5
        self.TARGET_DURATION = 5
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = None  # left unset here; assigned after construction
        self.N_MELS = 148
        self.FMIN = 20
        self.FMAX = 16000

        self.seed = 42

    def _set_inference_defaults(self):
        """Options that only exist when ``mode == "inference"``."""
        self.use_tta = False
        self.tta_count = 3
        self.threshold = 0.5

        self.use_specific_folds = False
        self.folds = [0, 1, 2, 3, 4]  # Used only if use_specific_folds is True

        self.debug_count = 3
        self.ensemble_strategy = "mean"  # e.g. "mean", "max", "min", "median"

    def update_debug_settings(self):
        """Shrink the run (2 epochs, fold 0 only) when ``debug`` is truthy."""
        if not self.debug:
            return
        self.epochs = 2
        self.selected_folds = [0]
            


In [24]:
# Run-time switches for this notebook: "train" or "inference", local or Kaggle.
MODE = "inference"  
KAGGLE_NOTEBOOK = False


# Notebook-wide configuration object built from the two switches above.
cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)

# On Kaggle, install OpenVINO from the attached /kaggle/input/pip-hub directory
# (--no-index: no package index is contacted) and put the bundled library
# directory on sys.path.
if cfg.KAGGLE_NOTEBOOK:
    !pip install -U openvino-telemetry  --no-index --find-links /kaggle/input/pip-hub
    !pip install -U openvino  --no-index --find-links /kaggle/input/pip-hub
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
    
# NOTE(review): these imports sit below the Kaggle block, presumably because on
# Kaggle they only resolve after the install / sys.path entry above — confirm
# before moving them to the top import cell.
from openvino.runtime import Core
from module import utils_lib, preprocess_lib

# Set seed
utils_lib.set_seed(cfg.seed)

In [25]:


def load_all_configs():
    """Build one ``CFG`` per ensemble member.

    Each member is a fresh ``CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)``
    whose ``model_path``, ``HOP_LENGTH`` and ``spectrogram_npy`` are then
    overridden from the table below.

    Returns:
        list: the configured ``CFG`` objects, in table order.
    """
    # (model directory, hop length, cached mel-spectrogram file)
    # The trailing number in each comment is the value the author noted for
    # that model.
    members = (
        # sfzn1 hd hl512 psdMxp, 0.850
        (
            "../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl512-psd",
            512,
            "../data/processed/mel_trn_sdscps_hl512/mel_train_soundscapes.npy",
        ),
        # sfzn1 hd hl16 psdMxp, 0.849
        # NOTE(review): the directory name says "hl16" but HOP_LENGTH is 64 — confirm.
        (
            "../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl16-psdmxp",
            64,
            "../data/processed/mel_trn_sdscps_hl64/mel_train_soundscapes.npy",
        ),
        # maxRMS psdMxp 0.843
        (
            "../models/ensmbl_0527/model-maxrms-0527-newjfcide/model_maxRMS_vino",
            64,
            "../data/processed/mel_trn_sdscps_hl64/mel_train_soundscapes.npy",
        ),
        # maxDB psdMxp 0.838
        (
            "../models/ensmbl_0527/model-maxdb-0527-newjfcide/model_maxdb_vino",
            64,
            "../data/processed/mel_trn_sdscps_hl64/mel_train_soundscapes.npy",
        ),
        # smart psdMxp 0.842
        (
            "../models/ensmbl_0527/model-sfzn1000-smart-pseudo-alpha0-2",
            64,
            "../data/processed/mel_trn_sdscps_hl64/mel_train_soundscapes.npy",
        ),
    )

    cfg_list = []
    for model_dir, hop_length, mel_file in members:
        member_cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)
        member_cfg.model_path = model_dir
        member_cfg.HOP_LENGTH = hop_length
        member_cfg.spectrogram_npy = mel_file
        cfg_list.append(member_cfg)

    return cfg_list

def load_melspec_dataset(cfg):
    """Load the cached mel-spectrogram dataset referenced by ``cfg``.

    Despite the ``.npy`` suffix used by the configured paths, the file is read
    with ``pickle``; whatever object was pickled is returned unchanged.

    Parameters:
        cfg: config object; only ``cfg.spectrogram_npy`` (file path) is read.

    Returns:
        The unpickled object.

    Raises:
        ValueError: ``cfg.spectrogram_npy`` is ``None`` (the default for local
            runs), i.e. no cache file has been configured.
        FileNotFoundError: the configured path does not exist.
    """
    mel_path = cfg.spectrogram_npy
    if mel_path is None:
        # Without this guard os.path.exists(None) fails with an opaque TypeError.
        raise ValueError("cfg.spectrogram_npy is not set; point it at a mel-spectrogram file")

    if not os.path.exists(mel_path):
        raise FileNotFoundError(f"Mel-spectrogram file not found: {cfg.spectrogram_npy}")

    # NOTE(security): pickle.load can execute arbitrary code while loading.
    # Only use this on files produced by this project's own preprocessing.
    with open(mel_path, "rb") as f:
        return pickle.load(f)

In [26]:
# Report the configured device, then read the taxonomy table; its
# 'primary_label' column defines the class list used for the predictions.
print(f"Using device: {cfg.device}")
print("Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df['primary_label'].tolist()
num_classes = len(species_ids)
print(f"Number of classes: {num_classes}")

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [27]:

# Load the OpenVINO models
def load_openvino_models(vino_dir, cfg):
    """Read and compile the OpenVINO IR models found in ``vino_dir``.

    Model files are expected as ``model_fold<k>.xml`` with a matching ``.bin``
    next to each. A fold whose ``.xml`` or ``.bin`` is missing is skipped with
    a printed warning instead of raising.

    Parameters:
        vino_dir: directory holding the IR files (str or path-like).
        cfg: config object. ``cfg.use_specific_folds`` chooses between loading
            exactly the folds listed in ``cfg.folds`` and loading every
            ``model_fold*.xml`` in the directory (sorted by path).

    Returns:
        list: compiled models, all compiled for the "CPU" device. Empty if
        nothing could be loaded.
    """
    vino_dir = Path(vino_dir)

    if cfg.use_specific_folds:
        xml_files = [vino_dir / f"model_fold{f}.xml" for f in cfg.folds]
    else:
        xml_files = sorted(vino_dir.glob("model_fold*.xml"))

    # One Core serves every model; it is created on first use so that a
    # directory without loadable models does not initialise the runtime.
    core = None
    models = []
    for xml_path in xml_files:
        bin_path = xml_path.with_suffix(".bin")

        if not xml_path.exists() or not bin_path.exists():
            print(f"⚠️ Warning: Missing files for {xml_path.stem}")
            continue

        if core is None:
            core = Core()
        model_ir = core.read_model(xml_path)
        models.append(core.compile_model(model_ir, device_name="CPU"))

        # Log which model file (i.e. which fold) was loaded.
        print(f"✅ Loaded model: {xml_path.name}")

    print(f"🎉 Total {len(models)} OpenVINO model(s) loaded from {vino_dir}")
    return models

# Run inference with the OpenVINO models
def run_inference_openvino(dataset, models_ir, cfg, species_ids):
    """Run every compiled model over ``dataset`` and ensemble the results.

    The dataset is processed in slices of ``cfg.batch_size``. For each slice
    the ``"mel_spec"`` arrays are stacked, cast to float32 and given a
    singleton axis at position 1 before being fed to each model. Model outputs
    are passed through a sigmoid and combined across models with
    ``cfg.ensemble_strategy``.

    Parameters:
        dataset: sliceable sequence of dicts with keys ``"mel_spec"`` and
            ``"row_id"``. All ``mel_spec`` arrays must share one shape
            (required by ``np.stack``).
        models_ir: non-empty sequence of compiled models; each must support
            ``model.output(0)`` and ``model([batch])[output]``.
        cfg: config object; ``batch_size`` and ``ensemble_strategy``
            ("mean", "max", "min" or "median") are read.
        species_ids: class labels; only their count is used, to shape the
            result for an empty dataset.

    Returns:
        tuple: ``(row_ids, predictions)`` where ``row_ids`` is a list in
        dataset order and ``predictions`` is the row-wise concatenation of
        the per-batch ensembled probabilities. For an empty dataset this is
        ``([], array of shape (0, len(species_ids)))``.

    Raises:
        ValueError: unknown ensemble strategy, or ``models_ir`` is empty.
    """
    reducers = {
        "mean": np.mean,
        "max": np.max,
        "min": np.min,
        "median": np.median,
    }
    # Fail before any inference work is done.
    if cfg.ensemble_strategy not in reducers:
        raise ValueError(f"Unknown ensemble strategy: {cfg.ensemble_strategy}")
    combine = reducers[cfg.ensemble_strategy]

    if len(models_ir) == 0:
        raise ValueError("models_ir is empty; at least one compiled model is required")

    if len(dataset) == 0:
        # np.concatenate cannot handle an empty list, so answer directly.
        return [], np.empty((0, len(species_ids)), dtype=np.float32)

    # The output handle of a model does not change between batches.
    output_layers = [model.output(0) for model in models_ir]

    row_ids = []
    all_preds = []
    for start in range(0, len(dataset), cfg.batch_size):
        batch = dataset[start:start + cfg.batch_size]

        input_tensor = np.stack([item["mel_spec"] for item in batch]).astype(np.float32)
        input_tensor = np.expand_dims(input_tensor, axis=1)  # add channel axis

        preds_per_model = []
        for model, output_layer in zip(models_ir, output_layers):
            result = model([input_tensor])[output_layer]
            preds_per_model.append(1 / (1 + np.exp(-result)))  # sigmoid

        # Reduce over the model axis according to the chosen strategy.
        all_preds.append(combine(preds_per_model, axis=0))
        row_ids.extend(item["row_id"] for item in batch)

    predictions = np.concatenate(all_preds, axis=0)
    return row_ids, predictions

In [28]:
def run_inference(cfg, species_ids, dataset):
    """Load the models in ``cfg.model_path`` and predict on ``dataset``.

    Parameters:
        cfg: config object; ``cfg.model_path`` is the directory holding the
            OpenVINO model files. The object is also forwarded to the loader,
            the inference routine and ``utils_lib.create_submission``.
        species_ids: class labels, forwarded unchanged.
        dataset: sequence of samples; when empty, inference is skipped and the
            submission is built from empty row ids and predictions.

    Returns:
        Whatever ``utils_lib.create_submission`` returns for the raw
        (un-smoothed) predictions.

    Raises:
        RuntimeError: no model could be loaded from ``cfg.model_path``.
    """
    print("Loading OpenVINO models...")
    # The model directory is used as given; no path rewriting is needed.
    vino_dir = Path(cfg.model_path)
    models_ir = load_openvino_models(vino_dir, cfg)

    if not models_ir:
        raise RuntimeError("No OpenVINO models found.")

    print("Running OpenVINO inference...")
    if len(dataset) > 0:
        row_ids, predictions = run_inference_openvino(dataset, models_ir, cfg, species_ids)
    else:
        print("No test data available, generating empty submission.")
        row_ids, predictions = [], []

    # Build the submission table from the predictions before any smoothing.
    return utils_lib.create_submission(row_ids, predictions, species_ids, cfg)

In [29]:
# Ensemble a list of prediction DataFrames
def ensemble_submissions_dfs(dfs, method="mean"):
    """
    Combine several submission DataFrames element-wise.

    Every frame must have ``row_id`` as its first column followed by the
    prediction columns. All frames must list the same columns and the same
    ``row_id`` values in the same order; otherwise values belonging to
    different rows or classes would be combined.

    Parameters:
        dfs (List[pd.DataFrame]): submission frames, one per model.
        method (str): ensemble strategy: "mean", "max", "min" or "median".

    Returns:
        pd.DataFrame: ``row_id`` plus the combined prediction columns, taking
        row ids and column names from the first frame.

    Raises:
        ValueError: unsupported ``method``, empty ``dfs``, or frames whose
            columns / row ids do not line up with the first frame.
        AssertionError: a frame does not start with a ``row_id`` column.
    """
    reducers = {
        "mean": np.mean,
        "max": np.max,
        "min": np.min,
        "median": np.median,
    }
    if method not in reducers:
        raise ValueError(f"Unsupported ensemble method: {method}")
    if len(dfs) == 0:
        raise ValueError("dfs must contain at least one DataFrame")

    assert all(df.columns[0] == "row_id" for df in dfs), "All DataFrames must start with 'row_id' column"

    reference = dfs[0]
    row_ids = reference['row_id'].values

    # Stacking only checks shapes, so verify the alignment explicitly.
    for position, df in enumerate(dfs[1:], start=1):
        if not df.columns.equals(reference.columns):
            raise ValueError(f"DataFrame {position} has different columns than DataFrame 0")
        if len(df) != len(reference) or not np.array_equal(df['row_id'].values, row_ids):
            raise ValueError(f"DataFrame {position} has different row_id values than DataFrame 0")

    preds = np.stack([df.iloc[:, 1:].values for df in dfs], axis=0)  # (n_models, n_rows, n_classes)
    combined = reducers[method](preds, axis=0)

    result_df = pd.DataFrame(combined, columns=reference.columns[1:])
    result_df.insert(0, "row_id", row_ids)
    return result_df


def smooth_submission_df(submission_df, cfg, weights=None):
    """
    Smooth predictions with a weighted moving average, then blend in the
    per-class mean of each group.

    Rows are grouped by ``row_id`` with its last ``_``-separated field removed.
    Within each group the rows are taken in their current DataFrame order,
    which is assumed to be chronological; the rows are not re-sorted here.
    Each group is edge-padded, smoothed with the centred window ``weights``,
    and then mixed as ``0.8 * smoothed + 0.2 * group_mean`` for every class.

    Parameters:
        submission_df: pd.DataFrame whose first column is 'row_id', followed by
            the prediction columns. It is not modified.
        cfg: config object (unused; kept for interface compatibility).
        weights: odd-length sequence of window weights, centred on the current
            row (default = [0.1, 0.2, 0.4, 0.2, 0.1], i.e. offsets -2..+2).

    Returns:
        pd.DataFrame: copy of ``submission_df`` with smoothed predictions.

    Raises:
        ValueError: ``weights`` is not a one-dimensional, odd-length sequence.
    """
    print("Smoothing submission predictions with global average blend...")

    if weights is None:
        weights = np.array([0.1, 0.2, 0.4, 0.2, 0.1])
    else:
        weights = np.asarray(weights, dtype=float)
    if weights.ndim != 1 or weights.size % 2 == 0:
        raise ValueError("weights must be a one-dimensional sequence with an odd number of entries")
    half_window = weights.size // 2

    sub = submission_df.copy()
    if len(sub) == 0:
        return sub

    cols = sub.columns[1:]
    groups = sub['row_id'].astype(str).str.rsplit('_', n=1).str[0].values

    # Find the row positions of every group in one pass. The stable sort keeps
    # the original row order inside each group.
    _, group_codes = np.unique(groups, return_inverse=True)
    group_codes = group_codes.reshape(-1)
    order = np.argsort(group_codes, kind="stable")
    boundaries = np.flatnonzero(np.diff(group_codes[order])) + 1

    for idx in np.split(order, boundaries):
        preds = sub.iloc[idx][cols].values  # (T, C)
        T = preds.shape[0]

        # Edge handling: repeat the first/last row so border rows get a full window.
        padded = np.pad(preds, ((half_window, half_window), (0, 0)), mode='edge')

        # Weighted moving average over the window, accumulated left to right.
        smoothed = padded[0:T] * weights[0]
        for offset in range(1, weights.size):
            smoothed = smoothed + padded[offset:offset + T] * weights[offset]

        # Mix 20% of each class's group mean into every row of the group.
        classwise_mean = smoothed.mean(axis=0, keepdims=True)  # shape: (1, C)
        smoothed = smoothed * 0.8 + classwise_mean * 0.2

        sub.iloc[idx, 1:] = smoothed

    return sub

In [30]:
# Build the per-model configurations, then run each model over its own
# cached mel-spectrogram dataset and collect one prediction frame per model.
# NOTE(review): the loop variable rebinds the notebook-level `cfg`; after this
# cell `cfg` refers to the last entry of cfg_list.
cfg_list = load_all_configs()

inference_dfs = []
for cfg in cfg_list:
    print(f"🔵 Loading mel and running inference for model: {cfg.model_path}")

    # Load the mel-spectrograms for this model (one model at a time).
    dataset = load_melspec_dataset(cfg)

    # Inference
    df = run_inference(cfg, species_ids, dataset)

    # Keep the predictions in memory (could also be written to CSV if needed).
    inference_dfs.append(df)


🔵 Loading mel and running inference for model: ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl512-psd
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl512-psd
Running OpenVINO inference...
Creating submission dataframe...
🔵 Loading mel and running inference for model: ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl16-psdmxp
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl16-psdmxp
Running OpenVINO inference...
Creating submission dataframe...
🔵 Loading mel and running inference for model: ../models/ensmbl_0527/model-maxrms-0527-newjfcide/model_maxRMS_vino
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/model-maxrms-0527-newjfcide/model_maxRMS_vino
Running OpenVINO inference...
Creating submission dataframe...
🔵

In [31]:


# Directory that receives both CSV files written by this cell.
processed_dir = "../data/processed"

# Ensemble: average the per-model predictions and save them un-smoothed.
ensemble_df = ensemble_submissions_dfs(inference_dfs, method="mean")
before_smoothing_csv = os.path.join(processed_dir, 'submission_before_smoothing.csv')
ensemble_df.to_csv(before_smoothing_csv, index=False)
print("Saved ensembled (before smoothing) submission.")

# Smoothing: smooth within each group, then save the result.
smoothed_df = smooth_submission_df(ensemble_df, cfg_list[0])
smoothed_csv = os.path.join(processed_dir, 'pseudo_labels_0528.csv')
smoothed_df.to_csv(smoothed_csv, index=False)
print("Saved smoothed final submission.")

Saved ensembled (before smoothing) submission.
Smoothing submission predictions with global average blend...
Saved smoothed final submission.


In [32]:
# Show the smoothed predictions as the cell's output.
smoothed_df

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230420_074000_5,0.000060,0.000039,0.000016,0.000071,0.000040,0.000046,0.000269,0.000027,0.000710,...,0.000092,0.000123,0.001932,0.000033,0.000106,0.000151,0.000020,0.000696,0.000026,0.000041
1,H02_20230420_074000_10,0.000057,0.000040,0.000018,0.000056,0.000041,0.000049,0.000282,0.000027,0.000668,...,0.000095,0.000117,0.001101,0.000033,0.000092,0.000127,0.000022,0.000681,0.000027,0.000031
2,H02_20230420_074000_15,0.000056,0.000043,0.000022,0.000052,0.000044,0.000055,0.000317,0.000032,0.000594,...,0.000122,0.000148,0.000845,0.000032,0.000107,0.000121,0.000021,0.000813,0.000032,0.000024
3,H02_20230420_074000_20,0.000042,0.000032,0.000015,0.000035,0.000031,0.000042,0.000211,0.000023,0.000480,...,0.000101,0.000098,0.000441,0.000026,0.000060,0.000089,0.000017,0.000590,0.000027,0.000017
4,H02_20230420_074000_25,0.000036,0.000024,0.000010,0.000031,0.000024,0.000036,0.000154,0.000018,0.000434,...,0.000086,0.000077,0.000418,0.000024,0.000042,0.000083,0.000015,0.000522,0.000026,0.000016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116707,O203_20230526_023000_40,0.000148,0.000108,0.000049,0.000027,0.000109,0.000035,0.000055,0.000055,0.000556,...,0.000016,0.000039,0.000022,0.000047,0.000002,0.000022,0.000002,0.000238,0.000036,0.000025
116708,O203_20230526_023000_45,0.000174,0.000114,0.000055,0.000030,0.000129,0.000038,0.000052,0.000061,0.000617,...,0.000018,0.000039,0.000023,0.000046,0.000003,0.000023,0.000002,0.000293,0.000035,0.000023
116709,O203_20230526_023000_50,0.000142,0.000093,0.000063,0.000026,0.000122,0.000035,0.000054,0.000056,0.000511,...,0.000020,0.000037,0.000031,0.000053,0.000003,0.000020,0.000002,0.000267,0.000030,0.000018
116710,O203_20230526_023000_55,0.000102,0.000080,0.000092,0.000030,0.000126,0.000037,0.000067,0.000054,0.000352,...,0.000028,0.000045,0.000050,0.000072,0.000003,0.000021,0.000002,0.000297,0.000029,0.000016
