In [25]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
import torch.nn.functional as F
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)


import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [26]:
# Notebook configuration: paths, model settings, audio settings and inference options.
class CFG:
    """Configuration container for the notebook.

    Parameters:
        mode: "train" or "inference"; any other value fails the assertion.
        kaggle_notebook: when True, use the /kaggle/input paths and CPU settings;
            otherwise use the local relative paths.
        debug: stored as ``self.debug``; read by ``update_debug_settings`` and by
            the dataset generation code to limit the number of files.

    Notes:
        * ``MODELS_DIR`` is only defined for local (non-Kaggle) runs.
        * ``use_tta``, ``tta_count``, ``threshold``, ``use_specific_folds``,
          ``folds``, ``debug_count`` and ``ensemble_strategy`` are only defined
          when ``mode == "inference"``.
        * ``HOP_LENGTH`` is left as None here and must be set by the caller.
    """

    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ["train", "inference"], "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        # ===== Path Settings =====
        if self.KAGGLE_NOTEBOOK:
            self.OUTPUT_DIR = ''
            self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
            self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
            self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
            self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
            self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
            self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'

            # Change this when running as a Kaggle notebook.
            self.model_path = "/kaggle/input/birdclef-2025-baseline-fold0-0404"

            self.device = "cpu"
            self.batch_size = 8
            self.n_jobs = 3

        else:
            self.OUTPUT_DIR = '../data/result/'
            self.train_datadir = '../data/raw/train_audio/'
            self.train_csv = '../data/raw/train.csv'
            self.test_soundscapes = '../data/raw/test_soundscapes_small'
            self.submission_csv = '../data/raw/sample_submission.csv'
            self.taxonomy_csv = '../data/raw/taxonomy.csv'
            self.spectrogram_npy = '../data/processed/mel-spec_0329/birdclef2025_melspec_5sec_256_256.npy'
            self.MODELS_DIR = "../models/"

            # Change this when running locally.
            self.model_path = "../models/mel_cleaned0413_vino/"

            self.device = "cuda" if torch.cuda.is_available() else "cpu"
            self.batch_size = 32
            self.n_jobs = 3

        # ===== Model Settings =====
        self.model_name = 'efficientnet_b0'
        # Pretrained weights are only requested in training mode.
        self.pretrained = mode == "train"
        self.in_channels = 1

        # ===== Audio Settings =====
        self.FS = 32000            # sample rate used when loading audio
        self.WINDOW_SIZE = 5       # length of one inference window, in seconds
        self.TARGET_DURATION = 5
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = None     # set per model by the caller
        self.N_MELS = 148
        self.FMIN = 20
        self.FMAX = 16000

        self.seed = 42

        # ===== Inference Mode =====
        if mode == "inference":
            self.use_tta = False
            self.tta_count = 3
            self.threshold = 0.5

            self.use_specific_folds = False
            self.folds = [0, 1, 2, 3, 4]  # Used only if use_specific_folds is True

            self.debug_count = 3  # number of files processed when debug is on
            self.ensemble_strategy = "mean"  # e.g. "mean", "max", "min", "median"

    def update_debug_settings(self):
        """When ``self.debug`` is set, shrink the run to 2 epochs on fold 0."""
        if self.debug:
            self.epochs = 2
            self.selected_folds = [0]
            



In [27]:
# Run mode for this notebook; CFG accepts "train" or "inference".
MODE = "inference"  
# Set to True when executing on Kaggle (switches CFG to the /kaggle/input paths).
KAGGLE_NOTEBOOK = False


cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)

if cfg.KAGGLE_NOTEBOOK:
    # Install OpenVINO from the attached pip-hub dataset (--no-index: no network access is used).
    !pip install -U openvino-telemetry  --no-index --find-links /kaggle/input/pip-hub
    !pip install -U openvino  --no-index --find-links /kaggle/input/pip-hub
    # Make the project's `module` package importable on Kaggle.
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
    
# These imports must come after the optional install / sys.path setup above.
# NOTE(review): newer OpenVINO releases expose Core as `openvino.Core`; confirm that
# `openvino.runtime` is still available in the installed version.
from openvino.runtime import Core
from module import models_lib, utils_lib, preprocess_lib, inference_lib

# Set seed
utils_lib.set_seed(cfg.seed)

In [28]:
def load_all_configs():
    """Build one CFG per ensemble member.

    Every config is created with the notebook-level MODE / KAGGLE_NOTEBOOK and
    then gets its own ``model_path`` and ``HOP_LENGTH``.

    Returns:
        list of CFG objects, in the order model A, model B, model C.
    """
    # (model directory, hop length) for each ensemble member; add rows to extend the ensemble.
    member_specs = (
        # model A
        ("../models/fold0_safezone1000_head_vino/", 64),
        # model B
        ("../models/fold0_safezone1000_head_hoplength512_vino/", 512),
        # model C
        # NOTE(review): the directory name suggests hop length 512 and n_fft 2048, but
        # this config uses HOP_LENGTH = 64 and the default N_FFT (1024) — confirm
        # against the settings the model was trained with.
        ("../models/fold0_sfzn1000_head_hl512_nfft2048_vino/", 64),
    )

    configs = []
    for model_dir, hop_length in member_specs:
        member_cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)
        member_cfg.model_path = model_dir
        member_cfg.HOP_LENGTH = hop_length
        configs.append(member_cfg)
    return configs

In [29]:
# Report the device in use, then read the taxonomy table; its 'primary_label'
# column defines the list of species ids (and therefore the class count).
print(f"Using device: {cfg.device}")
print("Loading taxonomy data...")
taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df['primary_label'].to_list()
num_classes = len(species_ids)
print(f"Number of classes: {num_classes}")

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [30]:
# Mel spectrogram conversion
def process_audio_file(audio_path, cfg):
    """Cut one audio file into consecutive windows and build a mel spectrogram for each.

    Parameters:
        audio_path: path of the audio file to load.
        cfg: config providing ``FS`` (sample rate passed to librosa) and
            ``WINDOW_SIZE`` (window length in seconds); it is also forwarded to
            ``preprocess_lib.process_audio_segment``.

    Returns:
        A list of ``{"row_id": ..., "mel_spec": ...}`` dicts, one per complete
        window. Audio after the last complete window is ignored. If anything
        raises, the error is printed and the windows collected so far are returned.
    """
    records = []
    file_stem = Path(audio_path).stem
    try:
        waveform, _ = librosa.load(audio_path, sr=cfg.FS)
        n_windows = int(len(waveform) / (cfg.FS * cfg.WINDOW_SIZE))

        for window_no in range(n_windows):
            first = int(window_no * cfg.FS * cfg.WINDOW_SIZE)
            last = int(first + cfg.FS * cfg.WINDOW_SIZE)
            spec = preprocess_lib.process_audio_segment(waveform[first:last], cfg)

            # The row_id suffix is the end time of the window, in seconds.
            records.append({
                "row_id": f"{file_stem}_{(window_no + 1) * cfg.WINDOW_SIZE}",
                "mel_spec": spec,
            })
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
    return records


# Generate mel spectrograms in parallel
def generate_melspec_dataset(cfg):
    """Build the mel-spectrogram dataset for every ``*.ogg`` file in the test directory.

    Parameters:
        cfg: config providing ``test_soundscapes`` (directory), ``debug``,
            ``debug_count`` (read only when ``debug`` is true) and ``n_jobs``.

    Returns:
        A flat list of the per-window dicts produced by ``process_audio_file``,
        ordered by file name and then by window. Returns ``[]`` when the
        directory does not exist or contains no ``.ogg`` files.
    """
    test_dir = Path(cfg.test_soundscapes)
    if not test_dir.exists():
        print(f"Test directory {test_dir} does not exist.")
        return []

    # glob() yields files in an unspecified order. Sorting keeps the row order
    # identical across repeated calls (the per-model results are later combined
    # row by row) and makes the debug subset reproducible.
    test_files = sorted(test_dir.glob('*.ogg'))
    if not test_files:
        print("No test audio files found.")
        return []

    if cfg.debug:
        print(f"Debug mode enabled, using only {cfg.debug_count} files")
        test_files = test_files[:cfg.debug_count]

    per_file = Parallel(n_jobs=cfg.n_jobs)(
        delayed(process_audio_file)(path, cfg) for path in tqdm(test_files, desc="Parallel melspec gen")
    )
    # Flatten the list of per-file lists into one list of window records.
    return [item for file_items in per_file for item in file_items]

In [31]:

# Load OpenVINO models
def load_openvino_models(vino_dir, cfg):
    """Read and compile the OpenVINO IR models found in ``vino_dir``.

    Parameters:
        vino_dir: directory containing ``model_fold*.xml`` / ``.bin`` pairs.
        cfg: config providing ``use_specific_folds`` and, when that is true,
            ``folds`` (the fold numbers to load).

    Returns:
        A list of compiled models (possibly empty). Models are always compiled
        for the "CPU" device. A fold whose ``.xml`` or ``.bin`` file is missing
        is skipped with a warning.
    """
    vino_dir = Path(vino_dir)

    if cfg.use_specific_folds:
        xml_files = [vino_dir / f"model_fold{f}.xml" for f in cfg.folds]
    else:
        xml_files = sorted(vino_dir.glob("model_fold*.xml"))

    models = []
    core = None  # one runtime shared by all models, created on first use
    for xml_path in xml_files:
        bin_path = xml_path.with_suffix(".bin")

        if not xml_path.exists() or not bin_path.exists():
            print(f"⚠️ Warning: Missing files for {xml_path.stem}")
            continue

        if core is None:
            core = Core()
        model_ir = core.read_model(xml_path)
        models.append(core.compile_model(model_ir, device_name="CPU"))

        # Log the model file name (it carries the fold information).
        print(f"✅ Loaded model: {xml_path.name}")

    print(f"🎉 Total {len(models)} OpenVINO model(s) loaded from {vino_dir}")
    return models

# Inference with OpenVINO models
def run_inference_openvino(dataset, models_ir, cfg, species_ids):
    """Run every compiled model over the dataset in batches and ensemble the results.

    Parameters:
        dataset: list of dicts with keys ``"row_id"`` and ``"mel_spec"``; the
            spectrograms are stacked, so they must all share one shape.
        models_ir: compiled models; each is called with ``[input_tensor]`` and
            its first output is taken as logits.
        cfg: config providing ``batch_size`` and ``ensemble_strategy``
            ("mean", "max", "min" or "median").
        species_ids: class ids; only used to size the empty result.

    Returns:
        ``(row_ids, predictions)`` where ``predictions`` is the per-batch
        ensembled sigmoid output concatenated along axis 0. For an empty
        dataset, ``([], array of shape (0, len(species_ids)))``.

    Raises:
        ValueError: if ``cfg.ensemble_strategy`` is not one of the supported names.
    """
    reducers = {"mean": np.mean, "max": np.max, "min": np.min, "median": np.median}
    # Validate the strategy up front so a typo fails before any inference is run.
    if cfg.ensemble_strategy not in reducers:
        raise ValueError(f"Unknown ensemble strategy: {cfg.ensemble_strategy}")
    combine = reducers[cfg.ensemble_strategy]

    if len(dataset) == 0:
        # np.concatenate cannot handle an empty list; return an empty result instead.
        return [], np.empty((0, len(species_ids)), dtype=np.float32)

    row_ids = []
    all_preds = []

    for start in range(0, len(dataset), cfg.batch_size):
        batch = dataset[start:start + cfg.batch_size]

        input_tensor = np.stack([item["mel_spec"] for item in batch]).astype(np.float32)  # (B, H, W)
        input_tensor = np.expand_dims(input_tensor, axis=1)  # (B, 1, H, W)

        preds_per_model = []
        for model in models_ir:
            output_layer = model.output(0)
            logits = model([input_tensor])[output_layer]
            preds_per_model.append(1 / (1 + np.exp(-logits)))  # sigmoid

        # Combine the per-model probabilities element-wise across models.
        all_preds.append(combine(preds_per_model, axis=0))
        row_ids.extend(item["row_id"] for item in batch)

    return row_ids, np.concatenate(all_preds, axis=0)

In [32]:
def run_inference_and_save(cfg, species_ids):
    """Run the full inference pipeline for one model config and return its predictions.

    Despite the name, this function does not write any file itself; it returns
    the (un-smoothed) submission DataFrame built by ``utils_lib.create_submission``.

    Parameters:
        cfg: config for one ensemble member (``model_path`` plus the settings
            read by the dataset generation and inference helpers).
        species_ids: list of class ids, forwarded to the helpers.

    Returns:
        The value returned by ``utils_lib.create_submission``.

    Raises:
        RuntimeError: if no OpenVINO model could be loaded from ``cfg.model_path``.
    """
    print(f"\n📌 Processing model at: {cfg.model_path}")

    # Load the models first: this is cheap, so a wrong model path fails before
    # the mel spectrograms are computed.
    print("Loading OpenVINO models...")
    models_ir = load_openvino_models(Path(cfg.model_path), cfg)

    if not models_ir:
        raise RuntimeError("No OpenVINO models found.")

    print("Generating dataset...")
    dataset = generate_melspec_dataset(cfg)

    print("Running OpenVINO inference...")
    if len(dataset) > 0:
        row_ids, predictions = run_inference_openvino(dataset, models_ir, cfg, species_ids)
    else:
        print("No test data available, generating empty submission.")
        row_ids = []
        predictions = []

    # Predictions before smoothing.
    submission_df = utils_lib.create_submission(row_ids, predictions, species_ids, cfg)

    return submission_df


# Ensemble a list of prediction DataFrames
def ensemble_submissions_dfs(dfs, method="mean"):
    """
    Ensemble several submission DataFrames element-wise.

    Rows and columns are combined by position, so every DataFrame must list the
    same row_ids in the same order and have the same columns as the first one.

    Parameters:
        dfs (List[pd.DataFrame]): submission DataFrames; the first column of each
            must be 'row_id', the remaining columns hold the predictions.
        method (str): ensemble strategy ("mean", "max", "min" or "median").

    Returns:
        pd.DataFrame: ensembled submission with 'row_id' as the first column.

    Raises:
        ValueError: if ``dfs`` is empty, the DataFrames are not aligned, or
            ``method`` is not supported.
    """
    if len(dfs) == 0:
        raise ValueError("At least one DataFrame is required for ensembling")
    assert all(df.columns[0] == "row_id" for df in dfs), "All DataFrames must start with 'row_id' column"

    reference = dfs[0]
    row_ids = reference['row_id'].values
    # Positional combination silently mixes up rows/classes if inputs disagree,
    # so refuse misaligned inputs explicitly.
    for position, df in enumerate(dfs[1:], start=1):
        if list(df.columns) != list(reference.columns):
            raise ValueError(f"DataFrame {position} has different columns than DataFrame 0")
        if not np.array_equal(df['row_id'].values, row_ids):
            raise ValueError(f"DataFrame {position} has different row_ids (or row order) than DataFrame 0")

    preds = np.stack([df.iloc[:, 1:].values for df in dfs], axis=0)  # (n_models, n_rows, n_classes)

    if method == "mean":
        combined = np.mean(preds, axis=0)
    elif method == "max":
        combined = np.max(preds, axis=0)
    elif method == "min":
        combined = np.min(preds, axis=0)
    elif method == "median":
        combined = np.median(preds, axis=0)
    else:
        raise ValueError(f"Unsupported ensemble method: {method}")

    result_df = pd.DataFrame(combined, columns=reference.columns[1:])
    result_df.insert(0, "row_id", row_ids)
    return result_df


def smooth_submission_df(submission_df, cfg, weights=None, global_blend=0.2):
    """
    Smooth predictions with a weighted moving average over neighbouring rows,
    then blend each row with the per-class mean of its group.

    Rows are grouped by the part of 'row_id' before the last underscore. Within
    a group the rows are smoothed in the order they appear in the DataFrame
    (they are assumed to already be in time order), with the edges padded by
    repeating the first/last row.

    Parameters:
        submission_df: pd.DataFrame whose first column is 'row_id' and whose
            remaining columns are predictions. It is not modified.
        cfg: config object (unused; kept for interface compatibility).
        weights: odd-length sequence of floats centred on the current row
            (default = [0.1, 0.2, 0.4, 0.2, 0.1], i.e. offsets -2..+2).
        global_blend: fraction of the group's per-class mean mixed into every
            row (default 0.2; 0 disables the blend).

    Returns:
        pd.DataFrame with smoothed predictions.

    Raises:
        ValueError: if ``weights`` is not one-dimensional with an odd length.
    """
    print("Smoothing submission predictions with global average blend...")

    if weights is None:
        weights = np.array([0.1, 0.2, 0.4, 0.2, 0.1])
    else:
        weights = np.asarray(weights, dtype=float)
    if weights.ndim != 1 or weights.size % 2 == 0:
        raise ValueError("weights must be a 1-D sequence with an odd number of elements")
    half_window = weights.size // 2

    sub = submission_df.copy()
    cols = sub.columns[1:]
    groups = sub['row_id'].astype(str).str.rsplit('_', n=1).str[0].values

    for group in np.unique(groups):
        idx = np.where(groups == group)[0]
        preds = sub.iloc[idx][cols].values  # (T, C)
        T = preds.shape[0]

        # Edge handling: pad by repeating the first and last rows.
        padded = np.pad(preds, ((half_window, half_window), (0, 0)), mode='edge')  # (T + 2*half, C)

        # Weighted moving average: tap k covers the row at offset (k - half_window).
        smoothed = np.zeros(preds.shape, dtype=float)
        for tap, weight in enumerate(weights):
            smoothed += padded[tap:tap + T] * weight

        # Mix the group's per-class mean into every row of the group.
        classwise_mean = smoothed.mean(axis=0, keepdims=True)  # shape: (1, C)
        smoothed = smoothed * (1.0 - global_blend) + classwise_mean * global_blend

        sub.iloc[idx, 1:] = smoothed

    return sub

In [33]:
cfg_list = load_all_configs()  # one CFG per ensemble member

# Run inference once per model config. The comprehension keeps its loop variable
# local, so the notebook-level `cfg` used by later cells is not rebound to the
# last ensemble member.
inference_dfs = [run_inference_and_save(model_cfg, species_ids) for model_cfg in cfg_list]


📌 Processing model at: ../models/fold0_safezone1000_head_vino/
Generating dataset...


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/fold0_safezone1000_head_vino
Running OpenVINO inference...
Creating submission dataframe...

📌 Processing model at: ../models/fold0_safezone1000_head_hoplength512_vino/
Generating dataset...


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/fold0_safezone1000_head_hoplength512_vino
Running OpenVINO inference...
Creating submission dataframe...

📌 Processing model at: ../models/fold0_sfzn1000_head_hl512_nfft2048_vino/
Generating dataset...


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/fold0_sfzn1000_head_hl512_nfft2048_vino
Running OpenVINO inference...
Creating submission dataframe...


In [20]:
# Both CSVs are written to the output directory of the first model config.
output_dir = cfg_list[0].OUTPUT_DIR

# Ensemble the per-model predictions and keep a copy from before smoothing.
ensemble_df = ensemble_submissions_dfs(inference_dfs, method="mean")
before_smoothing_path = os.path.join(output_dir, 'submission_before_smoothing.csv')
ensemble_df.to_csv(before_smoothing_path, index=False)
print("Saved ensembled (before smoothing) submission.")

# Smooth the ensembled predictions and write the final submission.
smoothed_df = smooth_submission_df(ensemble_df, cfg_list[0])
final_path = os.path.join(output_dir, 'submission.csv')
smoothed_df.to_csv(final_path, index=False)
print("Saved smoothed final submission.")

Saved ensembled (before smoothing) submission.
Smoothing submission predictions with global average blend...
Saved smoothed final submission.


In [21]:
# Read the final submission file back from disk and preview its first rows.
submission_path = os.path.join(cfg.OUTPUT_DIR, 'submission.csv')
submission = pd.read_csv(submission_path)
submission.head(12)

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,1.2e-05,1.3e-05,1.5e-05,4e-06,2.2e-05,1.1e-05,8.8e-05,1.2e-05,7e-06,...,0.00161,0.000397,0.002641,0.001536,0.000512,0.000331,0.002202,0.001006,0.000923,5.2e-05
1,H02_20230502_080500_10,9e-06,9e-06,1.1e-05,5e-06,1.5e-05,1.1e-05,7.6e-05,1e-05,5e-06,...,0.001564,0.000428,0.004748,0.000825,0.00091,0.000341,0.002302,0.000669,0.001186,6.9e-05
2,H02_20230502_080500_15,8e-06,9e-06,1.1e-05,7e-06,1.4e-05,1.4e-05,8.1e-05,1e-05,4e-06,...,0.00136,0.000645,0.008828,0.00046,0.001628,0.000277,0.002366,0.000473,0.000781,0.000101
3,H02_20230502_080500_20,9e-06,1.2e-05,8e-06,5e-06,1.5e-05,1.6e-05,6e-05,1e-05,4e-06,...,0.000933,0.00096,0.004802,0.000332,0.001019,0.000249,0.004912,0.000418,0.00055,9.5e-05
4,H02_20230502_080500_25,7e-06,1.4e-05,6e-06,4e-06,1.6e-05,1.5e-05,4.3e-05,9e-06,4e-06,...,0.000609,0.001586,0.002621,0.000311,0.000573,0.000135,0.007618,0.000353,0.000346,8.9e-05
5,H02_20230502_080500_30,5e-06,1.1e-05,5e-06,3e-06,1.4e-05,1.4e-05,2.4e-05,6e-06,4e-06,...,0.000374,0.001882,0.000517,0.000384,0.000189,8.5e-05,0.015433,0.000325,0.000247,7.6e-05
6,H02_20230502_080500_35,4e-06,1e-05,4e-06,3e-06,1.2e-05,1.3e-05,1.9e-05,5e-06,3e-06,...,0.000351,0.002394,0.000466,0.000528,0.000158,8.6e-05,0.013781,0.000355,0.000256,7.2e-05
7,H02_20230502_080500_40,5e-06,1e-05,5e-06,3e-06,1.4e-05,1.6e-05,2.2e-05,6e-06,4e-06,...,0.000391,0.001777,0.000482,0.000683,0.000197,0.000146,0.017324,0.000445,0.00042,8.5e-05
8,H02_20230502_080500_45,6e-06,1e-05,4e-06,3e-06,1.1e-05,1.4e-05,2.2e-05,7e-06,4e-06,...,0.000463,0.001275,0.000496,0.000716,0.0002,0.000233,0.012081,0.000424,0.000652,6.9e-05
9,H02_20230502_080500_50,8e-06,1.3e-05,4e-06,3e-06,1.2e-05,1.6e-05,2.7e-05,8e-06,5e-06,...,0.000515,0.001076,0.00051,0.000805,0.000188,0.000219,0.012675,0.000376,0.000889,6.6e-05


In [22]:
# Preview the pre-smoothing predictions under their own name, so `submission`
# keeps referring to the final file that the sanity checks below inspect.
submission_before_smoothing = pd.read_csv(os.path.join(cfg.OUTPUT_DIR, 'submission_before_smoothing.csv'))
submission_before_smoothing.head(12)

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,1.7e-05,1.7e-05,2.066124e-05,3.729446e-06,3.2e-05,1.2e-05,0.000109,1.6e-05,1.086704e-05,...,0.00178,0.00018,0.000157,0.00245,7.5e-05,0.00036,0.000147,0.001477,0.00064,3e-05
1,H02_20230502_080500_10,4e-06,3e-06,3.004631e-06,1.382019e-06,4e-06,4e-06,4.4e-05,4e-06,2.568106e-06,...,0.001968,5.9e-05,0.000325,0.000183,0.000142,0.000468,0.000658,0.000437,0.002602,4e-05
2,H02_20230502_080500_15,6e-06,3e-06,1.88863e-05,1.39678e-05,8e-06,1.6e-05,0.000139,1.2e-05,7.124868e-07,...,0.001848,0.000234,0.025626,6.6e-05,0.00437,0.000202,0.000179,0.00022,0.000348,0.000171
3,H02_20230502_080500_20,1.5e-05,1.4e-05,7.751564e-06,4.370585e-06,2e-05,2.4e-05,5.7e-05,1.3e-05,4.452199e-06,...,0.000769,0.000763,0.00069,0.000481,0.000618,0.000421,0.000348,0.000602,0.000298,8.9e-05
4,H02_20230502_080500_25,8e-06,2.5e-05,4.925196e-06,1.891095e-06,2.1e-05,1.5e-05,4e-05,1e-05,6.100604e-06,...,0.00048,0.001952,1.8e-05,8.7e-05,5.3e-05,3.2e-05,0.001698,0.000334,0.000424,0.000102
5,H02_20230502_080500_30,2e-06,6e-06,2.525944e-06,1.739329e-06,1.1e-05,9e-06,8e-06,3e-06,3.061586e-06,...,8.8e-05,0.001171,1.4e-05,0.000117,4.8e-05,8e-06,0.030589,0.000134,3e-05,6.3e-05
6,H02_20230502_080500_35,2e-06,7e-06,2.733642e-06,9.293786e-07,7e-06,1e-05,5e-06,3e-06,1.076951e-06,...,0.000257,0.004558,1.1e-05,0.00066,2e-05,1.6e-05,0.001374,0.000238,0.000123,3.7e-05
7,H02_20230502_080500_40,6e-06,1.4e-05,7.155964e-06,6.617307e-06,2.2e-05,2.5e-05,2.2e-05,8e-06,4.786871e-06,...,0.000204,0.001704,2.4e-05,0.000745,0.000185,3.6e-05,0.039067,0.000643,7.5e-05,0.00015
8,H02_20230502_080500_45,3e-06,4e-06,8.212123e-07,6.006891e-07,4e-06,6e-06,9e-06,3e-06,1.691782e-06,...,0.000418,0.000648,5.3e-05,0.000539,0.000146,0.000412,0.000751,0.000451,0.000571,4.4e-05
9,H02_20230502_080500_50,1.4e-05,1.9e-05,5.689642e-06,2.933159e-06,1.2e-05,2.1e-05,4.1e-05,1.4e-05,6.41417e-06,...,0.000751,0.000467,0.000142,0.00143,0.00012,0.000339,0.00044,0.000335,0.002024,5.8e-05


In [23]:
# Quick sanity report on the loaded submission: size, column names, dtypes
# and the total number of missing values.
for label, value in (
    ("✅ Shape:", submission.shape),
    ("✅ Columns:", submission.columns.tolist()),
    ("✅ Dtypes:\n", submission.dtypes),
    ("✅ Nulls:\n", submission.isna().sum().sum()),
):
    print(label, value)

✅ Shape: (48, 207)
✅ Columns: ['row_id', '1139490', '1192948', '1194042', '126247', '1346504', '134933', '135045', '1462711', '1462737', '1564122', '21038', '21116', '21211', '22333', '22973', '22976', '24272', '24292', '24322', '41663', '41778', '41970', '42007', '42087', '42113', '46010', '47067', '476537', '476538', '48124', '50186', '517119', '523060', '528041', '52884', '548639', '555086', '555142', '566513', '64862', '65336', '65344', '65349', '65373', '65419', '65448', '65547', '65962', '66016', '66531', '66578', '66893', '67082', '67252', '714022', '715170', '787625', '81930', '868458', '963335', 'amakin1', 'amekes', 'ampkin1', 'anhing', 'babwar', 'bafibi1', 'banana', 'baymac', 'bbwduc', 'bicwre1', 'bkcdon', 'bkmtou1', 'blbgra1', 'blbwre1', 'blcant4', 'blchaw1', 'blcjay1', 'blctit1', 'blhpar1', 'blkvul', 'bobfly1', 'bobher1', 'brtpar1', 'bubcur1', 'bubwre1', 'bucmot3', 'bugtan', 'butsal1', 'cargra1', 'cattyr', 'chbant1', 'chfmac1', 'cinbec1', 'cocher1', 'cocwoo1', 'colara1', 'c

In [24]:
# Pre-submission guard: passes only when the config points at the Kaggle test
# soundscapes. With KAGGLE_NOTEBOOK = False (local paths) this assertion fails.
# NOTE(review): presumably a reminder to switch to Kaggle mode before submitting — confirm.
assert cfg.test_soundscapes == "/kaggle/input/birdclef-2025/test_soundscapes", "Test_soundscapes path is incorrect!"

AssertionError: Test_soundscapes path is incorrect!