In [1]:
import os
import logging
from pathlib import Path

import numpy as np
import pandas as pd
import librosa
import torch
from tqdm.auto import tqdm
import sys
from joblib import Parallel, delayed
logging.basicConfig(level=logging.ERROR)
import hashlib
import json

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# 
class CFG:
    """Configuration container for the BirdCLEF 2025 training / inference notebooks.

    All settings are stored as plain instance attributes so that ``vars(cfg)``
    exposes the complete configuration.

    Parameters:
        mode (str): Either ``"train"`` or ``"inference"``. Inference-only
            attributes (TTA, threshold, fold selection, ensemble strategy, ...)
            are created only when ``mode == "inference"``.
        kaggle_notebook (bool): Selects the Kaggle path/resource set when true,
            otherwise the local one.
        debug (bool): Stored as ``self.debug``; consumed by
            ``update_debug_settings`` and by code that reads ``cfg.debug``.
    """

    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ("train", "inference"), "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        # ===== Path / runtime settings (environment dependent) =====
        if self.KAGGLE_NOTEBOOK:
            self._apply_kaggle_environment()
        else:
            self._apply_local_environment()

        # ===== Model settings =====
        self.model_name = 'efficientnet_b0'
        self.pretrained = (mode == "train")
        self.in_channels = 1

        # ===== Audio settings =====
        self.FS = 32000
        self.WINDOW_SIZE = 5
        self.TARGET_DURATION = 5
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = None  # Left unset here; assigned after construction.
        self.N_MELS = 148
        self.FMIN = 20
        self.FMAX = 16000

        self.seed = 42

        # ===== Inference-only settings =====
        if mode == "inference":
            self._apply_inference_settings()

    def _apply_kaggle_environment(self):
        """Set the paths and runtime resources used inside a Kaggle notebook."""
        self.OUTPUT_DIR = ''
        self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
        self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
        self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
        self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
        self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
        self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'

        # Edit this when running as a Kaggle notebook.
        self.model_path = "/kaggle/input/birdclef-2025-baseline-fold0-0404"

        self.device = "cpu"
        self.batch_size = 8
        self.n_jobs = 3

    def _apply_local_environment(self):
        """Set the paths and runtime resources used on a local machine."""
        self.OUTPUT_DIR = '../data/result/'
        self.train_datadir = '../data/raw/train_audio/'
        self.train_csv = '../data/raw/train.csv'
        self.test_soundscapes = '../data/raw/test_soundscapes_small'
        self.submission_csv = '../data/raw/sample_submission.csv'
        self.taxonomy_csv = '../data/raw/taxonomy.csv'
        self.spectrogram_npy = '../data/processed/mel-spec_0329/birdclef2025_melspec_5sec_256_256.npy'
        # NOTE: MODELS_DIR exists only in the local environment.
        self.MODELS_DIR = "../models/"

        # Edit this when running locally.
        self.model_path = "../models/mel_cleaned0413_vino/"

        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.batch_size = 32
        self.n_jobs = 3

    def _apply_inference_settings(self):
        """Create the attributes that only exist in inference mode."""
        self.use_tta = False
        self.tta_count = 3
        self.threshold = 0.5

        self.use_specific_folds = False
        self.folds = [0, 1, 2, 3, 4]  # Used only if use_specific_folds is True

        self.debug_count = 3
        self.ensemble_strategy = "mean"  # e.g. "mean", "max", "min", "median"

    def update_debug_settings(self):
        """When ``self.debug`` is true, set ``epochs`` to 2 and ``selected_folds`` to ``[0]``."""
        if not self.debug:
            return
        self.epochs = 2
        self.selected_folds = [0]
            


In [3]:
# Notebook-level switches: MODE is forwarded to CFG(mode=...), KAGGLE_NOTEBOOK to
# CFG(kaggle_notebook=...). Both are also read by load_all_configs().
MODE = "inference"  
KAGGLE_NOTEBOOK = False


# Base configuration object for this notebook.
cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)

if cfg.KAGGLE_NOTEBOOK:
    # Offline install from a local wheel directory (--no-index / --find-links),
    # then make the project helper package importable.
    !pip install -U openvino-telemetry  --no-index --find-links /kaggle/input/pip-hub
    !pip install -U openvino  --no-index --find-links /kaggle/input/pip-hub
    sys.path.append("/kaggle/input/birdclef-2025-libs/")
    
# These imports must stay below the install / sys.path block above, which is
# what makes them resolvable when KAGGLE_NOTEBOOK is true.
# NOTE(review): the `openvino.runtime` namespace is reportedly deprecated in
# recent OpenVINO releases — confirm against the installed version.
from openvino.runtime import Core
from module import utils_lib, preprocess_lib

# Set seed
utils_lib.set_seed(cfg.seed)

In [4]:
# Configure the ensemble members here (one CFG per model directory).
def load_all_configs():
    """Build one ``CFG`` per ensemble member.

    Every member is constructed with the notebook-level ``MODE`` and
    ``KAGGLE_NOTEBOOK`` switches and then gets its own ``model_path`` and
    ``HOP_LENGTH``.

    Returns:
        list[CFG]: Configurations in the order the members are listed below.
    """
    # (model directory, hop length) per member. The trailing comments are the
    # author's labels; the number is presumably a score — TODO confirm.
    # NOTE(review): these are local relative paths even when KAGGLE_NOTEBOOK is
    # true — confirm they are adjusted before running on Kaggle.
    members = [
        # sfzn1 hd hl512 psdMxp, 0.850
        ("../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl512-psd", 512),
        # sfzn1 hd hl16 psdMxp, 0.849
        # NOTE(review): directory name says "hl16" but HOP_LENGTH is 64 — confirm.
        ("../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl16-psdmxp", 64),
        # maxRMS psdMxp 0.843
        ("../models/ensmbl_0527/model-maxrms-0527-newjfcide/model_maxRMS_vino", 64),
        # maxDB psdMxp 0.838
        ("../models/ensmbl_0527/model-maxdb-0527-newjfcide/model_maxdb_vino", 64),
        # smart psdMxp 0.842
        ("../models/ensmbl_0527/model-sfzn1000-smart-pseudo-alpha0-2", 64),
    ]

    cfg_list = []
    for model_dir, hop_length in members:
        member_cfg = CFG(mode=MODE, kaggle_notebook=KAGGLE_NOTEBOOK)
        member_cfg.model_path = model_dir
        member_cfg.HOP_LENGTH = hop_length
        cfg_list.append(member_cfg)

    return cfg_list


# Extract only the mel-spectrogram settings from a CFG object and hash them.
def cfg_to_mel_hash(cfg, exclude_keys=None):
    """Return a short hash of the attributes of ``cfg`` that are not excluded.

    Every instance attribute whose name is not in ``exclude_keys`` is
    serialized as JSON with sorted keys and hashed with SHA-256. Two configs
    that differ only in excluded attributes therefore get the same hash.

    Parameters:
        cfg: Object whose ``vars(cfg)`` holds the settings.
        exclude_keys: Attribute names to leave out of the hash. Defaults to the
            path / runtime / inference-only attribute names listed below.

    Returns:
        str: The first 8 hex characters of the SHA-256 digest.

    Raises:
        TypeError: From ``json.dumps`` if a remaining value is not JSON
            serializable.
    """
    if exclude_keys is None:
        exclude_keys = [
            # run mode / environment switches
            'mode', 'KAGGLE_NOTEBOOK', 'debug',
            # paths
            'OUTPUT_DIR', 'train_datadir', 'train_csv', 'test_soundscapes',
            'submission_csv', 'taxonomy_csv', 'spectrogram_npy', 'MODELS_DIR',
            'model_path',
            # runtime resources
            'device', 'batch_size', 'n_jobs',
            # inference / debug options
            'use_tta', 'tta_count', 'threshold', 'use_specific_folds', 'folds',
            'debug_count', 'ensemble_strategy', 'epochs', 'selected_folds',
        ]

    mel_cfg = {}
    for key, value in vars(cfg).items():
        if key in exclude_keys:
            continue
        mel_cfg[key] = value

    # sort_keys makes the serialization independent of attribute creation order.
    serialized = json.dumps(mel_cfg, sort_keys=True)
    return hashlib.sha256(serialized.encode()).hexdigest()[:8]

In [5]:
# Report the device, then read the taxonomy table; its 'primary_label' column
# provides the class list (species_ids) passed to inference later on.
print(f"Using device: {cfg.device}")
print("Loading taxonomy data...")

taxonomy_df = pd.read_csv(cfg.taxonomy_csv)
species_ids = taxonomy_df['primary_label'].tolist()
num_classes = len(species_ids)

print(f"Number of classes: {num_classes}")

Using device: cuda
Loading taxonomy data...
Number of classes: 206


In [6]:
# Mel-spectrogram conversion
def process_audio_file(audio_path, cfg):
    """Split one audio file into fixed-length windows and convert each to a mel spectrogram.

    The file is loaded at ``cfg.FS`` and cut into consecutive windows of
    ``cfg.WINDOW_SIZE`` seconds; a trailing partial window is dropped. Each
    window is passed to ``preprocess_lib.process_audio_segment``.

    Parameters:
        audio_path: Path of the audio file; its stem is used as the soundscape id.
        cfg: Config providing ``FS`` and ``WINDOW_SIZE`` (also forwarded to the
            preprocessing helper).

    Returns:
        list[dict]: One ``{"row_id", "mel_spec"}`` dict per window, where
        ``row_id`` is ``"<stem>_<end second of the window>"``. If an exception
        occurs, it is printed and the windows collected so far are returned.
    """
    segments = []
    soundscape_id = Path(audio_path).stem

    try:
        waveform, _ = librosa.load(audio_path, sr=cfg.FS)
        n_segments = int(len(waveform) / (cfg.FS * cfg.WINDOW_SIZE))

        for segment_idx in range(n_segments):
            first_sample = int(segment_idx * cfg.FS * cfg.WINDOW_SIZE)
            last_sample = int(first_sample + cfg.FS * cfg.WINDOW_SIZE)

            mel_spec = preprocess_lib.process_audio_segment(
                waveform[first_sample:last_sample], cfg
            )
            # Row ids are keyed by the window's end time in seconds.
            end_second = (segment_idx + 1) * cfg.WINDOW_SIZE
            segments.append(dict(row_id=f"{soundscape_id}_{end_second}", mel_spec=mel_spec))
    except Exception as e:
        # Best effort: report the failure and keep whatever was produced.
        print(f"Error processing {audio_path}: {e}")

    return segments


# Generate mel spectrograms in parallel
def generate_melspec_dataset(cfg):
    """Build the mel-spectrogram dataset for every ``*.ogg`` file in ``cfg.test_soundscapes``.

    Files are processed in parallel with ``cfg.n_jobs`` workers via
    ``process_audio_file`` and the per-file results are flattened into one list.

    Parameters:
        cfg: Config providing ``test_soundscapes``, ``debug``, ``n_jobs`` and,
            when ``debug`` is true, ``debug_count``.

    Returns:
        list[dict]: All ``{"row_id", "mel_spec"}`` items, file by file in sorted
        file order. An empty list if the directory is missing or has no
        ``*.ogg`` files.
    """
    test_dir = Path(cfg.test_soundscapes)
    if not test_dir.exists():
        print(f"Test directory {test_dir} does not exist.")
        return []

    # glob() yields files in a filesystem-dependent order. Sorting makes the
    # debug subset and the resulting row order reproducible, and guarantees
    # that datasets built by separate calls list their rows in the same order.
    test_files = sorted(test_dir.glob('*.ogg'))
    if not test_files:
        print("No test audio files found.")
        return []

    if cfg.debug:
        print(f"Debug mode enabled, using only {cfg.debug_count} files")
        test_files = test_files[:cfg.debug_count]

    results = Parallel(n_jobs=cfg.n_jobs)(
        delayed(process_audio_file)(path, cfg) for path in tqdm(test_files, desc="Parallel melspec gen")
    )
    return [item for per_file in results for item in per_file]

In [7]:

# Load OpenVINO models
def load_openvino_models(vino_dir, cfg):
    """Read and compile the OpenVINO IR fold models found in ``vino_dir``.

    Parameters:
        vino_dir: Directory containing ``model_fold*.xml`` files with matching
            ``.bin`` files.
        cfg: Config providing ``use_specific_folds`` and, when that is true,
            ``folds`` (the fold ids to load).

    Returns:
        list: Models compiled for the ``"CPU"`` device, in fold-file order.
        Folds whose ``.xml`` or ``.bin`` file is missing are skipped with a
        warning, so the list may be empty.
    """
    models = []
    vino_dir = Path(vino_dir)

    if cfg.use_specific_folds:
        xml_files = [vino_dir / f"model_fold{f}.xml" for f in cfg.folds]
    else:
        xml_files = sorted(vino_dir.glob("model_fold*.xml"))

    # One Core is shared by all folds; it is created lazily so that nothing is
    # instantiated when there is no model to load.
    core = None

    for xml_path in xml_files:
        bin_path = xml_path.with_suffix(".bin")

        if not xml_path.exists() or not bin_path.exists():
            print(f"⚠️ Warning: Missing files for {xml_path.stem}")
            continue

        if core is None:
            core = Core()
        model_ir = core.read_model(xml_path)
        compiled_model = core.compile_model(model_ir, device_name="CPU")
        models.append(compiled_model)

        # Log the model file name (it carries the fold information).
        print(f"✅ Loaded model: {xml_path.name}")

    print(f"🎉 Total {len(models)} OpenVINO model(s) loaded from {vino_dir}")
    return models

# Inference with OpenVINO models
def run_inference_openvino(dataset, models_ir, cfg, species_ids):
    """Run every model on the dataset in batches and combine their sigmoid outputs.

    Parameters:
        dataset: Sequence of ``{"row_id", "mel_spec"}`` dicts. The ``mel_spec``
            arrays of a batch are stacked, so they must share one shape.
        models_ir: Compiled models. Each is called as ``model([batch])`` and the
            result is read from ``model.output(0)``.
        cfg: Config providing ``batch_size`` and ``ensemble_strategy``
            (``"mean"``, ``"max"``, ``"min"`` or ``"median"``).
        species_ids: Class ids; only used to size the empty result when
            ``dataset`` is empty.

    Returns:
        tuple[list, np.ndarray]: The row ids and the combined per-row
        predictions, in dataset order.

    Raises:
        ValueError: If the ensemble strategy is unknown or ``models_ir`` is empty.
    """
    reducers = {"mean": np.mean, "max": np.max, "min": np.min, "median": np.median}
    # Validate up front so a typo fails before any inference work is done.
    if cfg.ensemble_strategy not in reducers:
        raise ValueError(f"Unknown ensemble strategy: {cfg.ensemble_strategy}")
    combine = reducers[cfg.ensemble_strategy]

    if len(models_ir) == 0:
        raise ValueError("At least one model is required for inference.")

    if len(dataset) == 0:
        # Nothing to predict: return an empty, correctly shaped result instead
        # of failing inside np.concatenate.
        return [], np.empty((0, len(species_ids)), dtype=np.float32)

    # The output port does not change between batches; look it up once per model.
    model_outputs = [(model, model.output(0)) for model in models_ir]

    row_ids = []
    all_preds = []

    for start in range(0, len(dataset), cfg.batch_size):
        batch = dataset[start:start + cfg.batch_size]

        input_tensor = np.stack([item["mel_spec"] for item in batch]).astype(np.float32)  # (B, H, W)
        input_tensor = np.expand_dims(input_tensor, axis=1)  # (B, 1, H, W)

        preds_per_model = []
        for model, output_layer in model_outputs:
            result = model([input_tensor])[output_layer]
            preds_per_model.append(1 / (1 + np.exp(-result)))  # sigmoid

        # Combine across models (axis 0 of the stacked predictions).
        all_preds.append(combine(preds_per_model, axis=0))
        row_ids.extend(item["row_id"] for item in batch)

    predictions = np.concatenate(all_preds, axis=0)
    return row_ids, predictions

In [8]:
def run_inference(cfg, species_ids, dataset):
    """Load the OpenVINO models at ``cfg.model_path`` and predict on ``dataset``.

    Parameters:
        cfg: Config providing ``model_path`` plus everything required by
            ``load_openvino_models`` and ``run_inference_openvino``.
        species_ids: Class ids forwarded to inference and to
            ``utils_lib.create_submission``.
        dataset: Sequence of ``{"row_id", "mel_spec"}`` dicts.

    Returns:
        The object returned by ``utils_lib.create_submission`` (used as a
        submission DataFrame by the rest of this notebook).

    Raises:
        RuntimeError: If no model could be loaded from ``cfg.model_path``.
    """
    print("Loading OpenVINO models...")
    # The previous `Path(p).with_name(Path(p).name)` round-trip returned the same
    # path, but raised ValueError for paths with an empty final component
    # (e.g. "."), so the path is used directly.
    vino_dir = Path(cfg.model_path)
    models_ir = load_openvino_models(vino_dir, cfg)

    if not models_ir:
        raise RuntimeError("No OpenVINO models found.")

    print("Running OpenVINO inference...")
    if len(dataset) > 0:
        row_ids, predictions = run_inference_openvino(dataset, models_ir, cfg, species_ids)
    else:
        print("No test data available, generating empty submission.")
        row_ids = []
        predictions = []

    # Build the pre-smoothing submission frame. Nothing is written to disk in
    # this function itself; whether create_submission does is not visible
    # here — TODO confirm in utils_lib.
    submission_df = utils_lib.create_submission(row_ids, predictions, species_ids, cfg)

    return submission_df

In [9]:
# Ensemble a list of prediction DataFrames
def _align_to_reference(df, row_ids, class_cols):
    """Return the prediction matrix of ``df`` ordered like the reference rows and columns."""
    same_rows = len(df) == len(row_ids) and np.array_equal(df["row_id"].values, row_ids)
    same_cols = df.columns[1:].equals(class_cols)
    if same_rows and same_cols:
        # Already aligned: take the values positionally.
        return df.iloc[:, 1:].values

    indexed = df.set_index("row_id")
    if (
        len(indexed) != len(row_ids)
        or not indexed.index.is_unique
        or set(indexed.index) != set(row_ids)
        or len(indexed.columns) != len(class_cols)
        or set(indexed.columns) != set(class_cols)
    ):
        raise ValueError("All DataFrames must contain the same row_ids and prediction columns")
    # Same content in a different order: reorder by label.
    return indexed.loc[row_ids, class_cols].values


def ensemble_submissions_dfs(dfs, method="mean"):
    """
    Ensemble several submission DataFrames into one.

    Rows and prediction columns of every DataFrame are matched to the first
    DataFrame by ``row_id`` / column name before combining, so frames that list
    the same rows in a different order are combined correctly instead of being
    mixed positionally.

    Parameters:
        dfs (List[pd.DataFrame]): Submission DataFrames; the first column of
            each must be ``row_id`` and the remaining columns are predictions.
        method (str): Ensemble strategy (mean, max, min, median).

    Returns:
        pd.DataFrame: The ensembled submission, with the row and column order of
        the first DataFrame.

    Raises:
        ValueError: If ``dfs`` is empty, ``method`` is unsupported, or the
            DataFrames do not contain the same row ids and prediction columns.
        AssertionError: If a DataFrame does not start with a ``row_id`` column.
    """
    dfs = list(dfs)
    if not dfs:
        raise ValueError("dfs must contain at least one DataFrame")
    assert all(df.columns[0] == "row_id" for df in dfs), "All DataFrames must start with 'row_id' column"

    reducers = {"mean": np.mean, "max": np.max, "min": np.min, "median": np.median}
    if method not in reducers:
        raise ValueError(f"Unsupported ensemble method: {method}")

    reference = dfs[0]
    row_ids = reference['row_id'].values
    class_cols = reference.columns[1:]

    # (n_models, n_rows, n_classes)
    preds = np.stack([_align_to_reference(df, row_ids, class_cols) for df in dfs], axis=0)
    combined = reducers[method](preds, axis=0)

    result_df = pd.DataFrame(combined, columns=class_cols)
    result_df.insert(0, "row_id", row_ids)
    return result_df


def smooth_submission_df(submission_df, cfg, weights=None):
    """
    Temporally smooth predictions per soundscape and blend in the per-class mean.

    Rows are grouped by ``row_id`` with its last ``_``-separated token removed.
    Within each group, every row is replaced by a 5-tap weighted sum over the
    offsets [-2, -1, 0, +1, +2] (edges are handled by repeating the first/last
    row), and the result is mixed as 80% smoothed value + 20% of the group's
    per-class mean of the smoothed values.

    Parameters:
        submission_df: pd.DataFrame whose first column is 'row_id' and whose
            remaining columns are predictions. It is not modified.
        cfg: Unused; kept for interface compatibility.
        weights: Five tap weights (default = [0.1, 0.2, 0.4, 0.2, 0.1]).

    Returns:
        pd.DataFrame: A copy of ``submission_df`` with smoothed predictions.
    """
    print("Smoothing submission predictions with global average blend...")

    taps = np.array([0.1, 0.2, 0.4, 0.2, 0.1]) if weights is None else np.array(weights)

    sub = submission_df.copy()
    cols = sub.columns[1:]
    # "<soundscape>_<end second>" -> "<soundscape>"
    groups = sub['row_id'].astype(str).str.rsplit('_', n=1).str[0].values

    for group in np.unique(groups):
        idx = np.where(groups == group)[0]
        preds = sub.iloc[idx][cols].values  # (T, C)
        n_frames = preds.shape[0]

        # Edge handling: repeat the first and last frame twice each.
        padded = np.pad(preds, ((2, 2), (0, 0)), mode='edge')  # (T+4, C)

        # 5-point weighted sum, accumulated tap by tap from offset -2 to +2.
        smoothed = padded[0:n_frames] * taps[0]
        for tap in range(1, 5):
            smoothed = smoothed + padded[tap:tap + n_frames] * taps[tap]

        # Mix 20% of each class's mean over the group into every frame.
        group_mean = smoothed.mean(axis=0, keepdims=True)  # (1, C)
        sub.iloc[idx, 1:] = smoothed * 0.8 + group_mean * 0.2

    return sub

In [10]:
cfg_list = load_all_configs()

# Pair every ensemble member with the hash of its mel settings. The loop
# variables below deliberately avoid the name `cfg`, so the notebook-level
# `cfg` created in the setup cell is not silently rebound to the last
# ensemble member.
cfg_hash_list = []
for member_cfg in cfg_list:
    cfg_hash_list.append({
        "hash": cfg_to_mel_hash(member_cfg),
        "cfg": member_cfg
    })
print(cfg_hash_list)


# Generate each distinct mel dataset once; members with the same hash share it.
datasets = {}
for entry in cfg_hash_list:
    entry_hash = entry["hash"]

    if entry_hash not in datasets:
        print(f"🔵 Generating dataset for hash: {entry_hash}")
        datasets[entry_hash] = generate_melspec_dataset(entry["cfg"])


# Run every member on the dataset that matches its mel settings.
inference_dfs = []
for entry in cfg_hash_list:
    member_df = run_inference(entry["cfg"], species_ids, datasets[entry["hash"]])
    inference_dfs.append(member_df)



[{'hash': '12de2aa3', 'cfg': <__main__.CFG object at 0x7f5c5174d240>}, {'hash': '0cae7500', 'cfg': <__main__.CFG object at 0x7f5c5174d570>}, {'hash': '0cae7500', 'cfg': <__main__.CFG object at 0x7f5c5174f760>}, {'hash': '0cae7500', 'cfg': <__main__.CFG object at 0x7f5c5174d540>}, {'hash': '0cae7500', 'cfg': <__main__.CFG object at 0x7f5c5174e2c0>}]
🔵 Generating dataset for hash: 12de2aa3


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

🔵 Generating dataset for hash: 0cae7500


Parallel melspec gen:   0%|          | 0/4 [00:00<?, ?it/s]

Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl512-psd
Running OpenVINO inference...
Creating submission dataframe...
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/bc25-models-fld0-sfzn1-hd-hl16-psdmxp
Running OpenVINO inference...
Creating submission dataframe...
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/model-maxrms-0527-newjfcide/model_maxRMS_vino
Running OpenVINO inference...
Creating submission dataframe...
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from ../models/ensmbl_0527/model-maxdb-0527-newjfcide/model_maxdb_vino
Running OpenVINO inference...
Creating submission dataframe...
Loading OpenVINO models...
✅ Loaded model: model_fold0.xml
🎉 Total 1 OpenVINO model(s) loaded from .

In [11]:
# Ensemble the per-model predictions and store the pre-smoothing result.
output_dir = cfg_list[0].OUTPUT_DIR
before_smoothing_path = os.path.join(output_dir, 'submission_before_smoothing.csv')
final_path = os.path.join(output_dir, 'submission.csv')

ensemble_df = ensemble_submissions_dfs(inference_dfs, method="mean")
ensemble_df.to_csv(before_smoothing_path, index=False)
print("Saved ensembled (before smoothing) submission.")

# Smooth the ensembled predictions and store the final submission.
smoothed_df = smooth_submission_df(ensemble_df, cfg_list[0])
smoothed_df.to_csv(final_path, index=False)
print("Saved smoothed final submission.")

Saved ensembled (before smoothing) submission.
Smoothing submission predictions with global average blend...
Saved smoothed final submission.


In [12]:
# Load the final submission file. It is read from cfg_list[0].OUTPUT_DIR, the
# same location the saving cell writes to, rather than from whichever object
# `cfg` happens to reference at this point.
submission = pd.read_csv(os.path.join(cfg_list[0].OUTPUT_DIR, 'submission.csv'))
submission.head(12)

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,3e-06,2e-06,4e-06,2.258568e-06,1.4e-05,2e-06,8.4e-05,5e-06,7e-06,...,0.000364,0.000179,0.00245,0.000472,0.002823,7.3e-05,0.000459,0.000317,0.000453,1.5e-05
1,H02_20230502_080500_10,3e-06,2e-06,5e-06,3.306503e-06,9e-06,2e-06,0.000101,4e-06,4e-06,...,0.000376,0.000354,0.004458,0.000421,0.005123,5.8e-05,0.000465,0.00038,0.000435,2.3e-05
2,H02_20230502_080500_15,3e-06,2e-06,8e-06,5.211169e-06,6e-06,3e-06,0.000154,4e-06,2e-06,...,0.000467,0.000623,0.008078,0.000375,0.009677,3.9e-05,0.000478,0.000239,0.000307,3.2e-05
3,H02_20230502_080500_20,2e-06,3e-06,5e-06,3.113919e-06,3e-06,3e-06,8.4e-05,4e-06,1e-06,...,0.000331,0.000948,0.004406,0.000483,0.00526,2.3e-05,0.000628,0.000149,0.000272,2.1e-05
4,H02_20230502_080500_25,2e-06,4e-06,5e-06,1.950416e-06,3e-06,3e-06,5.1e-05,4e-06,1e-06,...,0.000231,0.000868,0.002394,0.000638,0.002968,1.3e-05,0.000825,7.9e-05,0.000223,1.5e-05
5,H02_20230502_080500_30,1e-06,4e-06,4e-06,9.712046e-07,3e-06,3e-06,1.6e-05,4e-06,1e-06,...,0.000125,0.000718,0.000489,0.000946,0.000715,8e-06,0.00166,8.1e-05,0.000179,1.1e-05
6,H02_20230502_080500_35,1e-06,5e-06,5e-06,9.93648e-07,3e-06,3e-06,1.5e-05,4e-06,1e-06,...,0.000113,0.000597,0.000463,0.000978,0.000665,1.2e-05,0.002022,0.000108,0.000221,1.4e-05
7,H02_20230502_080500_40,1e-06,4e-06,4e-06,1.09883e-06,3e-06,3e-06,1.4e-05,4e-06,1e-06,...,9.9e-05,0.000476,0.000507,0.001034,0.000669,1.8e-05,0.002976,0.000123,0.000298,1.6e-05
8,H02_20230502_080500_45,1e-06,3e-06,3e-06,9.822965e-07,3e-06,3e-06,1.4e-05,4e-06,1e-06,...,0.000114,0.00039,0.000576,0.000876,0.000686,2.9e-05,0.00262,0.000126,0.000466,1.8e-05
9,H02_20230502_080500_50,1e-06,4e-06,3e-06,1.532997e-06,3e-06,5e-06,1.5e-05,4e-06,2e-06,...,0.000135,0.000452,0.000599,0.000946,0.000809,2.5e-05,0.003328,0.0001,0.000491,1.7e-05


In [13]:
# Load the pre-smoothing predictions for comparison. A separate name is used so
# that `submission` keeps referring to the final submission file; the sanity
# checks that follow inspect `submission` and would otherwise validate this
# intermediate file instead.
submission_before_smoothing = pd.read_csv(
    os.path.join(cfg_list[0].OUTPUT_DIR, 'submission_before_smoothing.csv')
)
submission_before_smoothing.head(12)

Unnamed: 0,row_id,1139490,1192948,1194042,126247,1346504,134933,135045,1462711,1462737,...,yebfly1,yebsee1,yecspi2,yectyr1,yehbla2,yehcar1,yelori1,yeofly1,yercac1,ywcpar
0,H02_20230502_080500_5,4.233345e-06,1.818509e-06,2.995693e-06,9.095277e-07,2.186995e-05,6.841711e-07,6.7e-05,4e-06,1.145906e-05,...,0.000372,2.9e-05,7.8e-05,0.000411,2.9e-05,9.5e-05,2.3e-05,0.000261,0.000472,5e-06
1,H02_20230502_080500_10,2.49115e-06,2.394455e-06,2.4997e-06,1.224035e-06,5.70769e-06,5.572338e-07,2.5e-05,4e-06,2.097949e-06,...,0.00021,5.4e-05,0.000799,0.000308,6.3e-05,7e-05,5.9e-05,0.000806,0.000658,1.7e-05
2,H02_20230502_080500_15,3.518075e-06,8.499957e-07,1.796063e-05,1.339797e-05,4.32873e-06,6.50572e-06,0.000413,5e-06,1.093482e-06,...,0.000931,0.000407,0.023172,0.000118,0.028255,3.4e-05,2.4e-05,0.000137,7.6e-05,7e-05
3,H02_20230502_080500_20,1.071338e-06,1.353137e-06,1.020455e-06,8.912624e-07,1.584459e-06,8.513464e-07,1.1e-05,2e-06,6.258555e-07,...,0.000281,0.001791,0.000638,0.000262,0.00048,1.9e-05,2.6e-05,7.2e-05,0.000269,1.1e-05
4,H02_20230502_080500_25,1.261818e-06,5.51716e-06,3.810783e-06,3.882612e-07,2.869581e-06,1.762249e-06,1.4e-05,5e-06,1.144944e-06,...,0.000156,0.000913,2.9e-05,0.000368,0.000126,4e-06,0.000255,3.5e-05,0.000247,7e-06
5,H02_20230502_080500_30,9.439227e-07,3.767153e-06,2.438135e-06,5.430781e-07,1.953497e-06,4.509289e-06,5e-06,4e-06,1.362224e-06,...,3.6e-05,0.000473,8e-06,0.00141,0.000299,1e-06,0.001696,3.3e-05,5.7e-05,5e-06
6,H02_20230502_080500_35,1.630114e-06,7.225812e-06,9.08779e-06,5.411377e-07,4.292623e-06,2.695429e-06,7e-06,5e-06,9.547387e-07,...,0.000103,0.00074,3.7e-05,0.000745,7.4e-05,2e-06,0.000396,0.000108,0.000114,1.2e-05
7,H02_20230502_080500_40,9.905499e-07,3.323453e-06,1.959627e-06,1.198315e-06,2.296396e-06,3.280047e-06,5e-06,3e-06,6.954504e-07,...,2e-05,0.000457,6e-06,0.001397,0.000154,1e-06,0.007203,0.000129,5.5e-05,1.5e-05
8,H02_20230502_080500_45,1.782539e-06,2.272983e-06,1.619558e-06,3.656321e-07,1.95857e-06,2.003336e-06,6e-06,5e-06,1.281973e-06,...,0.000129,0.000201,0.000274,0.000964,0.000279,5.9e-05,0.000211,0.000165,0.000875,2.8e-05
9,H02_20230502_080500_50,8.260078e-07,1.125521e-06,6.654276e-07,5.661301e-07,9.183896e-07,5.837363e-07,6e-06,2e-06,7.788832e-07,...,6.1e-05,0.000245,0.00037,0.000212,3.5e-05,2.7e-05,4.3e-05,5.4e-05,0.000504,1e-05


In [14]:
# Quick structural checks of the loaded submission: shape, column names,
# dtypes and the total number of missing values.
submission_report = (
    ("✅ Shape:", submission.shape),
    ("✅ Columns:", submission.columns.tolist()),
    ("✅ Dtypes:\n", submission.dtypes),
    ("✅ Nulls:\n", submission.isna().sum().sum()),
)
for label, value in submission_report:
    print(label, value)

✅ Shape: (48, 207)
✅ Columns: ['row_id', '1139490', '1192948', '1194042', '126247', '1346504', '134933', '135045', '1462711', '1462737', '1564122', '21038', '21116', '21211', '22333', '22973', '22976', '24272', '24292', '24322', '41663', '41778', '41970', '42007', '42087', '42113', '46010', '47067', '476537', '476538', '48124', '50186', '517119', '523060', '528041', '52884', '548639', '555086', '555142', '566513', '64862', '65336', '65344', '65349', '65373', '65419', '65448', '65547', '65962', '66016', '66531', '66578', '66893', '67082', '67252', '714022', '715170', '787625', '81930', '868458', '963335', 'amakin1', 'amekes', 'ampkin1', 'anhing', 'babwar', 'bafibi1', 'banana', 'baymac', 'bbwduc', 'bicwre1', 'bkcdon', 'bkmtou1', 'blbgra1', 'blbwre1', 'blcant4', 'blchaw1', 'blcjay1', 'blctit1', 'blhpar1', 'blkvul', 'bobfly1', 'bobher1', 'brtpar1', 'bubcur1', 'bubwre1', 'bucmot3', 'bugtan', 'butsal1', 'cargra1', 'cattyr', 'chbant1', 'chfmac1', 'cinbec1', 'cocher1', 'cocwoo1', 'colara1', 'c

In [15]:
# Pre-submission guard: stops the notebook unless it is configured to read the
# Kaggle test soundscapes directory.
EXPECTED_TEST_SOUNDSCAPES = "/kaggle/input/birdclef-2025/test_soundscapes"
assert cfg.test_soundscapes == EXPECTED_TEST_SOUNDSCAPES, "Test_soundscapes path is incorrect!"

AssertionError: Test_soundscapes path is incorrect!