In [8]:
import os
import logging
import random
import gc
import time
import cv2
import math
import warnings
from pathlib import Path
from datetime import datetime, timezone, timedelta

import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score
import librosa

from sklearn.metrics import roc_auc_score, average_precision_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.auto import tqdm
import json
import timm

from importlib import reload

logging.basicConfig(level=logging.ERROR)

from module import preprocess_lib, datasets_lib, utils_lib, models_lib, learning_lib, config_lib
reload(config_lib)

<module 'module.config_lib' from '/root/program/birdclef-2025/scripts/module/config_lib.py'>

In [9]:


class CFG:
    def __init__(self, mode="train", kaggle_notebook=False, debug=False):
        assert mode in ["train", "inference"], "mode must be 'train' or 'inference'"
        self.mode = mode
        self.KAGGLE_NOTEBOOK = kaggle_notebook
        self.debug = debug

        # ===== Path Settings =====
        if self.KAGGLE_NOTEBOOK:
            self.OUTPUT_DIR = ''
            self.train_datadir = '/kaggle/input/birdclef-2025/train_audio'
            self.train_csv = '/kaggle/input/birdclef-2025/train.csv'
            self.test_soundscapes = '/kaggle/input/birdclef-2025/test_soundscapes'
            self.submission_csv = '/kaggle/input/birdclef-2025/sample_submission.csv'
            self.taxonomy_csv = '/kaggle/input/birdclef-2025/taxonomy.csv'
            self.spectrogram_npy = '/kaggle/input/birdclef25-mel-spectrograms/birdclef2025_melspec_5sec_256_256.npy'
            self.model_path = '/kaggle/input/birdclef-2025-0330'
        else:
            self.OUTPUT_DIR = '../data/result/'
            self.RAW_DIR = '../data/raw/'
            self.PROCESSED_DIR = '../data/processed/'
            self.train_datadir = '../data/raw/train_audio/'
            self.train_csv = '../data/raw/train.csv'
            self.test_soundscapes = '../data/raw/test_soundscapes/'
            self.submission_csv = '../data/raw/sample_submission.csv'
            self.taxonomy_csv = '../data/raw/taxonomy.csv'
            self.models_dir = "../models/" # 全modelの保存先
            self.model_path = self.models_dir # 各モデルの保存先．学習時に動的に変更．
            
            self.spectrogram_npy = '../data/processed/mel_cleaned_0413/birdclef2025_melspec_5sec_256_256.npy'
            
            self.pseudo_label_csv = "../data/processed/pseudo_labels/ensmbl_0865/pseudo_labels.csv"
            self.pseudo_melspec_npy = "../data/processed/mel_trn_sdscps_hl512/mel_train_soundscapes.npy"

        # ===== Model Settings =====
        self.model_name = 'efficientnet_b0'
        self.pretrained = True if mode == "train" else False
        self.in_channels = 1

        # ===== Audio Settings =====
        self.FS = 32000
        self.WINDOW_SIZE = 5.0 # 推論時のウィンドウサイズ
        self.TARGET_DURATION = 5.0 # データセット作成時のウィンドウサイズ
        self.TARGET_SHAPE = (256, 256)
        self.N_FFT = 1024
        self.HOP_LENGTH = 512
        self.N_MELS = 128
        self.FMIN = 50
        self.FMAX = 14000     
           

        self.seed = 42
        # ===== Training Mode =====
        if mode == "train":
          
            self.apex = False
            self.print_freq = 100
            self.num_workers = 2

            self.LOAD_DATA = True
            self.epochs = 10
            self.batch_size = 516
            self.criterion = 'BCEWithLogitsLoss'

            self.n_fold = 5
            self.selected_folds = [0, 1, 2, 3, 4]

            self.optimizer = 'AdamW'
            self.lr = 5e-4
            self.weight_decay = 1e-5
            self.scheduler = 'CosineAnnealingLR'
            self.min_lr = 1e-6
            self.T_max = self.epochs

            self.aug_prob = 0.5
            self.mixup_alpha_real = 0.5
            self.mixup_alpha_pseudo = 0.5
            
            self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
            
            
            self.pseudo_no_call_threshold = 0.02
            self.pseudo_high_conf_threshold = 0.7
        
        

            if self.debug:
                self.epochs = 2
                self.selected_folds = [0]
                self.batch_size = 4
                

In [10]:
cfg = CFG(mode="train", kaggle_notebook=False, debug=False)

In [11]:
utils_lib.set_seed(cfg.seed)

pseudo_df = pd.read_csv(cfg.pseudo_label_csv)
print("Loaded pseudo labels from:", cfg.pseudo_label_csv)

df = pseudo_df.copy()
species_cols = df.columns.drop("row_id")

# === soft label整形 ===
df[species_cols] = df[species_cols].where(df[species_cols] >= cfg.pseudo_no_call_threshold, 0.0)

# === 分類（必ず species_cols を使う）===
no_call_df = df[df[species_cols].max(axis=1) == 0.0].copy()
no_call_df["primary_label"] = "no_call"
no_call_df["pseudo_source"] = "no_call"

high_conf_df = df[df[species_cols].max(axis=1) >= cfg.pseudo_high_conf_threshold].copy()
high_conf_df["primary_label"] = high_conf_df[species_cols].idxmax(axis=1)
high_conf_df["pseudo_source"] = "high_conf"

pseudo_df = pd.concat([no_call_df, high_conf_df], axis=0).reset_index()
print(f"✅ no_call: {len(no_call_df)}, high_conf: {len(high_conf_df)}, total: {len(pseudo_df)}")

Loaded pseudo labels from: ../data/processed/pseudo_labels/ensmbl_0865/pseudo_labels.csv
✅ no_call: 8082, high_conf: 4742, total: 12824


In [12]:
# === 3. 必要なmelだけロード ===
used_ids = set(pseudo_df["row_id"])
print("Loading mel specs for used row_ids from:", cfg.pseudo_melspec_npy)
raw = np.load(cfg.pseudo_melspec_npy, allow_pickle=True)
pseudo_melspecs = {
    d["row_id"]: d["mel_spec"]
    for d in raw
    if d["row_id"] in used_ids
}

print(f"✅ Filtered mel specs loaded: {len(pseudo_melspecs)}")

del raw
gc.collect()


Loading mel specs for used row_ids from: ../data/processed/mel_trn_sdscps_hl512/mel_train_soundscapes.npy
✅ Filtered mel specs loaded: 12824


0

In [13]:
import os
import numpy as np
import pickle
from datetime import datetime
import pytz
import csv

# === JST時刻でディレクトリ作成 ===
jst = pytz.timezone('Asia/Tokyo')
now = datetime.now(jst)
timestamp = now.strftime("%Y%m%d_%H%M")

# ✅ 保存先フォルダを debug に応じて分岐
if cfg.debug:
    output_dir = os.path.join(cfg.PROCESSED_DIR, "pseudo_debug")
else:
    output_dir = os.path.join(cfg.PROCESSED_DIR, f"mel_train_soundscapes_{timestamp}")
os.makedirs(output_dir, exist_ok=True)

# === pseudo_melspecs をそのまま np.array でラップして保存 ===
wrapped_array = np.array([pseudo_melspecs], dtype=object)
output_path = os.path.join(output_dir, "mel_train_soundscapes.npy")

with open(output_path, 'wb') as f:
    pickle.dump(wrapped_array, f, protocol=5)

print(f"\n✅ pseudo_melspecs saved to: {output_path}")
print(f"📦 File size: {os.path.getsize(output_path) / (1024 ** 2):.2f} MB")
print(f"📊 Total entries: {len(wrapped_array)}")

# === config を CSV形式で保存 ===
config_path = os.path.join(output_dir, "config.csv")
config_dict = {k: v for k, v in vars(cfg).items() if not k.startswith("__")}

with open(config_path, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["key", "value"])
    for key, value in config_dict.items():
        writer.writerow([key, value])

print(f"📝 Config saved to: {config_path}")


✅ pseudo_melspecs saved to: ../data/processed/mel_train_soundscapes_20250528_1727/mel_train_soundscapes.npy
📦 File size: 3207.19 MB
📊 Total entries: 1
📝 Config saved to: ../data/processed/mel_train_soundscapes_20250528_1727/config.csv
