# Raw Audios -> wav Audios
Audio files ori -> Audio files

In [1]:
import os
import shutil
import re

# 设定原始和目标文件夹的完整路径
src_base_dir = "E:/AMR/DA/Projekt/data/Audio_files_ori"  # 原始文件夹
dst_base_dir = "E:/AMR/DA/Projekt/data/Audio_files"  # 目标文件夹

# 确保目标文件夹存在
os.makedirs(dst_base_dir, exist_ok=True)

# 正则表达式匹配 XC 编号格式，例如 "XC123456"
xc_pattern = re.compile(r"(XC\d+)")

# 遍历原始文件夹
for folder in os.listdir(src_base_dir):
    src_folder_path = os.path.join(src_base_dir, folder)
    dst_folder_path = os.path.join(dst_base_dir, folder)

    # 确保目标子文件夹存在
    os.makedirs(dst_folder_path, exist_ok=True)

    # 确保是文件夹
    if os.path.isdir(src_folder_path):
        for file in os.listdir(src_folder_path):
            if file.endswith((".mp3", ".ogg", ".wav")):  # 处理音频文件
                match = xc_pattern.search(file)  # 提取 XC 编号
                if match:
                    number = match.group(1)  # 只保留 "XC123456"
                    new_filename = f"{number}.wav"  # 统一改为 "XC123456.wav"

                    # 原始文件路径和目标文件路径
                    src_file_path = os.path.join(src_folder_path, file)
                    dst_file_path = os.path.join(dst_folder_path, new_filename)

                    # 复制并重命名
                    shutil.copy2(src_file_path, dst_file_path)
                    print(f"已处理: {src_file_path} -> {dst_file_path}")

print("所有文件已重命名并移动至目标文件夹！")


已处理: E:/AMR/DA/Projekt/data/Audio_files_ori\Alarm call - Black-headed Gull\XC106520_Chroicocephalus ridibundus.wav -> E:/AMR/DA/Projekt/data/Audio_files\Alarm call - Black-headed Gull\XC106520.wav
已处理: E:/AMR/DA/Projekt/data/Audio_files_ori\Alarm call - Black-headed Gull\XC129576_Chroicocephalus ridibundus.wav -> E:/AMR/DA/Projekt/data/Audio_files\Alarm call - Black-headed Gull\XC129576.wav
已处理: E:/AMR/DA/Projekt/data/Audio_files_ori\Alarm call - Black-headed Gull\XC129577_Chroicocephalus ridibundus.wav -> E:/AMR/DA/Projekt/data/Audio_files\Alarm call - Black-headed Gull\XC129577.wav
已处理: E:/AMR/DA/Projekt/data/Audio_files_ori\Alarm call - Black-headed Gull\XC132961_Chroicocephalus ridibundus.wav -> E:/AMR/DA/Projekt/data/Audio_files\Alarm call - Black-headed Gull\XC132961.wav
已处理: E:/AMR/DA/Projekt/data/Audio_files_ori\Alarm call - Black-headed Gull\XC134722_Chroicocephalus ridibundus.wav -> E:/AMR/DA/Projekt/data/Audio_files\Alarm call - Black-headed Gull\XC134722.wav
已处理: E:/AMR/DA/

In [2]:
import os
root_data_path = "E:/AMR/DA/Projekt/data/data_list/0408"
os.makedirs(root_data_path, exist_ok=True)

# Train meta csv Generator -> train_meta_100.csv

In [3]:
import os
import pandas as pd

# 设置数据文件夹路径
base_dir = "E:/AMR/DA/Projekt/data/Audio_files"  # 这里可以改成你的实际路径

# 用于存储数据
data = []

# 遍历 Audio_files 文件夹
for folder in os.listdir(base_dir):
    folder_path = os.path.join(base_dir, folder)
    
    # 只处理文件夹
    if os.path.isdir(folder_path):
        # 解析文件夹名称，获取 vocalization 和 bird_name
        parts = folder.split(" - ")
        if len(parts) != 2:
            print(f"跳过文件夹：{folder}，命名格式不符合预期")
            continue
        
        vocalization, bird_name = parts
        
        # 遍历该类别下的所有音频文件
        for file in os.listdir(folder_path):
            if file.endswith(".wav"):  # 只处理 .wav 文件
                number = file.replace(".wav", "")  # 提取 XC 编号
                full_path = os.path.join(folder_path, file)  # 记录完整路径（绝对路径）
                full_path = full_path.replace("\\", "/")  # 统一路径分隔符
                data.append([bird_name, vocalization, number, full_path])

# 创建 DataFrame
df = pd.DataFrame(data, columns=["bird_name", "vocalization", "number", "path"])

# 保存 CSV
csv_path = f"{root_data_path}/train_meta_100.csv"
df.to_csv(csv_path, index=False, encoding="utf-8")
print(f"CSV 文件已保存至 {csv_path}")


CSV 文件已保存至 E:/AMR/DA/Projekt/data/data_list/0408/train_meta_100.csv


# 去重， 从train meta csv中删除有重复XC的行

In [4]:
import pandas as pd

# 📌 你的 CSV 文件路径
csv_path = f"{root_data_path}/train_meta_100.csv"
output_csv_path = f"{root_data_path}/train_meta_100_deduplicated.csv"

# ✅ 读取 CSV
df = pd.read_csv(csv_path)

# ✅ 提取 `XC` 编号
df["XC_ID"] = df["number"].apply(lambda x: x.split("_")[0] if "_" in x else x)

# ✅ **去重（保留第一次出现的 XC_ID）**
df_deduplicated = df.drop_duplicates(subset="XC_ID", keep="first")

# ✅ **删除 `XC_ID` 辅助列**
df_deduplicated = df_deduplicated.drop(columns=["XC_ID"])

# ✅ 保存去重后的 CSV
df_deduplicated.to_csv(output_csv_path, index=False, encoding="utf-8-sig")

print(f"✅ 去重完成！原始数据: {len(df)} 条 → 处理后: {len(df_deduplicated)} 条")
print(f"📄 结果已保存至: {output_csv_path}")


✅ 去重完成！原始数据: 10654 条 → 处理后: 9710 条
📄 结果已保存至: E:/AMR/DA/Projekt/data/data_list/0408/train_meta_100_deduplicated.csv


# 新版生成spec和mel，512 256的 -> all_data_meta.csv

In [5]:
import os
import numpy as np
import pandas as pd
import librosa
import cv2
import math
import cupy as cp
from cupyx.scipy import signal as cupy_signal
from tqdm import tqdm

class config:
    SEED = 2024
    DEVICE = 'cpu'
    GENERATE_STFT = False   # ✅ 是否生成 STFT 线性频谱图
    GENERATE_MEL = True    # ✅ 是否生成 mel 频谱图
    OUTPUT_DIR_SPEC = "E:/AMR/DA/Projekt/data/Audio_spec_paperstyle"
    OUTPUT_DIR_MEL = "E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256"
    FS = 48000
    N_FFT = 512
    WIN_SIZE = 512
    WIN_LAP = 384
    MIN_FREQ = 150
    MAX_FREQ = 15000
    SEGMENT_DURATION = 3
    SPEC_SIZE = (512, 256)

# ✅ 创建输出目录（按需）
if config.GENERATE_STFT:
    os.makedirs(config.OUTPUT_DIR_SPEC, exist_ok=True)
if config.GENERATE_MEL:
    os.makedirs(config.OUTPUT_DIR_MEL, exist_ok=True)

csv_path = f"{root_data_path}/train_meta_100_deduplicated.csv"
train_df = pd.read_csv(csv_path)

def oog2spec_via_cupy(audio_data):
    audio_data = cp.array(audio_data)
    mean_signal = cp.nanmean(audio_data)
    audio_data = cp.nan_to_num(audio_data, nan=mean_signal) if cp.isnan(audio_data).mean() < 1 else cp.zeros_like(audio_data)
    frequencies, times, spec_data = cupy_signal.spectrogram(
        audio_data,
        fs=config.FS,
        nfft=config.N_FFT,
        nperseg=config.WIN_SIZE,
        noverlap=config.WIN_LAP,
        window='hann'
    )
    valid_freq = (frequencies >= config.MIN_FREQ) & (frequencies <= config.MAX_FREQ)
    spec_data = spec_data[valid_freq, :]
    spec_data = cp.log10(spec_data + 1e-20)
    spec_data = spec_data - spec_data.min()
    spec_data = spec_data / spec_data.max()
    return spec_data.get()

def audio2mel(audio_data):
    mel_spec = librosa.feature.melspectrogram(
        y=audio_data,
        sr=config.FS,
        n_fft=config.N_FFT,
        hop_length=config.N_FFT - config.WIN_LAP,
        win_length=config.WIN_SIZE,
        window='hann',
        n_mels=config.SPEC_SIZE[1],
        fmin=config.MIN_FREQ,
        fmax=config.MAX_FREQ
    )
    mel_spec = np.log10(mel_spec + 1e-9)
    mel_spec = mel_spec - mel_spec.min()
    mel_spec = mel_spec / mel_spec.max()
    return mel_spec

all_data = []

for i, row in tqdm(train_df.iterrows(), total=len(train_df)):
    file_path = row["path"]
    bird_name = row["bird_name"]
    vocalization = row["vocalization"]
    number = row["number"]

    try:
        audio_data, _ = librosa.load(file_path, sr=config.FS)
    except Exception as e:
        print(f"加载失败: {file_path}, 错误: {e}")
        continue

    total_duration = len(audio_data) / config.FS
    num_segments = math.floor(total_duration / config.SEGMENT_DURATION)

    for seg_idx in range(num_segments):
        start_idx = seg_idx * config.SEGMENT_DURATION * config.FS
        end_idx = start_idx + config.SEGMENT_DURATION * config.FS
        segment_audio = audio_data[start_idx:end_idx]

        spec_filepath, mel_filepath = "", ""

        if config.GENERATE_STFT:
            spec_data = oog2spec_via_cupy(segment_audio)
            spec_data = cv2.resize(spec_data, config.SPEC_SIZE, interpolation=cv2.INTER_AREA)
            spec_filename = f"{number}_seg{seg_idx}.npy"
            spec_filepath = os.path.join(config.OUTPUT_DIR_SPEC, spec_filename)
            np.save(spec_filepath, spec_data.astype(np.float32))

        if config.GENERATE_MEL:
            mel_data = audio2mel(segment_audio)
            mel_data = cv2.resize(mel_data, config.SPEC_SIZE, interpolation=cv2.INTER_AREA)
            mel_filename = f"{number}_seg{seg_idx}_mel.npy"
            mel_filepath = os.path.join(config.OUTPUT_DIR_MEL, mel_filename)
            np.save(mel_filepath, mel_data.astype(np.float32))

        all_data.append([bird_name, vocalization, number, seg_idx, spec_filepath, mel_filepath])

# ✅ 保存元数据
meta_df = pd.DataFrame(all_data, columns=[
    "bird_name", "vocalization", "number", "segment_index", "spec_path", "mel_path"
])
meta_csv_path = f"{root_data_path}/all_data_meta.csv"
meta_df.to_csv(meta_csv_path, index=False, encoding="utf-8")

print(f"✅ 所有音频处理完成！")
if config.GENERATE_STFT:
    print(f"📁 线性频谱图存储于: {config.OUTPUT_DIR_SPEC}")
if config.GENERATE_MEL:
    print(f"📁 Mel 频谱图存储于: {config.OUTPUT_DIR_MEL}")
print(f"📝 元数据 CSV 文件已保存至: {meta_csv_path}")


  cupy._util.experimental('cupyx.jit.rawkernel')
  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
  audio_data, _ = librosa.load(file_path, sr=config.FS)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  mel_spec = mel_spec / mel_spec.max()
100%|██████████| 9710/9710 [52:28<00:00,  3.08it/s]  


✅ 所有音频处理完成！
📁 Mel 频谱图存储于: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256
📝 元数据 CSV 文件已保存至: E:/AMR/DA/Projekt/data/data_list/0408/all_data_meta.csv


# 整合freefield

In [None]:
import os
import shutil

# 配置参数
source_dir = "E:/AMR/DA/Projekt/data/freefield1010"  # 原始目录
target_dir = "E:/AMR/DA/Projekt/data/negative_audio"  # 目标目录

# 确保目标目录存在
os.makedirs(target_dir, exist_ok=True)

# 遍历原始目录及其子目录
for root, _, files in os.walk(source_dir):
    for file in files:
        if file.endswith(".wav"):  # 只处理 .wav 文件
            source_file = os.path.join(root, file)  # 源文件路径
            target_file = os.path.join(target_dir, file)  # 目标文件路径

            # 如果目标文件夹中没有这个文件，则复制
            if not os.path.exists(target_file):
                shutil.copy2(source_file, target_file)  # 使用 copy2 保留文件的原始元数据
                print(f"复制 {source_file} 到 {target_file}")
            else:
                print(f"跳过 {file}，目标文件已存在。")

print("音频文件传输完成！")


# 新版生成负样本spec和mel，并添加到all data meta 中

In [14]:
import os
import numpy as np
import pandas as pd
import librosa
import cv2
import math
import cupy as cp
from cupyx.scipy import signal as cupy_signal
from tqdm import tqdm

class config:
    SEED = 2024
    DEVICE = 'cpu'
    GENERATE_STFT = False  # ✅ 控制是否生成 STFT 频谱图
    GENERATE_MEL = True    # ✅ 控制是否生成 mel 频谱图
    OUTPUT_DIR_SPEC = "E:/AMR/DA/Projekt/data/Audio_spec_paperstyle"
    OUTPUT_DIR_MEL = "E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256"
    FS = 48000
    N_FFT = 512
    WIN_SIZE = 512
    WIN_LAP = 384
    MIN_FREQ = 150
    MAX_FREQ = 15000
    SEGMENT_DURATION = 3
    SPEC_SIZE = (512, 256)
    MAX_NEGATIVE_SAMPLES = 3000

# ✅ 创建输出目录
if config.GENERATE_STFT:
    os.makedirs(config.OUTPUT_DIR_SPEC, exist_ok=True)
if config.GENERATE_MEL:
    os.makedirs(config.OUTPUT_DIR_MEL, exist_ok=True)

def oog2spec_via_cupy(audio_data):
    if len(audio_data) == 0:
        return None
    audio_data = cp.array(audio_data)
    frequencies, times, spec_data = cupy_signal.spectrogram(
        audio_data,
        fs=config.FS,
        nfft=config.N_FFT,
        nperseg=config.WIN_SIZE,
        noverlap=config.WIN_LAP,
        window='hann'
    )
    valid_freq = (frequencies >= config.MIN_FREQ) & (frequencies <= config.MAX_FREQ)
    spec_data = spec_data[valid_freq, :]
    spec_data = cp.log10(spec_data + 1e-20)
    spec_data = (spec_data - spec_data.min()) / (spec_data.max() - spec_data.min() + 1e-9)
    return spec_data.get()

def audio2mel(audio_data):
    mel_spec = librosa.feature.melspectrogram(
        y=audio_data,
        sr=config.FS,
        n_fft=config.N_FFT,
        hop_length=config.N_FFT - config.WIN_LAP,
        win_length=config.WIN_SIZE,
        window='hann',
        n_mels=config.SPEC_SIZE[1],
        fmin=config.MIN_FREQ,
        fmax=config.MAX_FREQ
    )
    mel_spec = np.log10(mel_spec + 1e-9)
    mel_spec = (mel_spec - mel_spec.min()) / (mel_spec.max() - mel_spec.min() + 1e-9)
    return mel_spec

# === 负样本音频路径
negative_samples_dir = "E:/AMR/DA/Projekt/data/negative_audio"

negative_samples_processed = 0
negative_data = []

for root, _, files in os.walk(negative_samples_dir):
    for file in tqdm(files, desc="🔄 Processing Negative Audio Files"):
        if not file.endswith(".wav"):
            continue

        file_path = os.path.join(root, file)

        try:
            audio_data, _ = librosa.load(file_path, sr=config.FS)
            if len(audio_data) == 0:
                continue
        except Exception as e:
            print(f"❌ 加载失败: {file}, 错误: {e}")
            continue

        total_duration = len(audio_data) / config.FS
        num_segments = math.floor(total_duration / config.SEGMENT_DURATION)

        max_segments = 3  # ✅ 只保留 seg0～2
        for seg_idx in range(min(num_segments, max_segments)):
            start_idx = seg_idx * config.SEGMENT_DURATION * config.FS
            end_idx = start_idx + config.SEGMENT_DURATION * config.FS
            segment_audio = audio_data[int(start_idx):int(end_idx)]

            if len(segment_audio) < config.SEGMENT_DURATION * config.FS:
                pad_len = config.SEGMENT_DURATION * config.FS - len(segment_audio)
                segment_audio = np.pad(segment_audio, (0, pad_len), mode="constant")

            spec_path, mel_path = "", ""

            if config.GENERATE_STFT:
                spec_data = oog2spec_via_cupy(segment_audio)
                if spec_data is None:
                    continue
                spec_data = cv2.resize(spec_data, config.SPEC_SIZE, interpolation=cv2.INTER_AREA)
                spec_filename = f"negative_{file.split('.')[0]}_seg{seg_idx}.npy"
                spec_path = os.path.join(config.OUTPUT_DIR_SPEC, spec_filename)
                np.save(spec_path, spec_data.astype(np.float32))

            if config.GENERATE_MEL:
                mel_data = audio2mel(segment_audio)
                mel_data = cv2.resize(mel_data, config.SPEC_SIZE, interpolation=cv2.INTER_AREA)
                mel_filename = f"negative_{file.split('.')[0]}_seg{seg_idx}_mel.npy"
                mel_path = os.path.join(config.OUTPUT_DIR_MEL, mel_filename)
                np.save(mel_path, mel_data.astype(np.float32))

            negative_data.append([
                "Background Noise", "none",
                f"negative_{file.split('.')[0]}",
                seg_idx, spec_path, mel_path
            ])

            negative_samples_processed += 1
            if negative_samples_processed >= config.MAX_NEGATIVE_SAMPLES:
                print(f"🎯 达到最大负样本数 {config.MAX_NEGATIVE_SAMPLES}，停止处理。")
                break

        if negative_samples_processed >= config.MAX_NEGATIVE_SAMPLES:
            break
    if negative_samples_processed >= config.MAX_NEGATIVE_SAMPLES:
        break

# ✅ 合并 CSV
meta_csv_path = f"{root_data_path}/all_data_meta.csv"
new_meta_csv_path = f"{root_data_path}/all_data_meta_with_negative.csv"

if os.path.exists(meta_csv_path):
    original_df = pd.read_csv(meta_csv_path)
    negative_df = pd.DataFrame(negative_data, columns=["bird_name", "vocalization", "number", "segment_index", "spec_path", "mel_path"])
    combined_df = pd.concat([original_df, negative_df], ignore_index=True)
else:
    print("⚠️ 未找到原始 CSV，仅使用负样本生成新文件。")
    combined_df = pd.DataFrame(negative_data, columns=["bird_name", "vocalization", "number", "segment_index", "spec_path", "mel_path"])

combined_df.to_csv(new_meta_csv_path, index=False, encoding="utf-8-sig")

# ✅ 输出结果
print(f"\n✅ 负样本处理完成，生成样本数：{negative_samples_processed}")
if config.GENERATE_STFT:
    print(f"📁 STFT 频谱图已保存至: {config.OUTPUT_DIR_SPEC}")
if config.GENERATE_MEL:
    print(f"📁 Mel 频谱图已保存至: {config.OUTPUT_DIR_MEL}")
print(f"📝 合并后的元数据 CSV 保存至: {new_meta_csv_path}")


  mel_basis = filters.mel(sr=sr, n_fft=n_fft, **kwargs)
🔄 Processing Negative Audio Files:  13%|█▎        | 999/7690 [00:39<04:27, 25.00it/s]


🎯 达到最大负样本数 3000，停止处理。

✅ 负样本处理完成，生成样本数：3000
📁 Mel 频谱图已保存至: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256
📝 合并后的元数据 CSV 保存至: E:/AMR/DA/Projekt/data/data_list/0408/all_data_meta_with_negative.csv


# Spectogram.npy Filter

In [15]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm

# 📌 配置路径
data_csv_path = f"{root_data_path}/all_data_meta_with_negative.csv"
clean_data_csv_path = f"{root_data_path}/all_data_meta_clean.csv"
blacklist_path = f"{root_data_path}/blacklist.txt"

# ✅ 控制检测类型
CHECK_SPEC = False   # 是否检测 spec_path（STFT 图）
CHECK_MEL = True     # 是否检测 mel_path（Mel 图）

# ✅ 加载黑名单
def load_blacklist():
    if os.path.exists(blacklist_path):
        with open(blacklist_path, "r") as f:
            return set(line.strip() for line in f.readlines())
    return set()

# ✅ 保存黑名单
def save_blacklist(blacklist):
    with open(blacklist_path, "w") as f:
        for file in sorted(blacklist):
            f.write(file + "\n")
    print(f"✅ 黑名单已更新，共 {len(blacklist)} 个无效文件")

# ✅ 查找无效 npy 文件
def find_invalid_npy_files(df, blacklist):
    invalid_files = set(blacklist)

    print("🔍 开始检查频谱文件有效性 ...")
    for _, row in tqdm(df.iterrows(), total=len(df), desc="扫描无效数据"):
        path_keys = []
        if CHECK_SPEC:
            path_keys.append("spec_path")
        if CHECK_MEL:
            path_keys.append("mel_path")

        for path_key in path_keys:
            file_path = row.get(path_key, "")
            if not isinstance(file_path, str) or file_path.strip() == "":
                continue  # 忽略空路径

            if file_path in invalid_files:
                continue

            if not os.path.exists(file_path):
                print(f"❌ 文件不存在: {file_path}")
                invalid_files.add(file_path)
                continue

            try:
                arr = np.load(file_path)
                if np.isnan(arr).any() or np.isinf(arr).any():
                    print(f"❌ 发现 NaN/Inf: {file_path}")
                    invalid_files.add(file_path)
            except Exception as e:
                print(f"⚠️ 读取失败: {file_path}, 错误: {e}")
                invalid_files.add(file_path)

    return invalid_files

# ✅ 主流程
df = pd.read_csv(data_csv_path)
blacklist = load_blacklist()
invalid_files = find_invalid_npy_files(df, blacklist)
save_blacklist(invalid_files)

# ✅ 构造过滤条件
mask = pd.Series([False] * len(df))
if CHECK_SPEC:
    mask |= df["spec_path"].isin(invalid_files)
if CHECK_MEL:
    mask |= df["mel_path"].isin(invalid_files)

# ✅ 清洗数据并保存
df_clean = df[~mask]
df_clean.to_csv(clean_data_csv_path, index=False, encoding="utf-8")
print(f"\n✅ 已过滤无效数据，生成 {clean_data_csv_path}（保留 {len(df_clean)} 条数据）")


🔍 开始检查频谱文件有效性 ...


扫描无效数据:  18%|█▊        | 32754/187052 [04:06<19:49, 129.76it/s] 

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC572645_seg19_mel.npy


扫描无效数据:  35%|███▍      | 64614/187052 [07:52<15:03, 135.49it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC732164_seg142_mel.npy


扫描无效数据:  37%|███▋      | 68410/187052 [08:20<14:50, 133.29it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC571221_seg19_mel.npy


扫描无效数据:  40%|████      | 74956/187052 [09:11<13:29, 138.50it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC392790_seg7_mel.npy


扫描无效数据:  43%|████▎     | 79742/187052 [09:49<14:00, 127.65it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC680315_seg17_mel.npy


扫描无效数据:  53%|█████▎    | 99248/187052 [12:18<10:44, 136.27it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC782924_seg59_mel.npy


扫描无效数据:  81%|████████  | 151113/187052 [18:58<04:49, 124.10it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591233_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591233_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591233_seg6_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591233_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591233_seg8_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg4_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg5_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg8_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591238_seg9_mel.npy
❌ 发现 NaN/Inf: E:

扫描无效数据:  81%|████████  | 151140/187052 [18:58<04:43, 126.77it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591245_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591245_seg8_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591245_seg11_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg6_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg10_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg11_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg14_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg15_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591250_seg18_mel.npy
❌ 发现 NaN/I

扫描无效数据:  81%|████████  | 151168/187052 [18:58<04:38, 128.68it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591254_seg1_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591254_seg5_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591254_seg10_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg4_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg8_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg9_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591256_seg10_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591263_seg2_mel.npy


扫描无效数据:  81%|████████  | 151210/187052 [18:59<04:29, 132.98it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591263_seg5_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591263_seg6_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591263_seg9_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg6_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg7_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg10_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg11_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg12_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg13_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591272_seg16_mel.npy
❌ 发现 NaN/In

扫描无效数据:  81%|████████  | 151238/187052 [18:59<04:26, 134.57it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg2_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg3_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg4_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg5_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg8_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg9_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg10_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg11_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC591289_seg12_mel.npy


扫描无效数据:  83%|████████▎ | 154382/187052 [19:22<04:29, 121.33it/s]

❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC862345_seg27_mel.npy
❌ 发现 NaN/Inf: E:/AMR/DA/Projekt/data/Audio_spec_mel_512_256\XC862345_seg28_mel.npy


扫描无效数据: 100%|██████████| 187052/187052 [23:29<00:00, 132.73it/s]


✅ 黑名单已更新，共 72 个无效文件

✅ 已过滤无效数据，生成 E:/AMR/DA/Projekt/data/data_list/0408/all_data_meta_clean.csv（保留 186980 条数据）


# 修改标签

# ------------------------------------------------------------
# 在此之前的代码一般只用执行一次

# Bird Target filter -> all_data_meta_allTypes.csv

In [3]:
import pandas as pd
import os
# 📌 文件路径
input_csv_path = f"{root_data_path}/all_data_meta_clean.csv"  # 经过 NaN/Inf 过滤和标签修正的文件
output_csv_path = f"{root_data_path}/all_data_meta_allTypes.csv"  # 目标鸟类数据

selected_birds = [
    "Black-headed Gull",
    "Canada Goose",
    "Carrion Crow",
    "Common Blackbird",
    "Common Chaffinch",
    "Common Kingfisher",
    "Common Redstart",
    "Dunnock",
    "Eurasian Blackcap",
    "Eurasian Blue Tit",
    "Eurasian Bullfinch",
    "Eurasian Coot",
    "Eurasian Golden Oriole",
    "Eurasian Jay",
    "Eurasian Nuthatch",
    "Eurasian Siskin",
    "Eurasian Treecreeper",
    "Eurasian Wren",
    "European Goldfinch",
    "European Robin",
    "Goldcrest",
    "Great Spotted Woodpecker",
    "Great Tit",
    "Hawfinch",
    "Hooded Crow",
    "Long-tailed Tit",
    "Mallard",
    "Marsh Tit",
    "Redwing",
    "Rook",
    "Short-toed Treecreeper",
    "Stock Dove",
    "Background Noise",
]

# 🎯 vocalization 过滤条件（根据你的实际需求修改）
selected_vocalization = ["Call", "Song", "Alarm call", "Flight call", "Begging call", "none"]  # 假设你只想保留 vocalization 为 'call' 或 'song' 的样本

# 读取数据
df = pd.read_csv(input_csv_path)
print(f"📊 过滤前数据: {len(df)} 条")

# 只保留目标鸟类
df_filtered = df[df["bird_name"].isin(selected_birds)]

# 只保留符合 vocalization 条件的数据
df_filtered = df_filtered[df_filtered["vocalization"].isin(selected_vocalization)]

# 保存筛选后的数据
df_filtered.to_csv(output_csv_path, index=False, encoding="utf-8")

print(f"✅ 目标鸟类和 vocalization 筛选完成，生成 {output_csv_path}（保留 {len(df_filtered)} 条数据）")


📊 过滤前数据: 186980 条
✅ 目标鸟类和 vocalization 筛选完成，生成 E:/AMR/DA/Projekt/data/data_list/0408/all_data_meta_allTypes.csv（保留 186980 条数据）


# 通过birdnet调整label

# split train and valid

In [None]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split

# 📌 文件路径
data_csv_path = f"{root_data_path}/all_data_meta_allTypes.csv"
train_csv_path = f"{root_data_path}/train_list.csv"
test_csv_path = f"{root_data_path}/valid_list.csv"

# 读取数据
df = pd.read_csv(data_csv_path)
print(f"📊 原始数据: {len(df)} 条")

# ✅ **定义音频编号解析函数**
def get_audio_id(number, bird_name):
    if bird_name == "Background Noise":
        return number
    return number.split("_")[0] if "_" in number else number

# 计算 `audio_id`
df["audio_id"] = df.apply(lambda row: get_audio_id(row["number"], row["bird_name"]), axis=1)

# 🚀 拆分为普通样本和负样本
neg_samples = df[df["bird_name"] == "Background Noise"].copy()
pos_samples = df[df["bird_name"] != "Background Noise"].copy()

# ✅ 按 audio_id 进行训练/测试划分（20% 测试集）
unique_audio_ids = pos_samples["audio_id"].unique()
train_audio_ids, test_audio_ids = train_test_split(unique_audio_ids, test_size=0.2, random_state=2024, shuffle=True)

train_df = pos_samples[pos_samples["audio_id"].isin(train_audio_ids)]
test_df = pos_samples[pos_samples["audio_id"].isin(test_audio_ids)]

# ✅ 限制测试集每类不超过 500 个样本，剩下的回流到训练集（排除碎片泄露）
test_limited_df = []
train_remainder_df = []
test_audio_ids_set = set(test_df["audio_id"])

for bird_name in test_df["bird_name"].unique():
    class_samples = test_df[test_df["bird_name"] == bird_name]

    if len(class_samples) > 500:
        test_limited = class_samples.sample(n=500, random_state=2024)
        remainder = class_samples.drop(test_limited.index)

        # ❗ 移除与 test_limited 相同 audio_id 的碎片，避免回流污染
        remainder = remainder[~remainder["audio_id"].isin(test_limited["audio_id"])]
        train_remainder = remainder
    else:
        test_limited = class_samples
        train_remainder = pd.DataFrame()

    test_limited_df.append(test_limited)
    train_remainder_df.append(train_remainder)

# 合并测试集 & 安全回流训练集
test_df = pd.concat(test_limited_df, ignore_index=True)
train_df = pd.concat([train_df] + train_remainder_df, ignore_index=True)

# ✅ 背景噪声划分
neg_audio_ids = neg_samples["audio_id"].unique()
print("\n🔍 负样本 `audio_id` 统计：")
print(pd.Series(neg_audio_ids).value_counts())

if len(neg_audio_ids) >= 2:
    train_neg_ids, test_neg_ids = train_test_split(neg_audio_ids, test_size=0.2, random_state=2024, shuffle=True)
    train_neg_df = neg_samples[neg_samples["audio_id"].isin(train_neg_ids)]
    test_neg_df = neg_samples[neg_samples["audio_id"].isin(test_neg_ids)]
else:
    print(f"⚠️ 负样本数量不足 ({len(neg_audio_ids)} 个)，全部放入训练集！")
    train_neg_df = neg_samples
    test_neg_df = pd.DataFrame(columns=df.columns)

# ✅ 合并训练 & 测试集
train_df = pd.concat([train_df, train_neg_df], ignore_index=True)
test_df = pd.concat([test_df, test_neg_df], ignore_index=True)

# ✅ 删除辅助列 audio_id
train_df.drop(columns=["audio_id"], inplace=True)
test_df.drop(columns=["audio_id"], inplace=True)

# ✅ 保存 CSV 文件
os.makedirs(os.path.dirname(train_csv_path), exist_ok=True)
train_df.to_csv(train_csv_path, index=False, encoding="utf-8")
test_df.to_csv(test_csv_path, index=False, encoding="utf-8")

print(f"✅ 训练集已保存至: {train_csv_path}, 样本数: {len(train_df)}")
print(f"✅ 测试集已保存至: {test_csv_path}, 样本数: {len(test_df)}")


# train and valid distribution

In [None]:
import pandas as pd
train_df = pd.read_csv("E:/AMR/DA/Projekt/data/train_list_for_zoom006_0315.csv")
valid_df = pd.read_csv("E:/AMR/DA/Projekt/data/valid_list_for_zoom006_0315.csv")

print("训练集类别分布:")
print(train_df["bird_name"].value_counts())

print("\n测试集类别分布:")
print(valid_df["bird_name"].value_counts())

train_df_refine = pd.read_csv("E:/AMR/DA/Projekt/data/train_list_for_zoom006_100_nofreefiled_refine_th12.csv")
valid_df_refine = pd.read_csv("E:/AMR/DA/Projekt/data/valid_list_for_zoom006_100_nofreefiled_refine_th12.csv")

print("训练集类别分布refine:")
print(train_df_refine["bird_name"].value_counts())
print("\n测试集类别分布refine:")
print(valid_df_refine["bird_name"].value_counts())
