In [None]:
import os
import pandas as pd
import numpy as np
from PIL import Image
from tqdm import tqdm
import librosa
import matplotlib.pyplot as plt
import matplotlib.cm as cm

def save_mel_spectrograms(audio_dir, metadata_csv, output_dir,
                          target_sr=32000, n_mels=128, fmax=None,
                          colormap='viridis'):
    """Render every ``.ogg`` file under ``audio_dir`` as a colour-mapped mel-spectrogram PNG.

    Each sub-directory of ``audio_dir`` is treated as one label. Its name is
    looked up in the ``primary_label`` column of ``metadata_csv`` and the
    matching ``common_name`` is used as the output folder name; when there is
    no usable match the sub-directory name itself is used. Images are written
    to ``output_dir/<folder name>/<audio file stem>.png``.

    Parameters
    ----------
    audio_dir : str
        Directory whose sub-directories contain the ``.ogg`` recordings.
    metadata_csv : str
        CSV file with at least ``primary_label`` and ``common_name`` columns.
    output_dir : str
        Root directory for the PNGs; created if it does not exist.
    target_sr : int or None, default 32000
        Sampling rate passed to ``librosa.load``.
    n_mels : int, default 128
        Number of mel bands passed to ``librosa.feature.melspectrogram``.
    fmax : float or None, default None
        Upper frequency bound passed to ``librosa.feature.melspectrogram``.
    colormap : str, default 'viridis'
        Name of the Matplotlib colormap applied to the normalised spectrogram.

    Returns
    -------
    None
        The function is used for its side effect of writing PNG files.
    """
    # Force labels to str: folder names are always strings, and a numeric-looking
    # label column would otherwise be parsed as integers and never match.
    metadata = pd.read_csv(metadata_csv, dtype={"primary_label": str})
    label_to_name = (
        metadata[["primary_label", "common_name"]]
        .drop_duplicates()
        .set_index("primary_label")["common_name"]
        .to_dict()
    )

    os.makedirs(output_dir, exist_ok=True)

    # plt.get_cmap is used instead of cm.get_cmap, which newer Matplotlib
    # releases no longer provide.
    cmap = plt.get_cmap(colormap)

    # Iterate over folders (primary labels) in a stable order
    for label_folder in tqdm(sorted(os.listdir(audio_dir)), desc="Processing labels"):
        folder_path = os.path.join(audio_dir, label_folder)
        if not os.path.isdir(folder_path):
            continue

        common_name = label_to_name.get(label_folder, label_folder)
        # An empty CSV cell is read as NaN (a float), which cannot be used as a
        # path component; fall back to the label folder name in that case.
        if not isinstance(common_name, str) or not common_name:
            common_name = label_folder
        common_folder = os.path.join(output_dir, common_name)
        os.makedirs(common_folder, exist_ok=True)

        for fname in sorted(os.listdir(folder_path)):
            if not fname.lower().endswith(".ogg"):
                continue

            file_path = os.path.join(folder_path, fname)
            # Load audio with librosa
            waveform, sr = librosa.load(file_path, sr=target_sr, mono=True)
            if waveform.size == 0:
                # Nothing to render; min()/max() below would fail on an empty array.
                continue

            # Compute mel spectrogram with the rate librosa actually returned,
            # so the call stays correct even when target_sr is None.
            mel_spec = librosa.feature.melspectrogram(
                y=waveform,
                sr=sr,
                n_mels=n_mels,
                fmax=fmax
            )

            # Convert to dB
            mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

            # Normalize to 0–1; a constant spectrogram has zero range, so skip
            # the division there and leave it at all zeros instead of NaN.
            mel_spec_db -= mel_spec_db.min()
            value_range = mel_spec_db.max()
            if value_range > 0:
                mel_spec_db /= value_range

            # Apply colormap (drop the alpha channel, scale to 8-bit RGB)
            mel_spec_color = (cmap(mel_spec_db)[:, :, :3] * 255).astype(np.uint8)

            # Save as PNG
            img = Image.fromarray(mel_spec_color)
            out_name = os.path.splitext(fname)[0] + ".png"
            img.save(os.path.join(common_folder, out_name))


In [None]:
# Single project root: every input and output of step 1 lives under it, so the
# location only has to be changed here when the data moves.
BASE_DIR = "/mnt/Stuff/phd_projects/esp32-projects/bird_call_id"

audio_dir = os.path.join(BASE_DIR, "train_audio")            # one sub-folder per label
metadata_csv = os.path.join(BASE_DIR, "train_metadata.csv")  # primary_label -> common_name
spectrogram_dir = os.path.join(BASE_DIR, "spec_dir_color")   # PNG output root

# Step 1: Preprocess & save spectrograms
save_mel_spectrograms(audio_dir, metadata_csv, spectrogram_dir)

In [None]:
import os
import shutil
import random
from pathlib import Path

# Paths
# NOTE(review): both paths are relative, so they resolve against the current
# working directory — confirm it is the folder that contains "spec_dir_color".
DATASET_DIR = Path("spec_dir_color")
OUTPUT_DIR = Path("splitted_dataset_color")
TRAIN_DIR = OUTPUT_DIR / "train"
TEST_DIR = OUTPUT_DIR / "test"

# Split ratio
test_ratio = 0.2
random.seed(42)

# Create output directories
for split_dir in [TRAIN_DIR, TEST_DIR]:
    split_dir.mkdir(parents=True, exist_ok=True)

# Iterate over each label folder.
# Directory listings come back in filesystem order, so both the folders and the
# files are sorted first; otherwise the fixed seed would not yield the same
# split on every run, and a re-run could copy one image into both train and test.
for label_dir in sorted(DATASET_DIR.iterdir()):
    if not label_dir.is_dir():
        continue

    label = label_dir.name
    files = sorted(label_dir.glob("*.png"))
    random.shuffle(files)

    # Calculate split index
    split_idx = int(len(files) * (1 - test_ratio))
    if files and split_idx == 0:
        # Very small classes would otherwise land entirely in the test set;
        # keep at least one example for training.
        split_idx = 1
    train_files = files[:split_idx]
    test_files = files[split_idx:]

    # Create label subdirectories
    (TRAIN_DIR / label).mkdir(exist_ok=True)
    (TEST_DIR / label).mkdir(exist_ok=True)

    # Copy files
    for f in train_files:
        shutil.copy(f, TRAIN_DIR / label / f.name)

    for f in test_files:
        shutil.copy(f, TEST_DIR / label / f.name)

print("✅ Done splitting dataset.")