Este notebook toma los segmentos de audio en `database/audio/dev/`, los procesa y guarda los espectrogramas finales en `database/spect/`.

In [2]:
import os
import random
import shutil
import sys

import librosa as lbrs
import noisereduce as nr
import numpy as np
import pandas as pd
from PIL import Image


sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
from utils import util

In [None]:
def load_spectrograms(segments_df, segments_dir, spectrogram_dir, output_csv_path,
                    test_audios_dir=None, sr=32000, mels=224, hoplen=512, nfft=2048,
                    noise_reduce=False, max_test_audios=10):
    '''
    Creates spectrogram images from pre-extracted audio segments.

    For every row of ``segments_df`` the segment is loaded through
    ``util.lbrs_loading``, optionally denoised, converted to an image with
    ``util.get_spec_image`` and written to disk. A CSV mapping each generated
    spectrogram filename to its class id is saved at the end.

    Args:
        segments_df (pd.DataFrame): DataFrame with 'filename' and 'class_id' for segments.
        segments_dir (str): Directory containing audio segment .wav files.
        spectrogram_dir (str): Directory to save generated spectrogram .png files.
            Created if it does not exist.
        output_csv_path (str): Path to save the output CSV mapping filename to class_id.
        test_audios_dir (str, optional): Directory to copy some segments for testing.
            Created if it does not exist.
        sr (int): Target sampling rate. Defaults to 32000.
        mels (int): Number of mel bands. Defaults to 224.
        hoplen (int): Hop length for spectrogram. Defaults to 512.
        nfft (int): FFT window size. Defaults to 2048.
        noise_reduce (bool): When True, run ``noisereduce.reduce_noise`` on the
            loaded signal before computing the spectrogram. Defaults to False.
        max_test_audios (int): Maximum number of segments copied into
            ``test_audios_dir``. Defaults to 10.

    Returns:
        pd.DataFrame: One row per spectrogram written, with columns
        'filename' and 'class_id' (present even when no segment succeeded).
    '''

    os.makedirs(spectrogram_dir, exist_ok=True)
    if test_audios_dir:
        # Without this, every copy below fails when the directory is missing.
        os.makedirs(test_audios_dir, exist_ok=True)

    spectrogram_records = []
    saved_test_audios = 0
    failed_segments = 0

    print(f"Creating spectrograms for {len(segments_df)} segments...")

    for segment_filename, class_id in zip(segments_df['filename'], segments_df['class_id']):
        segment_path = os.path.join(segments_dir, segment_filename)

        try:
            # Load the segment
            y, srate = util.lbrs_loading(segment_path, sr=sr, mono=True)

            if noise_reduce:
                # assumes y is a mono float signal as noisereduce expects — TODO confirm
                y = nr.reduce_noise(y=y, sr=srate)

            # Create spectrogram image
            img, spec_path, spec_name = util.get_spec_image(y, sr=srate, mels=mels, hoplen=hoplen, nfft=nfft,
                                                            filename=segment_filename, start=0,
                                                            spectrogram_dir=spectrogram_dir)
            Image.fromarray(img).save(spec_path)
        except Exception as e:
            # Best-effort batch job: report the bad segment and keep going.
            print(f"Error processing segment {segment_filename}: {e}")
            failed_segments += 1
            continue

        # Record the spectrogram as soon as it is on disk, so a later failure
        # in the optional test-audio copy cannot drop it from the CSV.
        spectrogram_records.append({
            'filename': spec_name,
            'class_id': class_id
        })

        # Save some test audios if requested
        if test_audios_dir and saved_test_audios < max_test_audios:
            test_audio_path = os.path.join(test_audios_dir, f"test_{saved_test_audios:02d}_{segment_filename}")
            try:
                shutil.copy2(segment_path, test_audio_path)
                saved_test_audios += 1
            except OSError as e:
                print(f"Could not copy test audio {segment_filename}: {e}")

    # Save the final CSV; explicit columns keep the header when nothing succeeded.
    final_df = pd.DataFrame(spectrogram_records, columns=['filename', 'class_id'])
    final_df.to_csv(output_csv_path, index=False)

    print("Spectrogram generation complete!")
    print(f"Total spectrograms created: {len(final_df)}")
    if failed_segments:
        print(f"Segments skipped due to errors: {failed_segments}")
    print(f"Output saved to: {output_csv_path}")

    return final_df

In [4]:
# Define paths (all relative to the notebook's working directory)
audios_dir = os.path.join('..', 'database', 'audio', 'dev')
dev_data = pd.read_csv(os.path.join('..', 'database', 'meta', 'dev_data.csv'))

spect_dir = os.path.join('..', 'database', 'spect')
spect2_dir = os.path.join('..', 'database', 'spect2')

output_csv = os.path.join('..', 'database', 'meta', 'final_spects.csv')
output_csv2 = os.path.join('..', 'database', 'meta', 'final_spects2.csv')

test_audios_dir = os.path.join('..', 'database', 'test_audios')
test_audios2_dir = os.path.join('..', 'database', 'test_audios2')

# Keep only the rows whose audio file is actually present in dev/, so the
# spectrogram step does not hit missing-file errors.
# assumes 'filename' is a path relative to audios_dir — TODO confirm
segment_exists = dev_data['filename'].map(
    lambda name: os.path.isfile(os.path.join(audios_dir, name))
).astype(bool)  # astype keeps the mask boolean even when dev_data is empty
dev_df = dev_data.loc[segment_exists].reset_index(drop=True)
print(f"{len(dev_df)} of {len(dev_data)} segments found in {audios_dir}")

In [5]:
# Reset both output folders before regenerating anything.
for stale_dir in (spect_dir, test_audios_dir):
    util.clean_dir(stale_dir)

print("Generating spectrograms from audio segments...")
# NOTE(review): confirm the load_spectrograms definition in this notebook
# accepts the `noise_reduce` keyword.
load_spectrograms(
    dev_df,
    audios_dir,
    spect_dir,
    output_csv,
    test_audios_dir=test_audios_dir,
    noise_reduce=True,
)

Resetting ..\database\spect directory...
Resetting ..\database\test_audios directory...
Generating spectrograms from audio segments...
Total segments removed due to low RMS: 419


Acá veo los tamaños de los espectrogramas generados.

In [None]:
amount = 20  # number of spectrograms to spot-check

# List all PNG files in spect_dir
spect_files = [f for f in os.listdir(spect_dir) if f.endswith('.png')]

# Draw up to `amount` random images (fewer if the directory holds less)
random_files = random.sample(spect_files, min(amount, len(spect_files)))

for fname in random_files:
    img_path = os.path.join(spect_dir, fname)
    # The context manager closes the file handle after reading the size,
    # instead of leaving one open handle per inspected image.
    with Image.open(img_path) as img:
        print(f"{fname}: {img.size}")

XC616305_160000.png: (313, 224)
XC214240_0.png: (313, 224)
XC390971_320000.png: (313, 224)
XC48747_160000.png: (313, 224)
XC288823_960000.png: (313, 224)
XC394268_0.png: (313, 224)
XC396786_320000.png: (313, 224)
XC173759_640000.png: (313, 224)
XC112712_0.png: (313, 224)
XC283328_160000.png: (313, 224)
XC503247_480000.png: (313, 224)
XC476264_0.png: (313, 224)
XC492619_320000.png: (313, 224)
XC48762_1120000.png: (313, 224)
XC51046_320000.png: (313, 224)
XC139503_160000.png: (313, 224)
XC204203_640000.png: (313, 224)
XC361108_800000.png: (313, 224)
XC32322_0.png: (313, 224)
XC51744_1280000.png: (313, 224)
