In [1]:
import os
import pandas as pd
import librosa
# ... other imports ...
import gc # Add this line

In [2]:
# Install ipywidgets into the kernel's environment.
# NOTE(review): presumably needed so the tqdm.notebook progress bar used later can render as a widget — confirm.
%pip install ipywidgets

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
# Install noisereduce into the kernel's environment; it is imported later as `nr`
# and used for the noise-reduction step of the spectrogram pipeline.
%pip install noisereduce

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Cell 1: Imports and Configuration
This cell imports all necessary libraries and sets up the configuration variables for our processing task.

In [4]:
import os
import pandas as pd
import librosa
import librosa.display
import numpy as np
import noisereduce as nr
import matplotlib.pyplot as plt
from tqdm.notebook  import tqdm

# --- Configuration ---
# Locations: destination folder for the rendered images and the input metadata table
OUTPUT_SPECTROGRAM_DIR = '../data/birds/spectrograms/'
METADATA_PATH = '../data/birds/metadata.csv'

# Audio settings: sample rate (Hz) used when loading, and clip length (seconds)
SAMPLE_RATE, DURATION = 32_000, 5

# Mel spectrogram settings: number of Mel bands and the frequency range (Hz)
N_MELS, FMIN, FMAX = 128, 20, 16_000

# Create the output folder up front; exist_ok makes re-running this cell harmless
os.makedirs(OUTPUT_SPECTROGRAM_DIR, exist_ok=True)

print("Configuration and libraries are ready.")

Configuration and libraries are ready.


Cell 2: Spectrogram Creation Function
This cell defines the main function, which converts a single audio file into a Mel spectrogram image.

In [5]:
def create_spectrogram(audio_path, output_path):
    """
    Load an audio file, process it, and save its Mel spectrogram as an image.

    The audio is loaded at SAMPLE_RATE, truncated or zero-padded to exactly
    DURATION seconds, passed through stationary noise reduction, converted to
    a dB-scaled Mel spectrogram (N_MELS bands between FMIN and FMAX), and
    rendered to `output_path` with no axes or frame.

    Args:
        audio_path: Path of the audio file to read.
        output_path: Path of the image file to write.

    Returns:
        True if the image was saved; False if any step raised an exception.
        Errors are printed rather than re-raised so a batch run keeps going.
    """
    try:
        # 1. Load the audio file, resampling to our standard sample rate
        y, sr = librosa.load(audio_path, sr=SAMPLE_RATE)

        # 2. Trim or zero-pad the signal so every clip has the same length
        target_length = DURATION * sr
        if len(y) > target_length:
            y = y[:target_length]
        else:
            y = np.pad(y, (0, target_length - len(y)), 'constant')

        # 3. Reduce background noise
        y_reduced = nr.reduce_noise(y=y, sr=sr, stationary=True, prop_decrease=0.85)

        # 4. Generate the Mel spectrogram and convert power to dB relative to the peak
        S = librosa.feature.melspectrogram(
            y=y_reduced,
            sr=sr,
            n_mels=N_MELS,
            fmin=FMIN,
            fmax=FMAX
        )
        S_dB = librosa.power_to_db(S, ref=np.max)

        # 5. Save the spectrogram image without any borders or axes
        fig = plt.figure(figsize=[1, 1])
        try:
            ax = fig.add_subplot(111)
            ax.axes.get_xaxis().set_visible(False)
            ax.axes.get_yaxis().set_visible(False)
            ax.set_frame_on(False)
            librosa.display.specshow(S_dB, sr=sr, fmin=FMIN, fmax=FMAX, ax=ax)
            # Save through the figure object instead of pyplot's implicit "current figure"
            fig.savefig(output_path, dpi=300, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even if rendering or saving fails;
            # otherwise failed files would leave open figures piling up in memory.
            plt.close(fig)

    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return False

    return True


print("Spectrogram creation function is defined.")

Spectrogram creation function is defined.


Cell 3: Main Processing Loop
This is the final preprocessing cell. Running it starts the main process: it reads your metadata.csv and loops through every listed file, calling the function from Cell 2 on each file that does not already have a spectrogram.

In [6]:
# Load metadata; the loop below reads its 'file_path' and 'species_name' columns
metadata = pd.read_csv(METADATA_PATH)

# Per-outcome counters so the final summary shows what actually happened
created_count = 0
skipped_count = 0
failed_count = 0

# Iterate over just the two needed columns; this avoids building a Series per row as iterrows() does
records = zip(metadata['file_path'], metadata['species_name'])

# Use tqdm for a progress bar within the notebook
for file_path, species_name in tqdm(records, total=len(metadata), desc="Processing audio files"):

    # Normalise backslash separators to '/' and resolve the path one directory up from the notebook
    audio_file_path = '../' + file_path.replace('\\', '/')

    # Create a unique filename for the spectrogram: <species>_<audio file stem>.png
    base_filename = os.path.splitext(os.path.basename(audio_file_path))[0]
    output_filename = f"{species_name}_{base_filename}.png"
    output_spectrogram_path = os.path.join(OUTPUT_SPECTROGRAM_DIR, output_filename)

    # Skip files that already have a spectrogram so the cell can be re-run cheaply
    if os.path.exists(output_spectrogram_path):
        skipped_count += 1
        continue

    create_spectrogram(audio_file_path, output_spectrogram_path)

    # Judge success by the presence of the output file rather than by a return value
    if os.path.exists(output_spectrogram_path):
        created_count += 1
    else:
        failed_count += 1

    # Force a collection only after real work was done; skipped files allocate nothing worth collecting
    gc.collect()

print(f"\nPreprocessing complete. Spectrograms saved in '{OUTPUT_SPECTROGRAM_DIR}'.")
print(f"Created: {created_count}, skipped (already present): {skipped_count}, failed: {failed_count}")

Processing audio files:   0%|          | 0/24459 [00:00<?, ?it/s]


Preprocessing complete. Spectrograms saved in '../data/birds/spectrograms/'.


In [None]:
import tensorflow as tf
from tensorflow.python.platform import build_info

# Read build details through the public tf.sysconfig API instead of indexing the
# private build_info dict directly. The CUDA/cuDNN keys may be missing (for
# example on a build without GPU support), so use .get() with a placeholder
# rather than letting a KeyError abort the cell.
tf_build_details = tf.sysconfig.get_build_info()

print("TF Version:", tf.__version__)
print("CUDA Version:", tf_build_details.get('cuda_version', 'not available'))
print("cuDNN Version:", tf_build_details.get('cudnn_version', 'not available'))
