In [7]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

def convert_wav_to_log_mel(root_path, output_root, n_mels=128, sr=16000, fmax=8000):
    """Render every WAV file under ``root_path`` as a log-Mel spectrogram PNG.

    The directory tree below ``root_path`` is mirrored below ``output_root``
    (every visited directory is created, even if it holds no WAV files) and
    each ``<name>.wav`` is written as ``<name>.png`` in the matching output
    directory. A file that fails to convert is reported on stdout and skipped,
    so one bad file does not abort the batch.

    Args:
        root_path: Directory walked recursively for WAV files.
        output_root: Directory that receives the mirrored tree of PNG images.
        n_mels: Number of Mel bands passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate passed to ``librosa.load`` for every file. The same
            value is used for each file; it is never overwritten by the rate
            ``librosa.load`` returns.
        fmax: Upper frequency bound passed to ``librosa.feature.melspectrogram``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    wav_ext = '.wav'

    # Ensure the output root directory exists
    os.makedirs(output_root, exist_ok=True)

    # Walk through all subdirectories in root_path
    for subdir, _, files in os.walk(root_path):
        # Mirror the input layout: <root_path>/<rel> -> <output_root>/<rel>
        relative_subdir = os.path.relpath(subdir, root_path)
        output_subdir = os.path.join(output_root, relative_subdir)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            # Case-insensitive match so that ".WAV" files are not skipped.
            if not file.lower().endswith(wav_ext):
                continue

            input_path = os.path.join(subdir, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # any ".wav" that appears earlier in the file name.
            output_path = os.path.join(output_subdir, file[:-len(wav_ext)] + '.png')

            try:
                # Keep the returned rate in its own name. Rebinding `sr` here
                # would turn the first file's rate into the target rate of
                # every later file when the caller passed sr=None.
                y, file_sr = librosa.load(input_path, sr=sr)

                # Compute the Mel spectrogram and express it in dB relative to
                # this spectrogram's own maximum (ref=np.max).
                mel_spec = librosa.feature.melspectrogram(y=y, sr=file_sr, n_mels=n_mels, fmax=fmax)
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # Widen the figure for long clips, but never below 10 inches.
                duration = librosa.get_duration(y=y, sr=file_sr)
                fig_width = max(10, duration / 2)

                fig = plt.figure(figsize=(fig_width, 4), dpi=300)
                try:
                    librosa.display.specshow(log_mel_spec, sr=file_sr, x_axis='time', y_axis='mel')
                    plt.colorbar(format='%+2.0f dB')
                    plt.title(f'Log Mel Spectrogram - {file}')
                    plt.axis("off")  # Hide axes to save space

                    # Pad the layout, then crop tightly with a small margin.
                    plt.tight_layout(pad=2.0)
                    plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
                finally:
                    # Always release the figure, even if plotting or saving
                    # failed; otherwise figures pile up over a long batch.
                    plt.close(fig)

                print(f"✔ Converted: {input_path} → {output_path}")

            except Exception as e:
                # Best-effort batch: report the failure and move on.
                print(f"❌ Error processing {input_path}: {e}")

# Example usage: read WAV files from the "test" tree and write the converted
# files into a parallel "test_spectrogram" tree.
root_path, output_root = "test", "test_spectrogram"
convert_wav_to_log_mel(root_path=root_path, output_root=output_root)

✔ Converted: test\air_conditioner_test\100852-0-0-11.wav → test_spectrogram\air_conditioner_test\100852-0-0-11.png)
✔ Converted: test\air_conditioner_test\100852-0-0-22.wav → test_spectrogram\air_conditioner_test\100852-0-0-22.png)
✔ Converted: test\air_conditioner_test\101729-0-0-26.wav → test_spectrogram\air_conditioner_test\101729-0-0-26.png)
✔ Converted: test\air_conditioner_test\101729-0-0-40.wav → test_spectrogram\air_conditioner_test\101729-0-0-40.png)
✔ Converted: test\air_conditioner_test\121285-0-0-8.wav → test_spectrogram\air_conditioner_test\121285-0-0-8.png)
✔ Converted: test\air_conditioner_test\13230-0-0-5.wav → test_spectrogram\air_conditioner_test\13230-0-0-5.png)
✔ Converted: test\air_conditioner_test\146690-0-0-115.wav → test_spectrogram\air_conditioner_test\146690-0-0-115.png)
✔ Converted: test\air_conditioner_test\146690-0-0-80.wav → test_spectrogram\air_conditioner_test\146690-0-0-80.png)
✔ Converted: test\air_conditioner_test\146690-0-0-93.wav → test_spectrogram\

In [8]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt

def convert_wav_to_log_mel(root_path, output_root, n_mels=128, sr=16000, fmax=8000):
    """Render every WAV file under ``root_path`` as a log-Mel spectrogram PNG.

    The directory tree below ``root_path`` is mirrored below ``output_root``
    (every visited directory is created, even if it holds no WAV files) and
    each ``<name>.wav`` is written as ``<name>.png`` in the matching output
    directory. A file that fails to convert is reported on stdout and skipped,
    so one bad file does not abort the batch.

    Args:
        root_path: Directory walked recursively for WAV files.
        output_root: Directory that receives the mirrored tree of PNG images.
        n_mels: Number of Mel bands passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate passed to ``librosa.load`` for every file. The same
            value is used for each file; it is never overwritten by the rate
            ``librosa.load`` returns.
        fmax: Upper frequency bound passed to ``librosa.feature.melspectrogram``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    wav_ext = '.wav'

    # Ensure the output root directory exists
    os.makedirs(output_root, exist_ok=True)

    # Walk through all subdirectories in root_path
    for subdir, _, files in os.walk(root_path):
        # Mirror the input layout: <root_path>/<rel> -> <output_root>/<rel>
        relative_subdir = os.path.relpath(subdir, root_path)
        output_subdir = os.path.join(output_root, relative_subdir)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            # Case-insensitive match so that ".WAV" files are not skipped.
            if not file.lower().endswith(wav_ext):
                continue

            input_path = os.path.join(subdir, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # any ".wav" that appears earlier in the file name.
            output_path = os.path.join(output_subdir, file[:-len(wav_ext)] + '.png')

            try:
                # Keep the returned rate in its own name. Rebinding `sr` here
                # would turn the first file's rate into the target rate of
                # every later file when the caller passed sr=None.
                y, file_sr = librosa.load(input_path, sr=sr)

                # Compute the Mel spectrogram and express it in dB relative to
                # this spectrogram's own maximum (ref=np.max).
                mel_spec = librosa.feature.melspectrogram(y=y, sr=file_sr, n_mels=n_mels, fmax=fmax)
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # Widen the figure for long clips, but never below 10 inches.
                duration = librosa.get_duration(y=y, sr=file_sr)
                fig_width = max(10, duration / 2)

                fig = plt.figure(figsize=(fig_width, 4), dpi=300)
                try:
                    librosa.display.specshow(log_mel_spec, sr=file_sr, x_axis='time', y_axis='mel')
                    plt.colorbar(format='%+2.0f dB')
                    plt.title(f'Log Mel Spectrogram - {file}')
                    plt.axis("off")  # Hide axes to save space

                    # Pad the layout, then crop tightly with a small margin.
                    plt.tight_layout(pad=2.0)
                    plt.savefig(output_path, bbox_inches='tight', pad_inches=0.1)
                finally:
                    # Always release the figure, even if plotting or saving
                    # failed; otherwise figures pile up over a long batch.
                    plt.close(fig)

                print(f"✔ Converted: {input_path} → {output_path}")

            except Exception as e:
                # Best-effort batch: report the failure and move on.
                print(f"❌ Error processing {input_path}: {e}")

# Example usage: read WAV files from the "train_val" tree and write the
# converted files into a parallel "train_val_spectrogram" tree.
root_path, output_root = "train_val", "train_val_spectrogram"
convert_wav_to_log_mel(root_path=root_path, output_root=output_root)

✔ Converted: train_val\air_conditioner_train_val\100852-0-0-10.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-10.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-15.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-15.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-19.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-19.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-2.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-2.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-20.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-20.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-21.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-21.png)
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-23.wav → train_val_spectrogram\air_conditioner_train_val\100852-0-0-23.png)
✔ Converted: train_val\air_conditioner_train_val\1

In [9]:
import os
import librosa
import numpy as np

def convert_wav_to_log_mel(root_path, output_root, n_mels=128, sr=16000, fmax=8000):
    """Save a log-Mel spectrogram ``.npy`` array for every WAV file under ``root_path``.

    The directory tree below ``root_path`` is mirrored below ``output_root``
    (every visited directory is created, even if it holds no WAV files) and
    each ``<name>.wav`` is written as ``<name>.npy`` in the matching output
    directory. A file that fails to convert is reported on stdout and skipped,
    so one bad file does not abort the batch.

    Args:
        root_path: Directory walked recursively for WAV files.
        output_root: Directory that receives the mirrored tree of ``.npy`` files.
        n_mels: Number of Mel bands passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate passed to ``librosa.load`` for every file. The same
            value is used for each file; it is never overwritten by the rate
            ``librosa.load`` returns.
        fmax: Upper frequency bound passed to ``librosa.feature.melspectrogram``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    wav_ext = '.wav'

    # Ensure the output root directory exists
    os.makedirs(output_root, exist_ok=True)

    # Walk through all subdirectories in root_path
    for subdir, _, files in os.walk(root_path):
        # Mirror the input layout: <root_path>/<rel> -> <output_root>/<rel>
        relative_subdir = os.path.relpath(subdir, root_path)
        output_subdir = os.path.join(output_root, relative_subdir)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            # Case-insensitive match so that ".WAV" files are not skipped.
            if not file.lower().endswith(wav_ext):
                continue

            input_path = os.path.join(subdir, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # any ".wav" that appears earlier in the file name.
            output_path = os.path.join(output_subdir, file[:-len(wav_ext)] + '.npy')

            try:
                # Keep the returned rate in its own name. Rebinding `sr` here
                # would turn the first file's rate into the target rate of
                # every later file when the caller passed sr=None.
                y, file_sr = librosa.load(input_path, sr=sr)

                # Compute the Mel spectrogram and express it in dB relative to
                # this spectrogram's own maximum (ref=np.max).
                mel_spec = librosa.feature.melspectrogram(y=y, sr=file_sr, n_mels=n_mels, fmax=fmax)
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # Save the log Mel spectrogram as a .npy file
                np.save(output_path, log_mel_spec)

                print(f"✔ Converted: {input_path} → {output_path}")

            except Exception as e:
                # Best-effort batch: report the failure and move on.
                print(f"❌ Error processing {input_path}: {e}")

# Example usage: read WAV files from the "test" tree and write the converted
# files into a parallel "test_spectrogram_npy" tree.
root_path, output_root = "test", "test_spectrogram_npy"
convert_wav_to_log_mel(root_path=root_path, output_root=output_root)

✔ Converted: test\air_conditioner_test\100852-0-0-11.wav → test_spectrogram_npy\air_conditioner_test\100852-0-0-11.npy
✔ Converted: test\air_conditioner_test\100852-0-0-22.wav → test_spectrogram_npy\air_conditioner_test\100852-0-0-22.npy
✔ Converted: test\air_conditioner_test\101729-0-0-26.wav → test_spectrogram_npy\air_conditioner_test\101729-0-0-26.npy
✔ Converted: test\air_conditioner_test\101729-0-0-40.wav → test_spectrogram_npy\air_conditioner_test\101729-0-0-40.npy
✔ Converted: test\air_conditioner_test\121285-0-0-8.wav → test_spectrogram_npy\air_conditioner_test\121285-0-0-8.npy
✔ Converted: test\air_conditioner_test\13230-0-0-5.wav → test_spectrogram_npy\air_conditioner_test\13230-0-0-5.npy
✔ Converted: test\air_conditioner_test\146690-0-0-115.wav → test_spectrogram_npy\air_conditioner_test\146690-0-0-115.npy
✔ Converted: test\air_conditioner_test\146690-0-0-80.wav → test_spectrogram_npy\air_conditioner_test\146690-0-0-80.npy
✔ Converted: test\air_conditioner_test\146690-0-0-93

In [10]:
import os
import librosa
import numpy as np

def convert_wav_to_log_mel(root_path, output_root, n_mels=128, sr=16000, fmax=8000):
    """Save a log-Mel spectrogram ``.npy`` array for every WAV file under ``root_path``.

    The directory tree below ``root_path`` is mirrored below ``output_root``
    (every visited directory is created, even if it holds no WAV files) and
    each ``<name>.wav`` is written as ``<name>.npy`` in the matching output
    directory. A file that fails to convert is reported on stdout and skipped,
    so one bad file does not abort the batch.

    Args:
        root_path: Directory walked recursively for WAV files.
        output_root: Directory that receives the mirrored tree of ``.npy`` files.
        n_mels: Number of Mel bands passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate passed to ``librosa.load`` for every file. The same
            value is used for each file; it is never overwritten by the rate
            ``librosa.load`` returns.
        fmax: Upper frequency bound passed to ``librosa.feature.melspectrogram``.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    wav_ext = '.wav'

    # Ensure the output root directory exists
    os.makedirs(output_root, exist_ok=True)

    # Walk through all subdirectories in root_path
    for subdir, _, files in os.walk(root_path):
        # Mirror the input layout: <root_path>/<rel> -> <output_root>/<rel>
        relative_subdir = os.path.relpath(subdir, root_path)
        output_subdir = os.path.join(output_root, relative_subdir)
        os.makedirs(output_subdir, exist_ok=True)

        for file in files:
            # Case-insensitive match so that ".WAV" files are not skipped.
            if not file.lower().endswith(wav_ext):
                continue

            input_path = os.path.join(subdir, file)
            # Swap only the trailing extension; str.replace would also rewrite
            # any ".wav" that appears earlier in the file name.
            output_path = os.path.join(output_subdir, file[:-len(wav_ext)] + '.npy')

            try:
                # Keep the returned rate in its own name. Rebinding `sr` here
                # would turn the first file's rate into the target rate of
                # every later file when the caller passed sr=None.
                y, file_sr = librosa.load(input_path, sr=sr)

                # Compute the Mel spectrogram and express it in dB relative to
                # this spectrogram's own maximum (ref=np.max).
                mel_spec = librosa.feature.melspectrogram(y=y, sr=file_sr, n_mels=n_mels, fmax=fmax)
                log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

                # Save the log Mel spectrogram as a .npy file
                np.save(output_path, log_mel_spec)

                print(f"✔ Converted: {input_path} → {output_path}")

            except Exception as e:
                # Best-effort batch: report the failure and move on.
                print(f"❌ Error processing {input_path}: {e}")

# Example usage: read WAV files from the "train_val" tree and write the
# converted files into a parallel "train_val_spectrogram_npy" tree.
root_path, output_root = "train_val", "train_val_spectrogram_npy"
convert_wav_to_log_mel(root_path=root_path, output_root=output_root)

✔ Converted: train_val\air_conditioner_train_val\100852-0-0-10.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-10.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-15.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-15.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-19.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-19.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-2.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-2.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-20.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-20.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-21.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-21.npy
✔ Converted: train_val\air_conditioner_train_val\100852-0-0-23.wav → train_val_spectrogram_npy\air_conditioner_train_val\100852-0-0-23.npy
✔ Converted: train_val\air_co