In [None]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import soundfile as sf

In [None]:
# Root of the IRMAS training audio; the processing loop treats each sub-folder as one class.
INPUT_DIR = "/content/drive/MyDrive/irmas/IRMAS-TrainingData"
# Root for the generated spectrogram PNGs; one sub-folder per class is created under it.
OUTPUT_DIR = "/content/drive/MyDrive/irmas_mels"

In [None]:
# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
TARGET_SR = 16000          # Sample rate (Hz) every clip is resampled to
TARGET_DURATION = 5.0      # Fixed clip length in seconds; shorter clips are zero-padded, longer ones truncated
MIN_SILENCE_THRESH = 0.01  # Absolute-amplitude threshold, applied to peak-normalized audio, for silence trimming
N_MELS = 128               # Number of mel bins in the spectrogram
HOP_LENGTH = 512           # hop_length (in samples) passed to melspectrogram and specshow

In [None]:
#preprocess function
def preprocess_audio(file_path):
    """Load an audio file and turn it into a fixed-length, normalized mono clip.

    Steps, in order:
      1. Load as mono at the file's native sample rate.
      2. Resample to ``TARGET_SR`` when the native rate differs.
      3. Peak-normalize so the largest absolute sample is 1 (skipped for
         all-zero audio).
      4. Trim leading/trailing samples whose magnitude does not exceed
         ``MIN_SILENCE_THRESH``.
      5. Zero-pad at the end, or truncate, to exactly
         ``int(TARGET_SR * TARGET_DURATION)`` samples.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        Tuple ``(audio, sr)`` where ``audio`` is a 1-D array of
        ``int(TARGET_SR * TARGET_DURATION)`` samples and ``sr`` equals
        ``TARGET_SR``.
    """
    # Load at the native rate so resampling is an explicit, separate step.
    audio, sr = librosa.load(file_path, sr=None, mono=True)

    # Resample
    if sr != TARGET_SR:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=TARGET_SR)
        sr = TARGET_SR

    # Peak normalization (guard against division by zero on silent input)
    max_amp = np.max(np.abs(audio))
    if max_amp > 0:
        audio = audio / max_amp

    # Trim silence from both ends. The slice end is exclusive, so add 1 to
    # keep the last non-silent sample; without it that sample is dropped and
    # a clip with a single non-silent sample would collapse to length 0.
    non_silent = np.where(np.abs(audio) > MIN_SILENCE_THRESH)[0]
    if len(non_silent) > 0:
        audio = audio[non_silent[0]: non_silent[-1] + 1]

    # Fix duration
    target_len = int(TARGET_SR * TARGET_DURATION)
    if len(audio) < target_len:
        # Pad the tail with zeros up to the target length
        pad_len = target_len - len(audio)
        audio = np.pad(audio, (0, pad_len))
    else:
        # Keep only the first target_len samples
        audio = audio[:target_len]

    return audio, sr

In [None]:
#mel spectrogram function
def save_mel_spectrogram(audio, sr, save_path):
    """Render the dB-scaled mel spectrogram of ``audio`` and save it to disk.

    The spectrogram uses ``N_MELS`` mel bins and a hop of ``HOP_LENGTH``
    samples, is converted to decibels relative to its own maximum, and is
    drawn without axes on a 3x3 inch figure using the ``magma`` colormap.

    Args:
        audio: 1-D audio signal.
        sr: Sample rate of ``audio``.
        save_path: Destination path handed to ``Figure.savefig``.
    """
    mel = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_mels=N_MELS,
        hop_length=HOP_LENGTH
    )
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Hold an explicit figure handle and close it in `finally`: this function
    # is called once per file, and a failure in specshow/savefig would
    # otherwise leave the figure open and leak memory across the whole run.
    fig, ax = plt.subplots(figsize=(3, 3))
    try:
        ax.axis("off")
        librosa.display.specshow(
            mel_db, sr=sr, hop_length=HOP_LENGTH, cmap='magma', ax=ax
        )
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        plt.close(fig)

In [None]:
#loop through all folders
# Every sub-folder of INPUT_DIR is treated as one class; sorted for a stable order.
CLASS_FOLDERS = sorted(os.listdir(INPUT_DIR))

for cls in CLASS_FOLDERS:
    cls_input_path = os.path.join(INPUT_DIR, cls)

    # Skip if not a folder
    if not os.path.isdir(cls_input_path):
        continue

    # Create output folder for class
    cls_output_path = os.path.join(OUTPUT_DIR, cls)
    os.makedirs(cls_output_path, exist_ok=True)

    print(f"\nProcessing class: {cls}")

    # Keep only WAV files (extension matched case-insensitively) so the
    # progress bar counts real work, and sort them for a deterministic order.
    files = sorted(
        name for name in os.listdir(cls_input_path)
        if name.lower().endswith(".wav")
    )

    for file in tqdm(files):
        input_file = os.path.join(cls_input_path, file)

        # Swap only the final extension. str.replace(".wav", ".png") is
        # case-sensitive (so "X.WAV" kept its extension) and would also
        # rewrite any ".wav" occurring earlier in the file name.
        stem = os.path.splitext(file)[0]
        output_file = os.path.join(cls_output_path, stem + ".png")

        # Avoid reprocessing existing files
        if os.path.exists(output_file):
            continue

        try:
            audio, sr = preprocess_audio(input_file)
            save_mel_spectrogram(audio, sr, output_file)
        except Exception as e:
            # Best-effort batch run: report the failing file and carry on.
            print("Error:", file, e)

print("✔ Preprocessing complete!")


Processing class: cel


100%|██████████| 388/388 [00:00<00:00, 1511.32it/s]



Processing class: cla


100%|██████████| 505/505 [00:00<00:00, 1305.00it/s]



Processing class: flu


100%|██████████| 451/451 [00:00<00:00, 1889.43it/s]



Processing class: gac


100%|██████████| 637/637 [00:00<00:00, 2212.60it/s]



Processing class: gel


100%|██████████| 760/760 [00:00<00:00, 2272.12it/s]



Processing class: org


100%|██████████| 682/682 [00:00<00:00, 1817.53it/s]



Processing class: pia


100%|██████████| 721/721 [00:00<00:00, 2281.66it/s]



Processing class: sax


100%|██████████| 626/626 [00:00<00:00, 2358.52it/s]



Processing class: tru


100%|██████████| 577/577 [00:00<00:00, 2231.44it/s]



Processing class: vio


100%|██████████| 580/580 [00:00<00:00, 2830.78it/s]



Processing class: voi


100%|██████████| 778/778 [02:07<00:00,  6.08it/s]

✔ Preprocessing complete!



