In [21]:
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# ======== CONFIG ========
# Dataset roots: raw audio goes in, rendered spectrogram images come out.
AUDIO_BASE = r"D:\Programming\Alzheimers Detection\dataset_audio"
SPEC_BASE = r"D:\Programming\Alzheimers Detection\dataset_spectrograms"

# Both trees use the same <split>/<label> layout. The unpacking order below
# follows the iteration order: train before test, impaired before normal.
(
    TRAIN_IMPAIRED_AUDIO,
    TRAIN_NORMAL_AUDIO,
    TEST_IMPAIRED_AUDIO,
    TEST_NORMAL_AUDIO,
) = (
    os.path.join(AUDIO_BASE, split, label)
    for split in ("train", "test")
    for label in ("impaired", "normal")
)

(
    TRAIN_IMPAIRED_SPEC,
    TRAIN_NORMAL_SPEC,
    TEST_IMPAIRED_SPEC,
    TEST_NORMAL_SPEC,
) = (
    os.path.join(SPEC_BASE, split, label)
    for split in ("train", "test")
    for label in ("impaired", "normal")
)

# Create every output directory up front; already-existing ones are fine.
for folder in (
    TRAIN_IMPAIRED_SPEC,
    TRAIN_NORMAL_SPEC,
    TEST_IMPAIRED_SPEC,
    TEST_NORMAL_SPEC,
):
    os.makedirs(folder, exist_ok=True)


# ======== MEL SPECTROGRAM FUNCTION ========
def create_mel_spectrogram(y, sr, n_mels=128, fmax=8000):
    """Compute a normalized, dB-scaled mel spectrogram of an audio signal.

    Args:
        y: Audio samples, passed unchanged to
            ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y`` in Hz.
        n_mels: Number of mel bands to compute.
        fmax: Requested upper frequency bound in Hz. It is capped at the
            Nyquist frequency (``sr / 2``), because mel bands above Nyquist
            contain no energy and would come out empty.

    Returns:
        The mel power spectrogram converted to dB relative to its own peak
        (``ref=np.max``) and then passed through ``librosa.util.normalize``.
    """
    # The sampling rate is caller-supplied and may be below 2 * fmax
    # (e.g. 8 kHz recordings); asking for bands above Nyquist is meaningless.
    fmax = min(fmax, sr / 2)

    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, fmax=fmax)
    S_db = librosa.power_to_db(S, ref=np.max)

    # NOTE: normalize() runs with its library defaults (max-norm along
    # axis 0), so every time frame (column) is scaled independently rather
    # than the spectrogram as a whole. Kept as-is to preserve existing
    # outputs. TODO(review): confirm per-frame scaling is intended.
    return librosa.util.normalize(S_db)


# ======== SAVE IMAGE ========
def save_spectrogram(spec, save_path):
    """Render a spectrogram array as an axis-free image and write it to disk.

    Args:
        spec: 2-D array drawn with ``librosa.display.specshow`` using the
            "magma" colormap.
        save_path: Destination file path, handed to matplotlib's ``savefig``.

    Raises:
        Whatever ``specshow`` or ``savefig`` raise; the figure is closed
        before the exception propagates.
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so exactly this figure is drawn on, saved and closed.
    fig, ax = plt.subplots(figsize=(3, 3))
    try:
        ax.axis("off")
        librosa.display.specshow(spec, cmap="magma", ax=ax)
        # Tight bounding box with zero padding trims the surrounding margin.
        fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
    finally:
        # Always release the figure: this runs once per image over hundreds
        # of files, and a failed save must not leave figures open.
        plt.close(fig)


# ======== PROCESS ONE FILE ========
def process_audio(filepath, output_folder, augment_count=0):
    """Convert one audio file into spectrogram image(s).

    The unmodified spectrogram is saved as ``<stem>.jpg``. Each of the
    ``augment_count`` extra copies is saved as ``<stem>_aug<k>.jpg`` (k starts
    at 1) with one random contiguous band of time frames overwritten by the
    spectrogram's minimum value (a time mask).

    Args:
        filepath: Path of the audio file to load.
        output_folder: Directory that receives the image files.
        augment_count: Number of time-masked copies to write in addition to
            the original. Values <= 0 write only the original.
    """
    stem = os.path.splitext(os.path.basename(filepath))[0]

    # sr=None keeps the file's native sampling rate (no resampling).
    y, sr = librosa.load(filepath, sr=None)
    base_spec = create_mel_spectrogram(y, sr)
    save_spectrogram(base_spec, os.path.join(output_folder, f"{stem}.jpg"))

    if augment_count <= 0:
        return

    # Loop invariants: frame count, fill value, and the exclusive upper
    # bound for the mask width (at least 10, otherwise 10% of the frames).
    n_frames = base_spec.shape[1]
    mask_value = np.min(base_spec)
    width_bound = max(10, n_frames // 10)

    for i in range(augment_count):
        spec_aug = base_spec.copy()

        width = np.random.randint(5, width_bound)
        # Very short clips can have fewer frames than the drawn width;
        # clamp so at least one frame survives instead of blanking the
        # whole image.
        width = min(width, max(n_frames - 1, 0))
        # Valid start positions are 0 .. n_frames - width inclusive
        # (randint's upper bound is exclusive, hence the + 1).
        start = np.random.randint(0, n_frames - width + 1)

        spec_aug[:, start:start + width] = mask_value

        save_spectrogram(
            spec_aug,
            os.path.join(output_folder, f"{stem}_aug{i+1}.jpg"),
        )


# ======== PROCESS FOLDER ========
def process_folder(audio_folder, spec_folder, augment_count=0):
    """Convert every WAV file directly inside a folder to spectrogram images.

    Args:
        audio_folder: Directory to scan (non-recursively) for ``.wav`` files.
        spec_folder: Directory that receives the generated images.
        augment_count: Number of augmented copies per file, forwarded to
            ``process_audio``.
    """
    # Match the extension case-insensitively so files named "*.WAV" are not
    # silently skipped, and sort because os.listdir() order is arbitrary;
    # a stable order keeps runs (and the sequence of random augmentation
    # draws) repeatable.
    wav_names = sorted(
        name for name in os.listdir(audio_folder)
        if name.lower().endswith(".wav")
    )
    for name in tqdm(wav_names, desc=f"Processing → {spec_folder}"):
        process_audio(
            os.path.join(audio_folder, name),
            spec_folder,
            augment_count=augment_count,
        )


# ======== RUN PIPELINE ========
# Jobs run top to bottom: (audio input dir, image output dir, augmented
# copies per file). Only the impaired training set is augmented (×3);
# the normal training set and both test sets are converted as-is.
for audio_dir, spec_dir, extra_copies in (
    (TRAIN_IMPAIRED_AUDIO, TRAIN_IMPAIRED_SPEC, 3),
    (TRAIN_NORMAL_AUDIO, TRAIN_NORMAL_SPEC, 0),
    (TEST_IMPAIRED_AUDIO, TEST_IMPAIRED_SPEC, 0),
    (TEST_NORMAL_AUDIO, TEST_NORMAL_SPEC, 0),
):
    process_folder(audio_dir, spec_dir, augment_count=extra_copies)

print("Spectrogram generation completed successfully!")


Processing → D:\Programming\Alzheimers Detection\dataset_spectrograms\train\impaired: 100%|██████████| 66/66 [01:56<00:00,  1.76s/it]
Processing → D:\Programming\Alzheimers Detection\dataset_spectrograms\train\normal: 100%|██████████| 264/264 [01:16<00:00,  3.45it/s]
Processing → D:\Programming\Alzheimers Detection\dataset_spectrograms\test\impaired: 100%|██████████| 16/16 [00:11<00:00,  1.42it/s]
Processing → D:\Programming\Alzheimers Detection\dataset_spectrograms\test\normal: 100%|██████████| 16/16 [00:06<00:00,  2.34it/s]

Spectrogram generation completed successfully!



