In [None]:
# Mount Google Drive at /content/drive (Colab-only API) so that the project
# folders referenced below under /content/drive/MyDrive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob

In [None]:
import os

# Root folder that receives the noise-augmented spectrogram PNGs.
# Created up front; exist_ok=True makes re-running this cell harmless.
base_out = "/content/drive/MyDrive/DS340_Final_Project/augmented_spectrograms_noise"
os.makedirs(base_out, exist_ok=True)

# Input roots scanned by the augmentation loop. Each one is expected to hold
# one sub-directory per actor containing the .wav recordings.
data_dirs = [
    "/content/drive/MyDrive/DS340_Final_Project/Audio_Song_Actors_01-24",
    "/content/drive/MyDrive/DS340_Final_Project/Audio_Speech_Actors_01-24",
]

In [None]:
# Scale factors applied to unit-variance Gaussian noise before it is added to
# the waveform. Each value is also interpolated verbatim into the output
# folder/file names, so keep the literals exactly as written.
noise_levels = [
    0.005,
    0.01,
    0.02,
    0.05,
    0.1,
    0.2,
]

In [None]:
# Two-digit emotion code -> label. The code is read from the third
# dash-separated field of each .wav file name; the label doubles as the name
# of the per-emotion output sub-folder.
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

In [None]:
# Noise augmentation.
# For every noise level, add white Gaussian noise to each labelled .wav found
# under `data_dirs`, compute its mel-spectrogram in dB, and save it as
#   <base_out>/noise_<eps>/<emotion>/<original stem>_noise<eps>.png

# One seeded generator for the whole run. Together with the sorted directory
# listings below, this makes a re-run over the same files draw the same noise
# instead of silently producing a different dataset each time.
rng = np.random.default_rng(0)

for eps in noise_levels:
    # 1) make one shared noise-level folder
    noise_dir = os.path.join(base_out, f"noise_{eps}")
    os.makedirs(noise_dir, exist_ok=True)

    # 2) inside it, one subfolder per emotion
    for emo in emotion_map.values():
        os.makedirs(os.path.join(noise_dir, emo), exist_ok=True)

    # 3) now process *both* data_dirs into this same tree
    for data_path in data_dirs:
        # sorted(): os.listdir order is arbitrary; a fixed order keeps the
        # seeded noise stream aligned with the same files on every run.
        actors = sorted(os.listdir(data_path))
        for actor in tqdm(actors, desc=f"Noise={eps} [{os.path.basename(data_path)}]"):
            actor_path = os.path.join(data_path, actor)
            if not os.path.isdir(actor_path):
                continue

            for fn in sorted(os.listdir(actor_path)):
                # Split the extension once and reuse the stem for the output
                # name. The previous fn.replace(".wav", ...) was case-sensitive
                # while the filter was not, so "X.WAV" kept its extension and
                # savefig was asked to write an unsupported ".WAV" image.
                stem, ext = os.path.splitext(fn)
                if ext.lower() != ".wav":
                    continue

                # pull out emotion (third dash-separated field); skip files
                # that do not follow the naming scheme instead of crashing
                # the whole run with an IndexError
                fields = fn.split("-")
                if len(fields) < 3:
                    continue
                emotion = emotion_map.get(fields[2])
                if emotion is None:
                    continue

                # load at the native sample rate + augment
                wav_path = os.path.join(actor_path, fn)
                y, sr    = librosa.load(wav_path, sr=None)
                y_noisy  = y + eps * rng.standard_normal(len(y))

                # mel-spectrogram → dB (relative to the spectrogram's own peak)
                S    = librosa.feature.melspectrogram(y=y_noisy, sr=sr)
                S_db = librosa.power_to_db(S, ref=np.max)

                # save PNG into the *same* noise_dir/emotion; explicit fig/ax
                # and try/finally so a failed render never leaks an open figure
                fig, ax = plt.subplots(figsize=(3, 3))
                try:
                    librosa.display.specshow(S_db, sr=sr,
                                             x_axis='time', y_axis='mel', ax=ax)
                    ax.axis('off')
                    save_path = os.path.join(noise_dir, emotion, f"{stem}_noise{eps}.png")
                    fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
                finally:
                    plt.close(fig)

Noise=0.005 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [06:19<00:00, 15.80s/it]
Noise=0.005 [Audio_Speech_Actors_01-24]: 100%|██████████| 25/25 [10:28<00:00, 25.14s/it]
Noise=0.01 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [02:30<00:00,  6.25s/it]
Noise=0.01 [Audio_Speech_Actors_01-24]: 100%|██████████| 25/25 [03:12<00:00,  7.70s/it]
Noise=0.02 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [02:29<00:00,  6.25s/it]
Noise=0.02 [Audio_Speech_Actors_01-24]: 100%|██████████| 25/25 [03:11<00:00,  7.66s/it]
Noise=0.05 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [02:25<00:00,  6.07s/it]
Noise=0.05 [Audio_Speech_Actors_01-24]: 100%|██████████| 25/25 [03:13<00:00,  7.72s/it]
Noise=0.1 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [02:26<00:00,  6.12s/it]
Noise=0.1 [Audio_Speech_Actors_01-24]: 100%|██████████| 25/25 [03:19<00:00,  7.97s/it]
Noise=0.2 [Audio_Song_Actors_01-24]: 100%|██████████| 24/24 [02:27<00:00,  6.13s/it]
Noise=0.2 [Audio_Speech_Actors_01-24]: 100%|█