In [None]:
# Mount Google Drive into the Colab runtime; the dataset and output paths used
# by the following cells all live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os

# Input roots. The processing loop walks each root as
# <root>/<actor sub-folder>/<file>.wav, skipping anything that is not a directory.
data_dirs = [
    "/content/drive/MyDrive/DS340_Final_Project/Audio_Song_Actors_01-24",
    "/content/drive/MyDrive/DS340_Final_Project/Audio_Speech_Actors_01-24"
]


# Root folder that receives the generated spectrogram PNGs
# (one "speed_<rate>" sub-folder per speed factor is created beneath it).
base_out = "/content/drive/MyDrive/DS340_Final_Project/augmented_spectrograms_speed"

# Create the output root up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(base_out, exist_ok=True)

In [None]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob

In [None]:
# Playback-rate multipliers passed as `rate` to librosa.effects.time_stretch
# (rate > 1 speeds the audio up, rate < 1 slows it down).
# Each value is also formatted verbatim into folder and file names, so `2` must
# stay an int: writing 2.0 would turn "speed_2" into "speed_2.0".
speed_factors = [0.25, 0.4, 0.5, 0.8, 1.2, 1.5, 2, 2.5]

In [None]:
# Lookup from the two-digit code found in the third dash-separated field of a
# file name to its emotion label. The label doubles as the name of the output
# sub-folder the spectrogram is written to.
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

In [None]:
# Render one mel-spectrogram PNG per (speed factor, audio file) pair.
# On-disk layout: <base_out>/speed_<rate>/<emotion>/<original stem>_speed<rate>.png
for rate in speed_factors:
    # 1) one shared folder per speed factor
    speed_dir = os.path.join(base_out, f"speed_{rate}")
    os.makedirs(speed_dir, exist_ok=True)

    # 2) one sub-folder per emotion label
    for emo in emotion_map.values():
        os.makedirs(os.path.join(speed_dir, emo), exist_ok=True)

    # 3) process both Song & Speech
    for data_path in data_dirs:
        tag = os.path.basename(data_path)
        # sorted(): os.listdir returns entries in arbitrary order; sorting makes
        # the processing order (and the progress bar) reproducible across runs.
        for actor in tqdm(sorted(os.listdir(data_path)), desc=f"{tag} @ speed={rate}"):
            actor_path = os.path.join(data_path, actor)
            if not os.path.isdir(actor_path):
                continue

            for fn in sorted(os.listdir(actor_path)):
                # Split off the extension once so the case-insensitive filter and
                # the output name agree (a ".WAV" file used to pass the filter
                # but keep its ".WAV" suffix in the PNG path).
                stem, ext = os.path.splitext(fn)
                if ext.lower() != ".wav":
                    continue

                # extract emotion label from the third dash-separated field;
                # skip names that do not have one instead of raising IndexError
                # and aborting the whole run.
                fields = fn.split("-")
                if len(fields) < 3:
                    continue
                emotion = emotion_map.get(fields[2])
                if emotion is None:
                    continue

                # load at the file's native sample rate & time-stretch
                wav_path = os.path.join(actor_path, fn)
                y, sr    = librosa.load(wav_path, sr=None)
                y_mod    = librosa.effects.time_stretch(y, rate=rate)

                # mel-spectrogram → dB (relative to the loudest bin)
                S    = librosa.feature.melspectrogram(y=y_mod, sr=sr)
                S_db = librosa.power_to_db(S, ref=np.max)

                # save PNG: axes hidden and no padding, so only the spectrogram
                # pixels end up in the image.
                save_path = os.path.join(speed_dir, emotion, f"{stem}_speed{rate}.png")
                fig, ax = plt.subplots(figsize=(3, 3))
                try:
                    librosa.display.specshow(S_db, sr=sr,
                                             x_axis='time', y_axis='mel', ax=ax)
                    ax.set_axis_off()
                    fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
                finally:
                    # Always release the figure, even if rendering or saving
                    # fails; thousands of figures are created in this loop.
                    plt.close(fig)

Audio_Song_Actors_01-24 @ speed=2: 100%|██████████| 24/24 [03:44<00:00,  9.35s/it]
Audio_Speech_Actors_01-24 @ speed=2: 100%|██████████| 25/25 [06:50<00:00, 16.41s/it]
Audio_Song_Actors_01-24 @ speed=2.5: 100%|██████████| 24/24 [03:14<00:00,  8.08s/it]
Audio_Speech_Actors_01-24 @ speed=2.5: 100%|██████████| 25/25 [04:07<00:00,  9.89s/it]
