In [1]:
# Mount Google Drive into the Colab filesystem. The dataset and output
# folders configured in this notebook live under /content/drive, so this
# cell has to run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os

# Input: root folder of the speech dataset. Each sub-folder found here is
# scanned for .wav files by the processing loop.
data_path = "/content/drive/MyDrive/DS340_Final_Project/Audio_Speech_Actors_01-24"



# Output: root folder that receives the pitch-shifted spectrogram images.
base_out = "/content/drive/MyDrive/DS340_Final_Project/speech_augmented_spectrograms_pitch"

# Create the output root up front; exist_ok=True makes re-running this cell safe.
os.makedirs(base_out, exist_ok=True)

In [3]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from tqdm import tqdm
import glob

In [4]:
# Pitch offsets applied to every clip. Each value is passed as `n_steps` to
# librosa.effects.pitch_shift (semitones under librosa's default of 12 bins
# per octave), and each value gets its own output folder.
pitch_shifts = [-4, -2, 2, 4]

In [5]:
# Two-digit emotion code (third dash-separated field of a file name)
# mapped to the label used as the output sub-folder name.
emotion_map = {
    "01": "neutral",
    "02": "calm",
    "03": "happy",
    "04": "sad",
    "05": "angry",
    "06": "fearful",
    "07": "disgust",
    "08": "surprised",
}

In [6]:
# Generate one mel-spectrogram PNG per (pitch shift, audio clip) pair.
# Output layout: <base_out>/pitch_<+/-n>/<emotion>/<file stem>_pitch<+/-n>.png

# The dataset listing does not change between pitch shifts, so read it once.
# Sorting makes the processing order reproducible (os.listdir order is arbitrary).
actor_entries = sorted(os.listdir(data_path))

for n_steps in pitch_shifts:
    # 1) make a folder for this pitch shift
    pitch_dir = os.path.join(base_out, f"pitch_{n_steps:+}")
    os.makedirs(pitch_dir, exist_ok=True)

    # 2) inside, one subfolder per emotion
    for emo in emotion_map.values():
        os.makedirs(os.path.join(pitch_dir, emo), exist_ok=True)

    # 3) process all actors/files
    for actor in tqdm(actor_entries, desc=f"Pitch={n_steps:+}"):
        actor_path = os.path.join(data_path, actor)
        if not os.path.isdir(actor_path):
            continue  # stray files next to the actor folders

        for fn in sorted(os.listdir(actor_path)):
            if not fn.lower().endswith(".wav"):
                continue

            # extract emotion: third dash-separated field of the file name.
            # Names without that field are skipped rather than aborting the
            # whole (long-running) batch with an IndexError.
            name_fields = fn.split("-")
            if len(name_fields) < 3:
                continue
            emo_code = name_fields[2]
            emotion = emotion_map.get(emo_code)
            if emotion is None:
                continue

            # Build the PNG name from the file stem. The filter above is
            # case-insensitive, so a plain replace(".wav", ...) would leave a
            # name such as "X.WAV" unchanged and the image would be written
            # under an audio extension instead of ".png".
            stem = os.path.splitext(fn)[0]
            outname = f"{stem}_pitch{n_steps:+}.png"
            save_path = os.path.join(pitch_dir, emotion, outname)

            # load audio (sr=None keeps the file's native sampling rate)
            wav_path = os.path.join(actor_path, fn)
            y, sr = librosa.load(wav_path, sr=None)

            # apply pitch shift
            y_shift = librosa.effects.pitch_shift(y, sr=sr, n_steps=n_steps)

            # compute mel-spectrogram, in dB relative to its own peak power
            S = librosa.feature.melspectrogram(
                y=y_shift, sr=sr, n_mels=128, fmax=8000
            )
            S_db = librosa.power_to_db(S, ref=np.max)

            # plot & save; the figure is closed even if drawing or saving
            # fails, so an error cannot leave figures accumulating in memory.
            fig = plt.figure(figsize=(3, 3))
            try:
                librosa.display.specshow(S_db, sr=sr,
                                         x_axis='time', y_axis='mel')
                plt.axis('off')
                fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
            finally:
                plt.close(fig)

Pitch=-4: 100%|██████████| 25/25 [10:49<00:00, 25.96s/it]
Pitch=-2: 100%|██████████| 25/25 [04:01<00:00,  9.67s/it]
Pitch=+2: 100%|██████████| 25/25 [04:15<00:00, 10.20s/it]
Pitch=+4: 100%|██████████| 25/25 [04:20<00:00, 10.44s/it]
