In [1]:
import os
import librosa
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Constants
# Root directory that the dataset folder names are joined onto when gathering files.
DATA_DIR = "../data"
# Sampling rate (Hz) passed to librosa.load as `sr`.
SAMPLE_RATE = 22050
# Passed to librosa.load as `duration`, i.e. how many seconds are read per file.
DURATION = 3
# Number of samples in a full DURATION-second clip.
# NOTE(review): not referenced anywhere else in the visible code.
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION
# NOTE(review): not referenced in the visible code; extract_mfcc_with_deltas has
# its own default of 130 for max_pad_len -- confirm the two are meant to match.
MAX_PAD_LEN = 130  # Time steps

# Emotion label mapping
def get_label_from_filename(filename):
    """Return the emotion name encoded in a dash-separated filename.

    The emotion code is the third dash-separated field of ``filename``
    (for example ``"03-01-05-01-02-01-12.wav"`` carries code ``5``).

    Args:
        filename: Base name of the audio file (not a full path).

    Returns:
        The emotion name for codes 1-8, or ``None`` when the code is outside
        that range or the filename has no integer third field.
    """
    emotion_map = {
        1: "neutral", 2: "calm", 3: "happy", 4: "sad",
        5: "angry", 6: "fearful", 7: "disgust", 8: "surprised"
    }
    parts = filename.split("-")
    try:
        emotion = int(parts[2])
    except (IndexError, ValueError):
        # A stray file that does not follow the naming scheme should be
        # skipped by the caller, not abort the whole directory scan.
        return None
    return emotion_map.get(emotion)

# MFCC + Delta + Delta-Delta extraction
def extract_mfcc_with_deltas(y, sr, max_pad_len=130, n_mfcc=40):
    """Compute MFCCs plus first- and second-order deltas with a fixed frame count.

    Args:
        y: Audio samples handed to ``librosa.feature.mfcc``.
        sr: Sampling rate of ``y``.
        max_pad_len: Number of frames (axis 1) in the returned array.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        Array with the MFCCs, delta and delta-delta stacked along the last
        axis; axis 1 is zero-padded at the end or truncated to
        ``max_pad_len`` frames.
    """
    base = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    channels = [
        base,
        librosa.feature.delta(base),
        librosa.feature.delta(base, order=2),
    ]
    stacked = np.stack(channels, axis=-1)  # shape: (n_mfcc, T, 3)

    missing = max_pad_len - stacked.shape[1]
    if missing > 0:
        # Too few frames: append zero frames at the end of the time axis.
        return np.pad(stacked, ((0, 0), (0, missing), (0, 0)), mode='constant')
    # Enough (or too many) frames: keep only the leading max_pad_len.
    return stacked[:, :max_pad_len, :]

# Feature processing with augmentation
def process_file(file_path, label, augment=True):
    """Load one audio file and return its feature arrays with matching labels.

    The file is loaded at SAMPLE_RATE for at most DURATION seconds and passed
    through extract_mfcc_with_deltas. With ``augment`` enabled, features are
    also extracted from a noisy copy, a time-stretched copy (rate 0.9) and a
    pitch-shifted copy (+2 steps).

    Args:
        file_path: Path of the audio file to load.
        label: Label attached to every feature array produced from this file.
        augment: When true, add the augmented variants after the original.

    Returns:
        ``(features, labels)`` as two lists of equal length. If processing
        fails part-way, the error is printed and whatever was collected
        before the failure is returned (possibly two empty lists).
    """
    features = []
    labels = []
    try:
        y, sr = librosa.load(file_path, sr=SAMPLE_RATE, duration=DURATION)

        # Original
        features.append(extract_mfcc_with_deltas(y, sr))
        labels.append(label)

        if augment:
            # Add Noise (unseeded Gaussian noise, so it differs between runs)
            y_noise = y + 0.005 * np.random.randn(len(y))
            features.append(extract_mfcc_with_deltas(y_noise, sr))
            labels.append(label)

            # Time Stretch: best-effort, a failure here must not drop the
            # remaining augmentations for this file.
            try:
                y_stretch = librosa.effects.time_stretch(y, rate=0.9)
                mfcc_stretch = extract_mfcc_with_deltas(y_stretch, sr)
            except Exception as e:
                # Catch Exception rather than everything so Ctrl-C still
                # interrupts, and report the skip instead of hiding it.
                print(f"Skipping time-stretch for {file_path}: {e}")
            else:
                features.append(mfcc_stretch)
                labels.append(label)

            # Pitch Shift
            y_pitch = librosa.effects.pitch_shift(y, sr=sr, n_steps=2)
            features.append(extract_mfcc_with_deltas(y_pitch, sr))
            labels.append(label)

    except Exception as e:
        print(f"Failed processing {file_path}: {e}")
    return features, labels

# Step 1: Gather file paths (excluding 'sad')
# os.listdir returns entries in arbitrary order, so both listings are sorted:
# this keeps file_label_pairs, and therefore the seeded split that follows,
# identical across machines and runs.
file_label_pairs = []
for folder in ["Audio_Speech_Actors_01_24", "Audio_Songs_Actors_01_24"]:
    full_path = os.path.join(DATA_DIR, folder)
    for actor_folder in sorted(os.listdir(full_path)):
        actor_path = os.path.join(full_path, actor_folder)
        if not os.path.isdir(actor_path):
            continue
        for file in sorted(os.listdir(actor_path)):
            if not file.endswith(".wav"):
                continue
            file_path = os.path.join(actor_path, file)
            label = get_label_from_filename(file)
            # Skip files with no recognised emotion, and drop 'sad' entirely.
            if label and label != "sad":
                file_label_pairs.append((file_path, label))

# Step 2: Stratified train/val split
# 80/20 split over (path, label) pairs, stratified on the label so each
# emotion keeps the same proportion in both subsets.
labels_only = [pair[1] for pair in file_label_pairs]
train_files, val_files = train_test_split(
    file_label_pairs,
    test_size=0.2,
    random_state=42,
    stratify=labels_only,
)

# Step 3: Process train set with augmentation
# Each training file contributes its original features plus augmented variants.
train_data, train_labels = [], []
for wav_path, emotion in tqdm(train_files, desc="Processing training data"):
    extracted, extracted_labels = process_file(wav_path, emotion, augment=True)
    train_data += extracted
    train_labels += extracted_labels

# Step 4: Process validation set without augmentation
# Validation files contribute only their original (un-augmented) features.
val_data, val_labels = [], []
for wav_path, emotion in tqdm(val_files, desc="Processing validation data"):
    extracted, extracted_labels = process_file(wav_path, emotion, augment=False)
    val_data += extracted
    val_labels += extracted_labels

# Step 5: Save extracted features
# Create the output directory first so that a missing ../models does not throw
# away the features that were just extracted.
os.makedirs("../models", exist_ok=True)
np.savez("../models/train_features_augmented_no_sad.npz", data=np.array(train_data), labels=np.array(train_labels))
np.savez("../models/val_features_no_sad.npz", data=np.array(val_data), labels=np.array(val_labels))

print("Saved train_features_augmented_no_sad.npz and val_features_no_sad.npz")


  "class": algorithms.Blowfish,
Processing training data: 100%|████████████████████████████████████████████████████| 1660/1660 [04:21<00:00,  6.36it/s]
Processing validation data: 100%|████████████████████████████████████████████████████| 416/416 [00:14<00:00, 27.80it/s]


Saved train_features_augmented_no_sad.npz and val_features_no_sad.npz
