In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Title: Dataset Preparation for Speech Emotion Recognition (SigWavNet) - Emo-DB
Author: Somil Bisht (adapted from Alaa Nfissi)
Date: June 25, 2025
Description: Prepares the Berlin Emotional Speech Database (Emo-DB) for SER model training.
"""

import os
import pandas as pd

# Define the path to the Emo-DB dataset (update this accordingly)
emodb_data_path = "/workspaces/SigWavNet-Learning/EmoDB"  # e.g., "./emodb/"

EMODB_path = os.path.abspath(emodb_data_path)

# Map emotion codes to full names
emotion_map = {
    'W': 'angry',
    'L': 'boredom',
    'E': 'disgust',
    'A': 'fear',
    'F': 'happy',
    'T': 'sad',
    'N': 'neutral'
}

# List all .wav files
audio_files = [os.path.join(EMODB_path, f) for f in os.listdir(EMODB_path) if f.endswith('.wav')]

# Parse emotion from filename
records = []
for file_path in audio_files:
    filename = os.path.basename(file_path)
    emotion_code = filename[5]  # 6th character in filename
    if emotion_code in emotion_map:
        records.append({
            'path': file_path,
            'source': 'EMO-DB',
            'label': emotion_map[emotion_code]
        })

# Create DataFrame and export to CSV
emodb_df = pd.DataFrame(records)
emodb_df.to_csv('EMO_DB_dataset.csv', index=False)
print(emodb_df.head())


                                               path  source    label
0  /workspaces/SigWavNet-Learning/EmoDB/13a02Ec.wav  EMO-DB  disgust
1  /workspaces/SigWavNet-Learning/EmoDB/16a07Lb.wav  EMO-DB  boredom
2  /workspaces/SigWavNet-Learning/EmoDB/15a02Ea.wav  EMO-DB  disgust
3  /workspaces/SigWavNet-Learning/EmoDB/08a04La.wav  EMO-DB  boredom
4  /workspaces/SigWavNet-Learning/EmoDB/09b09Ea.wav  EMO-DB  disgust
