# Speech Emotion Recognition: Dataset

### Import Libraries

In [1]:
# Import libraries
import pandas as pd
import numpy as np
import os
import sys
import subprocess

## Data Preprocessing

In [2]:
# Locations of the datasets used for training the model (WAV audio; MELD ships as MP4 video)

# CREMA-D (Crowd-sourced Emotional Multimodal Actors Dataset)
# Reference: CREMA-D: Crowd-sourced Emotional Multimodal Actors Dataset.
# Available at: https://github.com/CheyneyComputerScience/CREMA-D
CREMA = "CREMA/AudioWAV/"  # Folder holding the audio files (WAV)

# RAVDESS (Ryerson Audio-Visual Database of Emotional Speech and Song)
# Reference: "The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS)"
# by Livingstone & Russo is licensed under CC BY-NC-SA 4.0.
# Available at: https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio
RAVDESS = "RAVDESS/audio_speech_actors_01-24/"  # Folder holding one sub-folder of audio files per actor

# SAVEE (Surrey Audio-Visual Expressed Emotion)
# Reference: Surrey Audio-Visual Expressed Emotion (SAVEE) Dataset. [Online].
# Available: http://kahlan.eps.surrey.ac.uk/savee/
SAVEE = "SAVEE/ALL/"  # Folder holding the audio files (WAV)

# TESS (Toronto Emotional Speech Set)
# Reference: Toronto Emotional Speech Set. [Online].
# Available: https://tspace.library.utoronto.ca/handle/1807/24487
TESS = "TESS/"  # Folder holding one sub-folder of audio files (WAV) per speaker/emotion

# ESD (Emotional Speech Dataset)
# Only the English speakers are used by this model (i.e., Mandarin speakers 0-10 are excluded).
# Reference: Kun Zhou, Berrak Sisman, Rui Liu and Haizhou Li, "Seen and unseen emotional style
# transfer for voice conversion with a new emotional speech dataset", ICASSP 2021 - 2021 IEEE
# International Conference on Acoustics, Speech and Signal Processing (ICASSP). [Online].
# Available: https://github.com/HLTSingapore/Emotional-Speech-Data?tab=readme-ov-file
ESD = "ESD/"  # Folder holding speaker/emotion sub-folders of audio files (WAV)

# MELD (Multimodal EmotionLines Dataset)
# The WAV audio is extracted further down from the video files shipped with the dataset.
# References:
# S. Poria, D. Hazarika, N. Majumder, G. Naik, R. Mihalcea, E. Cambria. MELD: A Multimodal
# Multi-Party Dataset for Emotion Recognition in Conversation. (2018).
# Chen, S.Y., Hsu, C.C., Kuo, C.C. and Ku, L.W. EmotionLines: An Emotion Corpus of Multi-Party
# Conversations. arXiv preprint arXiv:1802.08379 (2018).
# Available: https://github.com/declare-lab/MELD
MELD = "MELD/train_splits/"  # Folder holding the video files (mp4)

### CREMA-D Dataset

In [3]:
# Generate CREMA-D dataframe

# Emotion labels: Maps the emotion code found in the filename to a string value
emotion_map = {
    'SAD': 'sad',
    'ANG': 'angry',
    'DIS': 'disgust',
    'FEA': 'fear',
    'HAP': 'happy',
    'NEU': 'neutral'
}

# Create Pandas dataframe with one row per audio file: its path and the emotion
# encoded in the third '_'-separated field of the filename (e.g. 1001_DFA_ANG_XX.wav).
# - Only '.wav' entries are considered, so stray files in the folder (e.g. OS metadata
#   files) cannot break the filename parsing.
# - The listing is sorted because os.listdir() returns entries in arbitrary order;
#   sorting keeps the row order reproducible between runs and machines.
# - The columns are given explicitly so an empty folder still yields a frame with
#   the expected 'File' and 'Emotion' columns.
crema_df = pd.DataFrame(
    [
        {
            'File': os.path.join(CREMA, file),
            'Emotion': emotion_map.get(file.split('_')[2], 'Unknown')  # Unmapped codes are labelled 'Unknown'
        }
        for file in sorted(os.listdir(CREMA))
        if file.lower().endswith('.wav')
    ],
    columns=['File', 'Emotion']
)

# Print dataframe header
print("CREMA-D - DataFrame head:")
print(crema_df.head(), "\n")

# Print emotion counts
print("CREMA-D - Emotion counts:")
print(crema_df['Emotion'].value_counts())



CREMA-D - DataFrame head:
                                 File  Emotion
0  CREMA/AudioWAV/1001_DFA_ANG_XX.wav    angry
1  CREMA/AudioWAV/1001_DFA_DIS_XX.wav  disgust
2  CREMA/AudioWAV/1001_DFA_FEA_XX.wav     fear
3  CREMA/AudioWAV/1001_DFA_HAP_XX.wav    happy
4  CREMA/AudioWAV/1001_DFA_NEU_XX.wav  neutral 

CREMA-D - Emotion counts:
Emotion
angry      1271
disgust    1271
fear       1271
happy      1271
sad        1271
neutral    1087
Name: count, dtype: int64


### RAVDESS Dataset

In [4]:
# Generate RAVDESS dataframe

# Emotion labels: Map the integer emotion code found in the filename to a string value
emotion_map = {
    1: 'neutral',
    #2: 'calm',  # Excluded: 'calm' only features in the RAVDESS dataset (N=192 is too small for the model)
    3: 'happy',
    4: 'sad',
    5: 'angry',
    6: 'fear',
    7: 'disgust',
    8: 'surprise'
}

# Create Pandas dataframe with one row per audio file (excluding 'calm'): its path and the
# emotion encoded in the third '-'-separated field of the filename.
# - Only directories are treated as actor folders and only '.wav' entries as audio files,
#   so stray files cannot raise NotADirectoryError or break the integer parsing.
# - Listings are sorted because os.listdir() returns entries in arbitrary order.
# - The columns are given explicitly so an empty dataset still yields the expected columns.
ravdess_df = pd.DataFrame(
    [
        {
            'File': os.path.join(RAVDESS, actor_dir, file_name),
            'Emotion': emotion_map.get(int(file_name.split('-')[2]), 'Unknown')
        }
        for actor_dir in sorted(os.listdir(RAVDESS))
        if os.path.isdir(os.path.join(RAVDESS, actor_dir))
        for file_name in sorted(os.listdir(os.path.join(RAVDESS, actor_dir)))
        if file_name.lower().endswith('.wav')  # Checked first, so int() below only sees audio filenames
        and int(file_name.split('-')[2]) != 2  # Exclude 'calm' (emotion with integer value of 2)
    ],
    columns=['File', 'Emotion']
)

# Print dataframe header
print("RAVDESS - DataFrame head:")
print(ravdess_df.head(), "\n")

# Print emotion counts
print("RAVDESS - Emotion counts:")
print(ravdess_df['Emotion'].value_counts())


RAVDESS - DataFrame head:
                                                File  Emotion
0  RAVDESS/audio_speech_actors_01-24/Actor_01\03-...  neutral
1  RAVDESS/audio_speech_actors_01-24/Actor_01\03-...  neutral
2  RAVDESS/audio_speech_actors_01-24/Actor_01\03-...  neutral
3  RAVDESS/audio_speech_actors_01-24/Actor_01\03-...  neutral
4  RAVDESS/audio_speech_actors_01-24/Actor_01\03-...    happy 

RAVDESS - Emotion counts:
Emotion
happy       192
sad         192
angry       192
fear        192
disgust     192
surprise    192
neutral      96
Name: count, dtype: int64


### SAVEE Dataset

In [5]:
# Generate SAVEE dataframe

# Map emotion codes (the letters that follow the speaker prefix in the filename) to labels
emotion_map = {
    'a': 'angry',
    'd': 'disgust',
    'f': 'fear',
    'h': 'happy',
    'n': 'neutral',
    'sa': 'sad',
    'su': 'surprise'
}

# Create Pandas dataframe with one row per audio file: its path and its emotion.
# The emotion code is read from the second '_'-separated field of the filename after
# dropping the extension and the trailing sentence number (e.g. 'DC_sa01.wav' -> 'sa').
# Stripping the digits explicitly, rather than slicing off a fixed 6 characters, keeps the
# parsing correct whatever the number of digits.
# - Only '.wav' entries are considered, so stray files cannot break the parsing.
# - Codes missing from emotion_map are labelled 'Unknown' instead of None, so they show up
#   in value_counts() rather than being silently dropped.
# - The listing is sorted because os.listdir() returns entries in arbitrary order.
savee_df = pd.DataFrame(
    [
        {
            'File': os.path.join(SAVEE, file),
            'Emotion': emotion_map.get(
                os.path.splitext(file)[0].split('_')[1].rstrip('0123456789'),
                'Unknown'
            )
        }
        for file in sorted(os.listdir(SAVEE))
        if file.lower().endswith('.wav')
    ],
    columns=['File', 'Emotion']
)

# Print dataframe header
print("SAVEE - DataFrame head:")
print(savee_df.head(), "\n")

# Print emotion counts
print("SAVEE - Emotion counts:")
print(savee_df['Emotion'].value_counts())

SAVEE - DataFrame head:
                   File Emotion
0  SAVEE/ALL/DC_a01.wav   angry
1  SAVEE/ALL/DC_a02.wav   angry
2  SAVEE/ALL/DC_a03.wav   angry
3  SAVEE/ALL/DC_a04.wav   angry
4  SAVEE/ALL/DC_a05.wav   angry 

SAVEE - Emotion counts:
Emotion
neutral     120
angry        60
disgust      60
fear         60
happy        60
sad          60
surprise     60
Name: count, dtype: int64


### TESS Dataset

In [6]:
# Generate TESS dataframe


def _tess_emotion(file_name):
    """Return the emotion label encoded in a TESS filename.

    The label is the text between the last '_' and the first following '.'
    (e.g. 'OAF_back_angry.wav' -> 'angry'); the code 'ps' is renamed to 'surprise'.
    """
    token = file_name.split('_')[-1].split('.')[0]
    return 'surprise' if token == 'ps' else token


# Create Pandas dataframe with one row per audio file: its path and its emotion.
# - Only directories are descended into and only '.wav' entries are kept, so stray files
#   cannot raise NotADirectoryError or end up as bogus rows.
# - Listings are sorted because os.listdir() returns entries in arbitrary order.
# - The loop variable is not called 'dir', which would shadow the builtin of that name.
tess_df = pd.DataFrame(
    [
        {
            'File': os.path.join(TESS, emotion_dir, file),
            'Emotion': _tess_emotion(file)
        }
        for emotion_dir in sorted(os.listdir(TESS))
        if os.path.isdir(os.path.join(TESS, emotion_dir))
        for file in sorted(os.listdir(os.path.join(TESS, emotion_dir)))
        if file.lower().endswith('.wav')
    ],
    columns=['File', 'Emotion']
)

# Print dataframe header
print("TESS - DataFrame head:")
print(tess_df.head(), "\n")

# Print emotion counts
print("TESS - Emotion counts:")
print(tess_df['Emotion'].value_counts())

TESS - DataFrame head:
                                File Emotion
0  TESS/OAF_angry\OAF_back_angry.wav   angry
1   TESS/OAF_angry\OAF_bar_angry.wav   angry
2  TESS/OAF_angry\OAF_base_angry.wav   angry
3  TESS/OAF_angry\OAF_bath_angry.wav   angry
4  TESS/OAF_angry\OAF_bean_angry.wav   angry 

TESS - Emotion counts:
Emotion
angry       400
disgust     400
fear        400
happy       400
neutral     400
surprise    400
sad         400
Name: count, dtype: int64


### ESD Dataset

In [7]:
# Generate ESD dataframe

# Create Pandas dataframe with one row per audio file: its path and the lower-cased name of
# the emotion folder that contains it.
# NOTE: no speaker filtering happens here - every speaker folder found under ESD is used, so
# this assumes the folder only contains the English speakers (0011 - 0020).
# - Only directories are descended into and only '.wav' entries are kept, so stray files
#   next to the speaker/emotion folders cannot raise NotADirectoryError or become rows.
# - Listings are sorted because os.listdir() returns entries in arbitrary order.
esd_df = pd.DataFrame(
    [
        {
            'File': os.path.join(ESD, speaker_folder, emotion_folder, audio_file),
            'Emotion': emotion_folder.lower()  # Using the emotion folder as label
        }
        for speaker_folder in sorted(os.listdir(ESD))  # Process speaker folders
        if os.path.isdir(os.path.join(ESD, speaker_folder))
        for emotion_folder in sorted(os.listdir(os.path.join(ESD, speaker_folder)))  # Process all emotion folders (e.g., 'Neutral' or 'Happy')
        if os.path.isdir(os.path.join(ESD, speaker_folder, emotion_folder))
        for audio_file in sorted(os.listdir(os.path.join(ESD, speaker_folder, emotion_folder)))  # Process all audio files
        if audio_file.lower().endswith('.wav')
    ],
    columns=['File', 'Emotion']
)

# Print dataframe header (labelled ESD; the label previously read "TESS" by mistake)
print("ESD - DataFrame head:")
print(esd_df.head(), "\n")

# Print emotion value counts
print("ESD - Emotion counts:")
print(esd_df['Emotion'].value_counts())

TESS - DataFrame head:
                             File Emotion
0  ESD/0011\Angry\0011_000351.wav   angry
1  ESD/0011\Angry\0011_000352.wav   angry
2  ESD/0011\Angry\0011_000353.wav   angry
3  ESD/0011\Angry\0011_000354.wav   angry
4  ESD/0011\Angry\0011_000355.wav   angry 

TESS - Emotion counts:
Emotion
angry       3500
happy       3500
neutral     3500
sad         3500
surprise    3500
Name: count, dtype: int64


### MELD Dataset

In [8]:
# Load the MELD annotation table; its rows carry the Dialogue_ID, Utterance_ID and
# Emotion values that the cells below use to locate each video and label its audio
MELD_csv = pd.read_csv(filepath_or_buffer='MELD/train_sent_emo.csv')

In [9]:
# Destination folder for the extracted audio (WAV format)
MELD_WAV = 'MELD/train_splits_audio'

# Make sure the destination folder is present; an already existing folder is left as it is
os.makedirs(name=MELD_WAV, exist_ok=True)

#### Extract Audio 

In [10]:
# Iterate over the CSV rows and extract audio from the corresponding mp4 video files.
# Videos that are missing are reported and skipped; audio files that already exist are
# not extracted again, so the cell can be re-run cheaply.
for _, row in MELD_csv.iterrows():
    # Obtain video filename based on the corresponding Dialogue_ID and Utterance_ID row values
    video_filename = f'dia{row["Dialogue_ID"]}_utt{row["Utterance_ID"]}.mp4'
    video_path = os.path.join(MELD, video_filename)

    # The audio file keeps the same name, except for the extension (WAV)
    audio_filename = video_filename.replace('.mp4', '.wav')
    audio_path = os.path.join(MELD_WAV, audio_filename)

    # Report and skip rows whose video file is missing
    if not os.path.isfile(video_path):
        print(f"File does not exist: {video_path}")
        continue

    # Nothing to do when the audio has already been extracted
    if os.path.isfile(audio_path):
        continue

    # Extract audio using ffmpeg. The command is passed as an argument list (no shell), so
    # paths containing spaces, quotes or shell metacharacters are handed over verbatim.
    # Without a shell, a missing ffmpeg executable raises FileNotFoundError immediately
    # instead of printing a shell error for every row.
    command = ['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path]
    result = subprocess.run(command)

    # A failed extraction must not leave a partial WAV behind: later cells (and re-runs of
    # this one) treat any existing file as a valid, finished extraction.
    if result.returncode != 0:
        print(f"ffmpeg failed (exit code {result.returncode}): {video_path}")
        if os.path.isfile(audio_path):
            os.remove(audio_path)

#### Extract Emotions 

In [11]:
# MELD labels that need renaming to match the labels used by the other datasets;
# every other label is kept as it is (lower-cased)
emotion_map = {'joy': 'happy', 'anger': 'angry', 'sadness': 'sad'}

# Create Pandas dataframe with one row per CSV entry whose extracted audio file exists.
# The inner generator builds each WAV path and lower-cased label once; the outer
# comprehension keeps the existing files and applies the label renaming.
meld_df = pd.DataFrame([
    {
        'File': wav_path,
        'Emotion': emotion_map.get(label, label)
    }
    for wav_path, label in (
        (
            os.path.join(MELD_WAV, f'dia{dialogue_id}_utt{utterance_id}.wav'),
            emotion.lower()
        )
        for dialogue_id, utterance_id, emotion in zip(
            MELD_csv['Dialogue_ID'], MELD_csv['Utterance_ID'], MELD_csv['Emotion']
        )
    )
    if os.path.isfile(wav_path)
])

# Show the first rows of the dataframe
print("MELD - DataFrame head:")
print(meld_df.head(), "\n")

# Show how many samples each emotion has
print("MELD - Emotion counts:")
print(meld_df['Emotion'].value_counts())

MELD - DataFrame head:
                                    File   Emotion
0  MELD/train_splits_audio\dia0_utt0.wav   neutral
1  MELD/train_splits_audio\dia0_utt1.wav   neutral
2  MELD/train_splits_audio\dia0_utt2.wav   neutral
3  MELD/train_splits_audio\dia0_utt3.wav   neutral
4  MELD/train_splits_audio\dia0_utt4.wav  surprise 

MELD - Emotion counts:
Emotion
neutral     4709
happy       1743
surprise    1205
angry       1109
sad          683
disgust      271
fear         268
Name: count, dtype: int64


## Concatenate

In [12]:
# Stack the per-dataset dataframes row-wise into a single File/Emotion table.
# ignore_index=True renumbers the rows 0..N-1; without it every source frame contributes its
# own 0-based labels, leaving duplicate index values that make label-based lookups
# (e.g. emotion_df.loc[0]) return several rows.
emotion_df = pd.concat(
    [crema_df, ravdess_df, savee_df, tess_df, esd_df, meld_df],
    axis=0,
    ignore_index=True
)

# Print combined dataframe header
print("Combined emotion DataFrame header:")
print(emotion_df.head(), "\n")

# Print combined emotion value counts
print("Combined emotion DataFrame value counts:")
print(emotion_df['Emotion'].value_counts())


Combined emotion DataFrame header:
                                 File  Emotion
0  CREMA/AudioWAV/1001_DFA_ANG_XX.wav    angry
1  CREMA/AudioWAV/1001_DFA_DIS_XX.wav  disgust
2  CREMA/AudioWAV/1001_DFA_FEA_XX.wav     fear
3  CREMA/AudioWAV/1001_DFA_HAP_XX.wav    happy
4  CREMA/AudioWAV/1001_DFA_NEU_XX.wav  neutral 

Combined emotion DataFrame value counts:
Emotion
neutral     9912
happy       7166
angry       6532
sad         6106
surprise    5357
disgust     2194
fear        2191
Name: count, dtype: int64


## Save CSV File

In [13]:
# Write the combined File/Emotion table to disk, leaving out the index column
emotion_df.to_csv(path_or_buf="emotion_df.csv", index=False)