# Speech Emotion Recognition - Feature Extraction

Databases used

* The Ryerson Audio-Visual Database of Emotional Speech and Song (RAVDESS)
* Toronto emotional speech set (TESS)

In [None]:
# Mount Google Drive (Colab only) so the dataset paths under /content/drive
# used later in this notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# import shutil
# import os

# # Define paths
# source_path = "/content/drive/My Drive/Datasets"  # Path in Google Drive
# target_path = "./Datasets"  # Local folder

# # Copy datasets to the local folder
# if not os.path.exists(target_path):
#     os.makedirs(target_path)

# shutil.copytree(source_path, target_path, dirs_exist_ok=True)

# print(f"Datasets copied to: {target_path}")


KeyboardInterrupt: 

### Import Libraries

Import necessary libraries

In [None]:
#!pip3 install pydub numpy python_speech_features
#!pip install resampy


Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python_speech_features
  Downloading python_speech_features-0.6.tar.gz (5.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Building wheels for collected packages: python_speech_features
  Building wheel for python_speech_features (setup.py) ... [?25l[?25hdone
  Created wheel for python_speech_features: filename=python_speech_features-0.6-py3-none-any.whl size=5868 sha256=e5d433a4ee60b76f8b164075a7901f0c5859ee18afadf6217ef5fc082cef3c4f
  Stored in directory: /root/.cache/pip/wheels/5a/9e/68/30bad9462b3926c29e315df16b562216d12bdc215f4d240294
Successfully built python_speech_features
Installing collected packages: python_speech_features, pydub
Successfully installed pydub-0.25.1 python_speech_features-0.6


In [None]:
import glob
import os
import librosa
import time
import numpy as np
import pandas as pd


### Load all files

We build the feature array by extracting Mel-frequency cepstral coefficients (MFCCs) from each audio file, while the class labels to predict are parsed from the file names.

#### Defining emotions to classify

Selecting the emotions to be classified. Note that the emotions 'neutral', 'calm' and 'surprised' are only found in the RAVDESS dataset, and 'pleasantly surprised' ('ps') is only available in the TESS dataset. To combine the emotions from both datasets into 8 emotion classes, we have changed 'pleasantly surprised' to 'surprised' and 'fearful' to 'fear'.

In [None]:
# Lookup table from the two-digit emotion code found in RAVDESS file names
# (codes run consecutively from '01' to '08') to the label used in this notebook.
emotions = {
    f"{code:02d}": label
    for code, label in enumerate(
        ["neutral", "calm", "happy", "sad", "angry", "fear", "disgust", "surprised"],
        start=1,
    )
}

# Labels to use when working with the TESS dataset only ('ps' is the raw
# TESS tag that load_data renames to 'surprised').
tess_emotions = ["angry", "disgust", "fear", "ps", "happy", "sad"]

# Labels to use when working with the RAVDESS dataset only.
ravdess_emotions = [
    "neutral", "calm", "angry", "happy",
    "disgust", "sad", "fear", "surprised",
]

# Labels kept when both datasets are combined; load_data filters on this list.
observed_emotions = [
    "sad", "angry", "happy", "disgust",
    "surprised", "neutral", "calm", "fear",
]

#### Feature extraction

Using the librosa package we can extract the MFCC features. This function loads the file given its path and, after resampling and computing the MFCC features, returns them averaged over all frames. We have selected the number of MFCCs as 40.

https://librosa.org/doc/latest/generated/librosa.feature.mfcc.html

In [None]:
def extract_feature(file_name, mfcc=True):
    """Load one audio file and return its averaged MFCC vector.

    Parameters
    ----------
    file_name : str or path-like
        Location of the audio file to read.
    mfcc : bool, optional
        When true (the default) the 40 MFCCs are computed and averaged over
        frames. When false an empty array is returned.

    Returns
    -------
    numpy.ndarray or None
        The feature vector, or None if anything went wrong while loading or
        processing the file (the error is printed, not raised).
    """
    try:
        # Read the waveform together with the sample rate librosa loaded it at.
        signal, rate = librosa.load(os.path.join(file_name), res_type='kaiser_fast')

        if not mfcc:
            # No feature requested: hand back an empty vector.
            return np.array([])

        # Transpose before averaging so that axis 0 is reduced, leaving one
        # mean value per coefficient.
        coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40)
        return np.mean(coefficients.T, axis=0)
    except Exception as e:
        # Best effort: report the problem and let the caller decide what to do.
        print(f"Error processing file {file_name}: {e}")
        return None


#### Choosing a dataset

Choose the dataset(s) you want to load using the following function

In [None]:
def dataset_options():
    """Print and return the on/off switch for each dataset source.

    Edit the boolean values below to choose which datasets load_data() reads.

    Returns
    -------
    dict
        Maps 'ravdess', 'ravdess_speech', 'ravdess_song' and 'tess' to a bool.
    """
    selection = {
        'ravdess': True,           # every RAVDESS sub-folder
        'ravdess_speech': False,   # RAVDESS Speech folder only
        'ravdess_song': False,     # RAVDESS Song folder only
        'tess': True,
    }
    print(selection)
    return selection

#### Load data

Load data from the required datasets, which are selected by calling the function dataset_options(). Extract features from each file with the selected emotions in the chosen datasets using the extract_feature() function defined above.

In [None]:
def load_data(test_size=0.2):
    """Extract MFCC features and emotion labels from the selected datasets.

    The datasets to read are taken from dataset_options(). RAVDESS labels come
    from the third dash-separated field of each file name (looked up in
    ``emotions``); TESS labels come from the third underscore-separated field,
    with 'ps' renamed to 'surprised'. Only emotions listed in
    ``observed_emotions`` are kept.

    Files are visited in sorted order so the result does not depend on the
    order in which the file system happens to list them. Files whose name
    cannot be parsed, or whose features could not be extracted, are skipped
    so that features and labels always stay aligned.

    Parameters
    ----------
    test_size : float, optional
        Currently unused; accepted only so that existing calls keep working.

    Returns
    -------
    dict
        ``{"X": [...], "y": [...]}`` - parallel lists holding one feature
        vector and one emotion label per successfully processed file.
    """
    x, y = [], []
    # feature to extract
    mfcc = True
    data = dataset_options()

    def add_sample(file, emotion):
        # Record a file only when its emotion is wanted AND extraction
        # succeeded (extract_feature returns None on failure); otherwise a
        # None row would end up in the feature list next to a valid label.
        if emotion not in observed_emotions:
            return
        feature = extract_feature(file, mfcc)
        if feature is None:
            return
        x.append(feature)
        y.append(emotion)

    paths = []
    if data['ravdess']:
        # The wildcard sub-folder already covers both Speech and Song.
        paths.append("/content/drive/MyDrive/Datasets/RAVDESS/*/Actor_*/*.wav")
    else:
        # Independent checks so Speech and Song can be combined freely.
        if data['ravdess_speech']:
            paths.append("/content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_*/*.wav")
        if data['ravdess_song']:
            paths.append("/content/drive/MyDrive/Datasets/RAVDESS/Song/Actor_*/*.wav")

    for path in paths:
        for file in sorted(glob.glob(path)):
            fields = os.path.basename(file).split("-")
            # The emotion code is the third field; an unknown code or a name
            # with too few fields is reported and skipped instead of crashing.
            emotion = emotions.get(fields[2]) if len(fields) > 2 else None
            if emotion is None:
                print(f"Skipping file with unrecognized name: {file}")
                continue
            add_sample(file, emotion)

    if data['tess']:
        for file in sorted(glob.glob("/content/drive/MyDrive/Datasets/TESS/*AF*.wav")):
            fields = os.path.basename(file).split("_")
            if len(fields) < 3:
                print(f"Skipping file with unrecognized name: {file}")
                continue
            emotion = fields[2][:-4]  # drop the trailing ".wav"
            if emotion == 'ps':
                emotion = 'surprised'
            add_sample(file, emotion)

    return {"X": x, "y": y}

In [None]:
# Time the full feature-extraction pass; Trial_dict holds the resulting
# {"X": ..., "y": ...} lists used by the following cells.
start_time = time.time()
Trial_dict = load_data(test_size=0.3)
elapsed_seconds = time.time() - start_time

print("--- Data loaded. Loading time: %s seconds ---" % elapsed_seconds)

{'ravdess': True, 'ravdess_speech': False, 'ravdess_song': False, 'tess': True}
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-01-01-01-01-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-02-01-02-02-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-02-01-01-02-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-02-01-02-01-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-02-02-01-02-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-02-01-01-01-23.wav: 'bool' object is not callable
Error processing file /content/drive/MyDrive/Datasets/RAVDESS/Speech/Actor_23/03-01-01-01-02-01-23.wav: 'bool' objec

In [None]:
# Wrap the extracted feature vectors and the labels in DataFrames
# (one row per entry of the corresponding list).
X, y = (pd.DataFrame(Trial_dict[key]) for key in ("X", "y"))

In [None]:
# Sanity check: the feature frame and the label frame should have the same number of rows.
X.shape, y.shape

((5252, 40), (5252, 1))

In [None]:
# The label frame was built from a plain list, so its single column is
# named 0; give it the descriptive name 'emotion'.
y = y.rename({0: 'emotion'}, axis='columns')

In [None]:
# Concatenate the feature columns and the label column side by side into a single DataFrame.
data = pd.concat((X, y), axis="columns")

In [None]:
# Preview the first rows of the combined frame (feature columns followed by 'emotion').
data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,31,32,33,34,35,36,37,38,39,emotion
0,-647.23822,65.720482,-8.224729,12.704829,8.271534,-2.352998,-7.441112,-0.913728,-12.620566,-2.754216,...,-1.569902,-2.871437,-1.603948,-2.730715,-0.072253,-0.398586,-0.207679,-0.370083,0.046289,neutral
1,-660.834106,58.47802,-3.08392,12.249708,5.201293,0.073609,-6.054542,-0.375419,-10.726009,-0.60555,...,-2.051799,-1.612795,-2.357852,-1.556595,-0.134125,-0.359325,0.182042,-1.439706,-2.266487,calm
2,-704.192261,61.750927,-0.234338,14.295746,6.214006,3.631025,-5.72832,-0.666088,-8.709855,-1.566127,...,-1.334364,-2.851598,-3.173694,-3.772836,-2.210445,-0.33849,2.104458,0.52522,0.650368,calm
3,-660.871704,58.284138,-4.250993,11.015914,4.093412,-0.148607,-5.153929,-1.326707,-9.730042,-1.225371,...,-0.997503,-1.711664,-1.531417,-0.395728,0.520581,-0.330974,0.436171,-0.689941,-1.341115,calm
4,-685.525208,68.543106,-0.107866,12.490178,7.141981,5.01181,-6.713319,-1.402928,-8.615516,-1.116701,...,-1.131711,-1.547446,-2.388347,-2.888326,-1.461788,-0.810914,0.310213,-0.631204,-0.00396,calm


## Shuffling data

In [None]:
# Shuffle the rows with a fixed seed so the permutation is reproducible when
# the notebook is run from a fresh kernel. The original index labels are kept
# (only the row order changes), as with the previous reindex-based shuffle.
SHUFFLE_SEED = 42
data = data.sample(frac=1, random_state=SHUFFLE_SEED)

In [None]:
# Store the shuffled RAVDESS + TESS features so later runs can skip extraction.
# Note: the index is written too, as the first (unnamed) CSV column.
data.to_csv("RAVTESS_MFCC_Observed.csv")