In [2]:
import pandas as pd
import librosa
import numpy as np

In [3]:
# Read the RAVDESS speech metadata table. index_col=False tells pandas not to
# take the first CSV column as the row index, so a default integer index is used.
df = pd.read_csv(filepath_or_buffer='RAVDESS_speech.csv', index_col=False)
# Preview the first five rows.
df.head(n=5)

Unnamed: 0,filePath,emotion,isFemale,actorId
0,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16
1,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16
2,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16
3,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16
4,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,7,True,16


In [4]:
# Binary targets derived from the numeric `emotion` code: 1 when the code is
# below the cut-off, 0 otherwise.
# NOTE(review): the column names imply codes 1-2 are neutral/calm and 3 is
# happy - confirm against the dataset's label scheme.
df['isCalmOrNeutral'] = (df['emotion'] < 3).astype('int64')
df['isCalmOrNeutralOrHappy'] = (df['emotion'] < 4).astype('int64')

In [5]:
# Preview the first five rows, now including the two binary label columns.
df.head(n=5)

Unnamed: 0,filePath,emotion,isFemale,actorId,isCalmOrNeutral,isCalmOrNeutralOrHappy
0,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16,0,0
1,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16,0,0
2,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16,0,0
3,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16,0,0
4,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,7,True,16,0,0


In [6]:
# Class balance of the calm/neutral label (count of rows per value).
df['isCalmOrNeutral'].value_counts()

0    1152
1     288
Name: isCalmOrNeutral, dtype: int64

In [21]:
# Class balance of the calm/neutral/happy label (count of rows per value).
df['isCalmOrNeutralOrHappy'].value_counts()

0    960
1    480
Name: isCalmOrNeutralOrHappy, dtype: int64

In [30]:
def mel_spectogram_db(file_name, mels=3, sr=8000):
    """Compute a dB-scaled mel spectrogram for one audio file.

    The file is loaded with librosa at sampling rate ``sr``, turned into a mel
    power spectrogram with ``mels`` bands, and converted to decibels using the
    spectrogram's own maximum as the reference (``ref=np.max``).

        Args:
            file_name (str): Path of the audio file to load
            mels (int): Number of mel bands (forwarded as ``n_mels``)
            sr: Target sampling rate handed to ``librosa.load``
        Returns:
            numpy.ndarray: Transposed dB mel spectrogram, i.e. one row per
                time frame and one column per mel band
    """
    signal, rate = librosa.load(file_name, sr=sr, res_type="kaiser_fast")
    power = librosa.feature.melspectrogram(y=signal, sr=rate, n_mels=mels)
    # Transpose so that time frames run along the first axis.
    return np.transpose(librosa.power_to_db(power, ref=np.max))

In [31]:
# NOTE(review): `mel` is initialised here but not referenced by any visible
# cell - confirm whether it is still needed.
mel = []
# Compute one mel spectrogram per file path and store them in a new column.
df['mel'] = list(map(mel_spectogram_db, df.filePath))

In [7]:
def getSamples(file_name):
    """Load an audio file with librosa's default settings and return its samples.

    Args:
        file_name (str): Path of the audio file to load
    Returns:
        The sample array returned by ``librosa.load``; the accompanying
        sampling rate is discarded.
    """
    waveform, _rate = librosa.load(file_name)
    return waveform
    
# Load the raw samples of every file and store them in a new column.
df['samples'] = list(map(getSamples, df.filePath))

In [16]:
# CSV export, kept unchanged for existing consumers. Cells that hold NumPy
# arrays (`mel`, `samples`) are written as their text form, and NumPy
# abbreviates arrays above its print threshold with "...", so those columns
# cannot be recovered from this file.
df.to_csv('RAVDESS_speech_withMel_withSamples.csv', index=False)
# Lossless companion file: pickling keeps the array cells intact, so later
# steps can reload them with pd.read_pickle instead of re-decoding the audio.
df.to_pickle('RAVDESS_speech_withMel_withSamples.pkl')

In [33]:
#df.to_csv('RAVDESS_speech_withMel.csv', index=False)

In [41]:
# Shape of the first row's spectrogram.
np.shape(df['mel'].iloc[0])

(62, 3)

In [23]:
# Copy the trimmed-sample columns from `df` into `df_old`.
# NOTE(review): `df_old` and these two columns are not created in any visible
# cell - confirm where they come from before relying on a fresh-kernel run.
for _column in ('samples_trimmed', 'samples_trimmed_duration'):
    df_old[_column] = df[_column]


In [24]:
# CSV export, kept unchanged for existing consumers. Array-valued cells (e.g.
# `samples_trimmed`) are written as their text form, which NumPy abbreviates
# with "..." for long arrays, so they cannot be recovered from this file.
df_old.to_csv('RAVDESS_speech_withMelSamplesTrimmed.csv', index=False)
# Lossless companion file: pickling keeps the array cells intact and can be
# reloaded with pd.read_pickle.
df_old.to_pickle('RAVDESS_speech_withMelSamplesTrimmed.pkl')

In [25]:
# Preview the first five rows of the working frame.
df.head(n=5)

Unnamed: 0,filePath,emotion,isFemale,actorId,isCalmOrNeutral,isCalmOrNeutralOrHappy,samples,samples_trimed,samples_trimed_duration
0,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16,0,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[9.2048276e-07, -7.6235585e-07, 6.551878e-07, ...",2.275556
1,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16,0,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[2.3352652e-06, -1.8194497e-06, 1.3951085e-06,...",2.182676
2,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,6,True,16,0,0,"[3.0036153e-05, 2.7443759e-05, 9.890327e-07, 6...","[0.0006767583, 0.000115308, -0.00037835707, -0...",2.995374
3,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,5,True,16,0,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-0.00034478158, -0.00014136413, 5.1286745e-05...",2.345215
4,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,7,True,16,0,0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.3189371e-07, 1.5032063e-07, -1.5891466e-07...",2.484535


In [26]:
import math
# Round every trimmed duration up to the next whole number.
df['duration_rounded'] = df['samples_trimmed_duration'].map(math.ceil)


In [27]:
# Number of rows for each rounded-up duration value.
df['duration_rounded'].value_counts()

4    719
3    519
2    109
5     91
6      2
Name: duration_rounded, dtype: int64

In [28]:
# Rows whose rounded-up duration equals 6.
df.loc[df['duration_rounded'] == 6]

Unnamed: 0,filePath,emotion,isFemale,actorId,isCalmOrNeutral,isCalmOrNeutralOrHappy,samples,samples_trimed,samples_trimed_duration,duration_rounded
336,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_1...,7,False,19,0,0,"[-2.310229e-05, -2.2669916e-05, 2.4835879e-06,...","[0.00010436475, 0.00015135646, 0.00014301769, ...",5.016553,6
1362,data/RAVDESS/Audio_Speech_Actors_01-24/Actor_0...,6,False,1,0,0,"[0.00031686827, 0.00046282235, 0.00010160641, ...","[0.00031686827, 0.00046282235, 0.00010160641, ...",5.005034,6


In [29]:
# Carry the rounded duration over to `df_old` before exporting it.
df_old['duration_rounded'] = df['duration_rounded']
# CSV export, kept unchanged for existing consumers. Array-valued cells are
# written as their text form, which NumPy abbreviates with "..." for long
# arrays, so they cannot be recovered from this file.
df_old.to_csv('RAVDESS_speech_withMelSamplesTrimmedDuree.csv', index=False)
# Lossless companion file: pickling keeps the array cells intact and can be
# reloaded with pd.read_pickle.
df_old.to_pickle('RAVDESS_speech_withMelSamplesTrimmedDuree.pkl')