In [11]:
import librosa
import soundfile
import os, glob, pickle
import numpy as np

In [25]:
#DataFlair - Extract features (mfcc, chroma, mel) from a sound file
def extract_feature(file_name, mfcc, chroma, mel):
    """Build a 1-D feature vector for a single audio file.

    Each enabled feature family is computed with librosa, averaged over
    its time frames, and the per-family vectors are concatenated in the
    fixed order MFCC -> chroma -> mel.

    Parameters
    ----------
    file_name : str or path-like
        Audio file to open with ``soundfile.SoundFile``.
    mfcc : bool
        Include the time-averaged MFCCs (``n_mfcc=40``).
    chroma : bool
        Include the time-averaged chromagram computed from the STFT magnitude.
    mel : bool
        Include the time-averaged mel spectrogram.

    Returns
    -------
    numpy.ndarray
        Concatenation of the requested feature vectors; an empty array
        when every flag is false.
    """
    with soundfile.SoundFile(file_name) as sound_file:
        X = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile returns multi-channel audio as (frames, channels), which is
    # not the sample layout librosa expects; average the channels to mono.
    if X.ndim > 1:
        X = X.mean(axis=1)

    # Seed with an empty array so the result is well-defined (and keeps the
    # same dtype promotion as before) even when no feature is requested.
    parts = [np.array([])]

    if mfcc:
        mfcc_mean = np.mean(
            librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0
        )
        parts.append(mfcc_mean)

    if chroma:
        # Only the chromagram needs the STFT magnitude, so compute it lazily.
        stft = np.abs(librosa.stft(X))
        chroma_mean = np.mean(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0
        )
        parts.append(chroma_mean)

    if mel:
        # `y` must be passed by keyword: recent librosa releases reject
        # positional audio arguments for feature extractors.
        mel_mean = np.mean(
            librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0
        )
        parts.append(mel_mean)

    return np.hstack(parts)

In [5]:
# Show the kernel's working directory; the "../raw_data/..." paths used in this notebook are resolved relative to it.
!pwd

/home/iases/code/pankaj-lewagon/ser/notebooks


In [38]:
# Smoke-test the extractor on one recording with all three feature families enabled.
file_name = '../raw_data/ravdess_data/Actor_01/03-01-01-01-01-02-01.wav'
extract_feature(file_name, mfcc=True, chroma=True, mel=True)

array([-6.83346802e+02,  4.88892593e+01, -1.68344450e+00,  1.91830025e+01,
        1.96959043e+00, -7.99272358e-01, -4.79502773e+00, -1.23425665e+01,
       -5.44743872e+00,  2.11286807e+00, -4.21495914e+00,  1.38785517e+00,
       -5.12799263e+00,  2.01476741e+00, -4.01003027e+00, -2.78699923e+00,
       -2.45019031e+00,  2.90786058e-01, -5.53608131e+00, -1.82486665e+00,
       -2.49778628e+00, -5.44946194e+00,  3.34946632e-01, -4.97791004e+00,
       -1.91664660e+00, -3.40251923e-01, -1.90425861e+00, -5.89618497e-02,
       -3.87117290e+00, -1.81754279e+00, -3.48097730e+00, -2.37625694e+00,
       -9.95904684e-01, -1.86546052e+00, -4.27006626e+00, -2.39640570e+00,
       -4.60663700e+00, -1.18474185e+00, -2.14025235e+00, -2.95848393e+00,
        6.60874784e-01,  7.05912471e-01,  7.03555584e-01,  6.72466695e-01,
        6.93958759e-01,  7.28634834e-01,  7.16797113e-01,  7.58790731e-01,
        7.85981476e-01,  7.86870778e-01,  7.73195446e-01,  7.21628845e-01,
        8.24575011e-07,  

In [27]:
#DataFlair - Emotions in the RAVDESS dataset
# Keys are two-digit, zero-padded codes ('01', '02', ...) assigned to the
# labels below in the order listed.
emotions = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}

#DataFlair - Emotions to observe
observed_emotions = [
    "calm",
    "happy",
    "fearful",
    "disgust",
]

In [28]:
#DataFlair - Load the data and extract features for each sound file
def load_data(test_size=0.2):
    """Extract features and emotion labels for every matching ``.wav`` file.

    Files are discovered with the glob pattern
    ``../raw_data/ravdess_data/Actor_*/*.wav`` (relative to the current
    working directory). The emotion label is looked up in ``emotions``
    using the third ``-``-separated field of the file's base name.

    Parameters
    ----------
    test_size : float, optional
        Currently unused; no train/test split is performed here. Kept so
        existing callers that pass it continue to work.

    Returns
    -------
    tuple[list, list]
        ``(x, y)`` where ``x[i]`` is the feature vector returned by
        ``extract_feature`` and ``y[i]`` the emotion label of the same file.

    Raises
    ------
    KeyError
        If a file name carries an emotion code that is not in ``emotions``.
    IndexError
        If a file name has fewer than three ``-``-separated fields.
    """
    x, y = [], []
    # glob yields paths in filesystem-dependent order; sort so the sample
    # order (and anything derived from it) is reproducible across runs.
    for file in sorted(glob.glob("../raw_data/ravdess_data/Actor_*/*.wav")):
        emotion_code = os.path.basename(file).split("-")[2]
        emotion = emotions[emotion_code]
        x.append(extract_feature(file, mfcc=True, chroma=True, mel=True))
        y.append(emotion)
    return x, y

In [29]:
# Run feature extraction over the whole dataset: x holds the per-file feature vectors, y the matching emotion labels.
x, y = load_data()

In [30]:
# Number of labels collected (one per processed file).
len(y)

1440

In [43]:
# Stack the per-file feature vectors into one array and inspect its shape.
np.array(x).shape

(1440, 180)