In [1]:
!pip install soundfile



In [5]:
import glob
import os
import warnings

import librosa
import numpy as np
import soundfile
from sklearn.model_selection import train_test_split


# Emotion label for each zero-padded two-digit code, "01" through "08".
# load_data() looks the code up from the third dash-separated field of a
# file's basename.
int2emotion = {
    f"{code:02d}": label
    for code, label in enumerate(
        (
            "neutral",
            "calm",
            "happy",
            "sad",
            "angry",
            "fearful",
            "disgust",
            "surprised",
        ),
        start=1,
    )
}


# Subset of emotion labels that load_data() keeps; files labelled with any
# other emotion are skipped.
AVAILABLE_EMOTIONS = {"neutral", "happy", "sad", "angry"}

In [6]:
def extract_feature(file_name, **kwargs):
    """Build one flat feature vector for an audio file.

    Each enabled feature is computed with librosa, averaged over the last axis
    of the returned feature matrix (time frames in librosa's layout), and
    appended to the output in a fixed order: mfcc, chroma, mel, contrast,
    tonnetz.

    Args:
        file_name: Path of an audio file readable by ``soundfile``.
        **kwargs: Boolean switches ``mfcc``, ``chroma``, ``mel``, ``contrast``
            and ``tonnetz``. A missing or falsy switch skips that feature.

    Returns:
        1-D ``numpy.ndarray`` with the selected features concatenated, or an
        empty array when no switch is enabled.
    """
    use_mfcc = kwargs.get("mfcc")
    use_chroma = kwargs.get("chroma")
    use_mel = kwargs.get("mel")
    use_contrast = kwargs.get("contrast")
    use_tonnetz = kwargs.get("tonnetz")

    # Only the read needs the file handle; release it before the heavy math.
    with soundfile.SoundFile(file_name) as sound_file:
        signal = sound_file.read(dtype="float32")
        sample_rate = sound_file.samplerate

    # soundfile yields (frames, channels) for multi-channel audio, while the
    # librosa calls below expect time on the last axis. Downmix to mono so
    # stereo recordings produce the same feature length as mono ones.
    if signal.ndim > 1:
        signal = signal.mean(axis=1)

    def time_average(feature_matrix):
        # Collapse (n_features, n_frames) to one value per feature.
        return np.mean(feature_matrix.T, axis=0)

    # The magnitude spectrogram is shared by chroma and spectral contrast.
    stft = np.abs(librosa.stft(signal)) if (use_chroma or use_contrast) else None

    # Seeding with an empty float64 array keeps the output dtype (and the
    # empty-result case) identical to incremental hstack-ing.
    parts = [np.array([])]
    if use_mfcc:
        parts.append(time_average(
            librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40)))
    if use_chroma:
        parts.append(time_average(
            librosa.feature.chroma_stft(S=stft, sr=sample_rate)))
    if use_mel:
        # `y` must be passed by keyword: it is keyword-only in librosa >= 0.10.
        parts.append(time_average(
            librosa.feature.melspectrogram(y=signal, sr=sample_rate)))
    if use_contrast:
        parts.append(time_average(
            librosa.feature.spectral_contrast(S=stft, sr=sample_rate)))
    if use_tonnetz:
        harmonic = librosa.effects.harmonic(signal)
        parts.append(time_average(
            librosa.feature.tonnetz(y=harmonic, sr=sample_rate)))
    return np.hstack(parts)

In [7]:
# Default location of the audio files; pass `data_glob` to load_data() to
# point at a different copy instead of editing this machine-specific path.
DEFAULT_DATA_GLOB = "C:\\Users\\Dell\\Downloads\\ML lab\\speech emotion\\Actor_*\\*.wav"


def load_data(test_size=0.2, data_glob=DEFAULT_DATA_GLOB, verbose=True):
    """Extract features for every matching audio file and split train/test.

    The emotion label is taken from the third dash-separated field of each
    file's basename via ``int2emotion``. Files whose emotion is not in
    ``AVAILABLE_EMOTIONS`` are skipped. Files that cannot be parsed or read
    are skipped with a warning instead of silently ending the whole load.

    Args:
        test_size: Forwarded to ``train_test_split``.
        data_glob: Glob pattern selecting the ``.wav`` files to load.
        verbose: When true, print each file's basename as it is visited.

    Returns:
        The ``train_test_split`` result for the feature array and label list,
        using ``random_state=7``.

    Raises:
        ValueError: If no file produced a usable sample.
    """
    features, labels = [], []

    # glob order depends on the filesystem; sorting makes the seeded split
    # reproducible across machines.
    for path in sorted(glob.glob(data_glob)):
        basename = os.path.basename(path)
        if verbose:
            print(basename)

        try:
            emotion = int2emotion[basename.split("-")[2]]
            if emotion not in AVAILABLE_EMOTIONS:
                continue
            vector = extract_feature(path, mfcc=True, chroma=True, mel=True)
        except Exception as exc:
            # Stay best-effort per file, but report it. Catching Exception
            # (not a bare except) lets KeyboardInterrupt stop the load.
            warnings.warn(f"Skipping {path!r}: {exc!r}", stacklevel=2)
            continue

        features.append(vector)
        labels.append(emotion)

    if not features:
        raise ValueError(f"No usable audio files matched {data_glob!r}")

    return train_test_split(
        np.array(features), labels, test_size=test_size, random_state=7
    )