## Speaker-dependent feature extraction

In [1]:
# import library
import glob
import os
import librosa
import numpy as np

In [2]:
# clone the repository that contains the dataset from GitHub
!git clone https://github.com/miftanurfarid/speech_emotion_recognition/

Cloning into 'speech_emotion_recognition'...
remote: Enumerating objects: 6987, done.[K
remote: Counting objects: 100% (126/126), done.[K
remote: Compressing objects: 100% (59/59), done.[K
remote: Total 6987 (delta 77), reused 116 (delta 67), pack-reused 6861[K
Receiving objects: 100% (6987/6987), 745.63 MiB | 24.64 MiB/s, done.
Resolving deltas: 100% (397/397), done.
Checking out files: 100% (5322/5322), done.


In [3]:
# move into the cloned repository so the relative data paths used below resolve
cd speech_emotion_recognition

/content/speech_emotion_recognition


In [4]:
# show the current working directory
pwd

'/content/speech_emotion_recognition'

In [5]:
# directory holding one sub-folder of .wav recordings per actor
data_path = 'data/song/'
# match every .wav file exactly one directory level below data_path
wav_pattern = os.path.join(data_path + '/*/', '*.wav')
# sorted so the file order is deterministic
files = sorted(glob.glob(wav_pattern))
files[0]

'data/song/Actor_01/03-02-01-01-01-01-01.wav'

In [6]:
# display the complete sorted list of .wav paths
files

['data/song/Actor_01/03-02-01-01-01-01-01.wav',
 'data/song/Actor_01/03-02-01-01-01-02-01.wav',
 'data/song/Actor_01/03-02-01-01-02-01-01.wav',
 'data/song/Actor_01/03-02-01-01-02-02-01.wav',
 'data/song/Actor_01/03-02-02-01-01-01-01.wav',
 'data/song/Actor_01/03-02-02-01-01-02-01.wav',
 'data/song/Actor_01/03-02-02-01-02-01-01.wav',
 'data/song/Actor_01/03-02-02-01-02-02-01.wav',
 'data/song/Actor_01/03-02-02-02-01-01-01.wav',
 'data/song/Actor_01/03-02-02-02-01-02-01.wav',
 'data/song/Actor_01/03-02-02-02-02-01-01.wav',
 'data/song/Actor_01/03-02-02-02-02-02-01.wav',
 'data/song/Actor_01/03-02-03-01-01-01-01.wav',
 'data/song/Actor_01/03-02-03-01-01-02-01.wav',
 'data/song/Actor_01/03-02-03-01-02-01-01.wav',
 'data/song/Actor_01/03-02-03-01-02-02-01.wav',
 'data/song/Actor_01/03-02-03-02-01-01-01.wav',
 'data/song/Actor_01/03-02-03-02-01-02-01.wav',
 'data/song/Actor_01/03-02-03-02-02-01-01.wav',
 'data/song/Actor_01/03-02-03-02-02-02-01.wav',
 'data/song/Actor_01/03-02-04-01-01-01-0

In [7]:
def extract_feature(file_name, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file_name : str
        Path of the audio file, handed to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray
        The mean of each MFCC coefficient over all frames; one value per
        coefficient.
    """
    # sr=None asks librosa to keep the file's own sampling rate
    X, sample_rate = librosa.load(file_name, sr=None)
    mfcc = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=n_mfcc)
    # transpose so frames run along axis 0, then average over the frames
    return np.mean(mfcc.T, axis=0)

In [8]:
# start with two empty accumulators:
#   feat -> spectral feature vectors, lab -> emotion labels
feat, lab = [], []

In [9]:
# walk through every audio file, collecting its feature vector and its label
for wav_path in files:
    print("Extracting features from ", wav_path)
    feature_vector = np.hstack(extract_feature(wav_path))
    # the third dash-separated field of the file name holds the label code
    emotion_code = os.path.basename(wav_path).split('-')[2]
    feat.append(feature_vector)
    lab.append(int(emotion_code) - 1)  # shift so that labels start from 0

Extracting features from  data/song/Actor_01/03-02-01-01-01-01-01.wav
Extracting features from  data/song/Actor_01/03-02-01-01-01-02-01.wav
Extracting features from  data/song/Actor_01/03-02-01-01-02-01-01.wav
Extracting features from  data/song/Actor_01/03-02-01-01-02-02-01.wav
Extracting features from  data/song/Actor_01/03-02-02-01-01-01-01.wav
Extracting features from  data/song/Actor_01/03-02-02-01-01-02-01.wav
Extracting features from  data/song/Actor_01/03-02-02-01-02-01-01.wav
Extracting features from  data/song/Actor_01/03-02-02-01-02-02-01.wav
Extracting features from  data/song/Actor_01/03-02-02-02-01-01-01.wav
Extracting features from  data/song/Actor_01/03-02-02-02-01-02-01.wav
Extracting features from  data/song/Actor_01/03-02-02-02-02-01-01.wav
Extracting features from  data/song/Actor_01/03-02-02-02-02-02-01.wav
Extracting features from  data/song/Actor_01/03-02-03-01-01-01-01.wav
Extracting features from  data/song/Actor_01/03-02-03-01-01-02-01.wav
Extracting features 

In [10]:
# write the collected features and labels as .npy files inside data_path
features_file = data_path + 'x_ajrana.npy'
labels_file = data_path + 'y_ajrana.npy'
np.save(features_file, feat)
np.save(labels_file, lab)