In [1]:
import pandas as pd
import audioread
import numpy as np
import librosa
import math
import re
import os
from multiprocessing import Pool
import warnings
warnings.filterwarnings('ignore')

In [2]:
data_audio = pd.read_csv('../Handout/label_train.csv')

In [3]:
#Mã label: neutral(0), happy(1), sad(2), anger(3), fear(4), disgust(5)
label = {0:'neutral', 1:'happy', 2:'sad', 3:'anger', 4:'fear', 5:'disgust'}
data_audio.head()

Unnamed: 0,File,Label
0,PAEP-004139_noise.png,0
1,PAEP-000458_stretch.png,4
2,PAEP-004966_shift.png,1
3,PAEP-004845_noise.png,4
4,PAEP-004196_stretch.png,3


In [4]:
def getfeature(fname):
    timeseries_length=128
    hop_length = 512
    data = np.zeros((timeseries_length, 33), dtype=np.float64)

    y, sr = librosa.load(fname)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    spectral_center = librosa.feature.spectral_centroid(y=y, sr=sr, hop_length=hop_length)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=hop_length)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, hop_length=hop_length)

    filelength = timeseries_length if mfcc.shape[1] >= timeseries_length else mfcc.shape[1]
    

    data[-filelength:, 0:13] = mfcc.T[0:timeseries_length, :]
    data[-filelength:, 13:14] = spectral_center.T[0:timeseries_length, :]
    data[-filelength:, 14:26] = chroma.T[0:timeseries_length, :]
    data[-filelength:, 26:33] = spectral_contrast.T[0:timeseries_length, :]

    return data

In [5]:
def processtrain(fname):
    data = getfeature(fname)
    name = fname.split('/')[-1] 
    emotion = data_audio[data_audio["File"] == name]["Label"].item()
    return data, emotion

def processtest(fname):
    data = getfeature(fname)
    name = fname.split('/')[-1]   
    return data, name

In [6]:
def absoluteFilePaths(directory):
    for dirpath,_,filenames in os.walk(directory):
        for f in filenames:
            yield os.path.abspath(os.path.join(dirpath, f))

In [7]:
def train():
    files = list(absoluteFilePaths('../Handout/Train/'))
    p = Pool(4)    
    data = p.map(processtrain, files)
    X = [data[i][0] for i in range(len(data))]
    X = np.asarray(X)

    emotion = [data[i][1] for i in range(len(data))]

    np.savez('../npz/train_augmentaion', X=X, emotion=emotion)
    print('Save train npz successfully !')
    
def test():
    files = list(absoluteFilePaths('../Public_Test/Public_Test/'))
    p = Pool(4) 
    data = p.map(processtest, files)
    
    X = [data[i][0] for i in range(len(data))]
    X = np.asarray(X)
    
    name = [data[i][1] for i in range(len(data))]
    np.savez('../npz/test_augmentation', X=X, name=name)
    print('Save test npz successfully !')

In [None]:
# train()
# test()