In [25]:
import os
import librosa
import json
import math

In [26]:
DATASET_PATH = "../../data/AudioWAV/"
JSON_PATH = "data_pos_neg.json"
SAMPLE_RATE = 22050


AUDIO_DURATION = 2.5 # measured in seconds
SAMPLES_PER_AUDIO = SAMPLE_RATE * AUDIO_DURATION

In [27]:
def save_mfcc(dataset_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=1):
    
    print(dataset_path)
    
    # Build a dict to store data
    data = {
        "mapping": [],
        "mfcc": [],
        "labels": []
    }
    
    samples_per_segment = int(SAMPLES_PER_AUDIO / num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment / hop_length)
    
    # loop trough to all the emotions
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
            
        for index, (f) in enumerate(filenames):
            print(f"#{index} | processing: {f}")
            emo = f[9:12]
            

            if emo not in data["mapping"]:
                data["mapping"].append(emo)

                
            # load audio file    
            file_path = os.path.join(dirpath, f)
            signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
            
            # process all segments of audio file
            for d in range(num_segments):

                # calculate start and finish sample for current segment
                start = samples_per_segment * d
                finish = start + samples_per_segment

                # extract mfcc
                mfcc = librosa.feature.mfcc(signal[start:finish], sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length)



                # store only mfcc feature with expected number of vectors
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    data["mfcc"].append(mfcc.tolist())

                    data["labels"].append(data["mapping"].index(emo))


                    data["labels"].append(data["mapping"].index(emo))
                

                    print(f"{file_path}, segment:{d+1}")
    
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        
    print("DONE!!!")

In [28]:
save_mfcc(DATASET_PATH, JSON_PATH)

../../data/AudioWAV/
#0 | processing: 1038_TSI_FEA_XX.wav
#1 | processing: 1054_TAI_ANG_XX.wav
#2 | processing: 1028_IEO_FEA_MD.wav
#3 | processing: 1009_IWL_NEU_XX.wav
../../data/AudioWAV/1009_IWL_NEU_XX.wav, segment:1
#4 | processing: 1030_ITS_FEA_XX.wav
../../data/AudioWAV/1030_ITS_FEA_XX.wav, segment:1
#5 | processing: 1004_IEO_DIS_HI.wav
../../data/AudioWAV/1004_IEO_DIS_HI.wav, segment:1
#6 | processing: 1047_TSI_HAP_XX.wav
#7 | processing: 1020_IOM_HAP_XX.wav
#8 | processing: 1035_IEO_ANG_LO.wav
#9 | processing: 1089_ITS_HAP_XX.wav
../../data/AudioWAV/1089_ITS_HAP_XX.wav, segment:1
#10 | processing: 1076_TAI_NEU_XX.wav
../../data/AudioWAV/1076_TAI_NEU_XX.wav, segment:1
#11 | processing: 1037_IWL_SAD_XX.wav
../../data/AudioWAV/1037_IWL_SAD_XX.wav, segment:1
#12 | processing: 1063_ITH_DIS_XX.wav
../../data/AudioWAV/1063_ITH_DIS_XX.wav, segment:1
#13 | processing: 1030_IEO_DIS_HI.wav
#14 | processing: 1069_TIE_HAP_XX.wav
#15 | processing: 1004_TSI_HAP_XX.wav
#16 | processing: 1056_T