In [1]:
# refactoring get voice data


# this is the playground for fixing mfcc addition

# initialize
from tqdm import tqdm
from time import sleep

import glob
import parselmouth
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# needed for mfcc calculation
import statistics
import speechpy
from scipy.io import wavfile



In [34]:
# Directory that holds the recordings to analyse; `_path` and `test_path` are the same string.
# NOTE(review): this is an absolute, machine-specific location - adjust before running elsewhere.
test_path = "/Users/leochoo/dev/VoiceDisorderSVM/data/SVD/test_audio/healthy"
_path = test_path
# Gather the .wav files sitting directly inside that directory.
wav_files = glob.glob("/".join((_path, "*.wav")))

In [35]:
# The last component of the directory path is used as the class label
# (my_data, healthy, functional, ...).
_type = _path.rsplit("/", 1)[-1]

In [45]:
# Build one feature row per recording:
#   [name, type, tone, syllab, jitter, shimmer, hnr, 12 x MFCC, 12 x d1, 12 x d2]
data = []
# for each audio file,
for wav_file in tqdm(wav_files): # tqdm shows the progress bar
    sound = parselmouth.Sound(wav_file) # sound object from wav file
    pitch = sound.to_pitch()
    pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

    # name analysis: file name without directory and without extension
    name = os.path.basename(wav_file).split(".")[0]

    ## tone and syllable
    # Names are expected to end in "<vowel>_<tone>" (e.g. "1-i_l"). Read the two
    # fields by position so that letters inside the speaker id cannot be mistaken
    # for a code ("anna-u_h" must give tone "h" / vowel "u", not "n" / "a").
    name_parts = name.rpartition("_")
    tone_field = name_parts[2]
    syllab_field = name_parts[0].rpartition("-")[2]
    if tone_field in ("l", "n", "h") and syllab_field in ("a", "i", "u"):
        tone = tone_field
        syllab = syllab_field
    else:
        # Name does not follow the pattern: keep the previous behaviour, i.e. the
        # first code letter found anywhere in the name, or "" when none is present.
        tone = next((code for code in ("l", "n", "h") if code in name), "")
        syllab = next((code for code in ("a", "i", "u") if code in name), "")

    # jitter (local), scaled by 100
    jitter = parselmouth.praat.call(pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3) * 100

    # shimmer (local), kept exactly as returned (not scaled like jitter)
    shimmer = parselmouth.praat.call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

    # HNR: mean of the cross-correlation harmonicity object
    harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
    hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)

    # descriptive part of the row; the MFCC statistics are appended below
    data_row = [name, _type, tone, syllab, jitter, shimmer, hnr]

    # MFCC, d1, d2
    samplerate, wav_data = wavfile.read(wav_file)
    mfccs = speechpy.feature.mfcc(wav_data, samplerate, num_cepstral = 12)
    mfccs = mfccs.T # one row per coefficient
    derivatives = speechpy.feature.extract_derivative_feature(mfccs) # this now looks like: [c#][frame#][[mfcc, d1, d2]]
    # NOTE(review): in the recorded output of this cell every d1 mean is roughly
    # 0.3x the matching MFCC mean and every d2 mean roughly 0.3x the d1 mean, so
    # these may not be true temporal deltas - verify against speechpy's
    # extract_derivative_feature before relying on the d1/d2 columns.

    # Average across the entire time frame (axis 1) in one vectorised step; this
    # replaces a per-coefficient statistics.mean loop, which is slow on numpy data.
    frame_means = np.asarray(derivatives, dtype=np.float64).mean(axis=1)
    mfcc_list = frame_means[:, 0].tolist()
    mfcc_d1 = frame_means[:, 1].tolist()
    mfcc_d2 = frame_means[:, 2].tolist()

    data_row = data_row + mfcc_list + mfcc_d1 + mfcc_d2

    # append to data
    data.append(data_row)


100%|██████████| 18/18 [00:02<00:00,  8.73it/s]


In [47]:
# sanity check: number of collected rows (the loop appends one row per wav file)
len(data)

18

In [48]:
# inspect the first row: name, type, tone, syllab, jitter, shimmer, hnr, then the MFCC / d1 / d2 means
data[0]

['1-i_l',
 'healthy',
 'l',
 'i',
 0.23877859121534234,
 0.01904494021583791,
 19.410767595912546,
 19.72641161472804,
 4.980334897641276,
 0.36437318414112246,
 10.569142934505988,
 2.299545821110475,
 -4.075228372724043,
 -0.06780992728021518,
 1.037051022424722,
 -1.8922893932866314,
 1.0048342177739633,
 -0.63523602267375,
 -1.727505072013799,
 5.9174527591811605,
 1.497128768002517,
 0.10810837186058794,
 3.170446541960481,
 0.6915741822616674,
 -1.2213791008389006,
 -0.014785856677514163,
 0.31527833550829065,
 -0.5662688993519015,
 0.30044856219010707,
 -0.1930743616718904,
 -0.5229540468987276,
 1.7750238560819367,
 0.450628386448033,
 0.031770934667015534,
 0.9505294939273355,
 0.20742952913598586,
 -0.36548991116798324,
 -0.002616298333508518,
 0.0957703986111242,
 -0.16974289627377193,
 0.08962381785467943,
 -0.05887055109250875,
 -0.15873702032568346]

In [42]:
# Descriptive columns first, then the 12 MFCC means followed by the means of
# their first (_d1) and second (_d2) derivative features, grouped by kind.
columns = ["Name", "Type", "Tone", "Syllab", "Jitter", "Shimmer", "HNR"]
for suffix in ("", "_d1", "_d2"):
    columns.extend("MFCC-" + str(idx) + suffix for idx in range(12))
columns

['Name',
 'Type',
 'Tone',
 'Syllab',
 'Jitter',
 'Shimmer',
 'HNR',
 'MFCC-0',
 'MFCC-1',
 'MFCC-2',
 'MFCC-3',
 'MFCC-4',
 'MFCC-5',
 'MFCC-6',
 'MFCC-7',
 'MFCC-8',
 'MFCC-9',
 'MFCC-10',
 'MFCC-11',
 'MFCC-0_d1',
 'MFCC-1_d1',
 'MFCC-2_d1',
 'MFCC-3_d1',
 'MFCC-4_d1',
 'MFCC-5_d1',
 'MFCC-6_d1',
 'MFCC-7_d1',
 'MFCC-8_d1',
 'MFCC-9_d1',
 'MFCC-10_d1',
 'MFCC-11_d1',
 'MFCC-0_d2',
 'MFCC-1_d2',
 'MFCC-2_d2',
 'MFCC-3_d2',
 'MFCC-4_d2',
 'MFCC-5_d2',
 'MFCC-6_d2',
 'MFCC-7_d2',
 'MFCC-8_d2',
 'MFCC-9_d2',
 'MFCC-10_d2',
 'MFCC-11_d2']

In [49]:
# Create the dataframe: one row per recording, labelled with `columns`.
# `columns` has to be passed by keyword - the second positional argument of
# pd.DataFrame is the index, not the column names.
df = pd.DataFrame(data=data, columns=columns)
df

Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-0,MFCC-1,MFCC-2,...,MFCC-2_d2,MFCC-3_d2,MFCC-4_d2,MFCC-5_d2,MFCC-6_d2,MFCC-7_d2,MFCC-8_d2,MFCC-9_d2,MFCC-10_d2,MFCC-11_d2
0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768,19.726412,4.980335,0.364373,...,0.031771,0.950529,0.20743,-0.36549,-0.002616,0.09577,-0.169743,0.089624,-0.058871,-0.158737
1,2-u_h,healthy,h,u,0.349111,0.023441,28.655604,24.231631,14.833635,5.13261,...,0.452091,-0.004018,-0.117941,-0.168529,-0.045855,-0.026354,-0.046411,-0.096588,-0.154689,-0.104487
2,1-i_n,healthy,n,i,0.209544,0.007423,26.996682,22.370851,9.1316,1.832111,...,0.166658,0.585586,0.135321,-0.225097,-0.087052,0.182036,-0.260597,-0.08415,-0.037017,-0.158376
3,2-u_l,healthy,l,u,1.069854,0.041115,25.108378,22.546297,14.963159,5.392483,...,0.478775,0.23598,-0.0434,-0.060782,-0.008986,-0.034862,0.064217,0.006862,-0.062482,0.004279
4,2-u_n,healthy,n,u,0.413457,0.031538,24.573556,22.691574,14.178967,5.5981,...,0.509009,0.18569,-0.010915,-0.085739,-0.056782,-0.037253,0.028907,0.041877,-0.035037,0.02332
5,1-i_h,healthy,h,i,0.108868,0.003879,36.252385,23.609964,13.226365,1.971031,...,0.188857,0.349159,0.029562,-0.330614,-0.242098,-0.129982,-0.276545,-0.205492,-0.167437,-0.116816
6,2-i_n,healthy,n,i,0.501351,0.030291,21.938383,21.447527,9.822897,5.394811,...,0.494651,0.438286,0.153198,-0.130906,0.142898,0.027833,-0.117025,0.045572,-0.070199,0.056019
7,1-u_h,healthy,h,u,0.108561,0.006418,39.652435,24.425951,14.24783,4.088618,...,0.37356,0.239715,-0.216446,-0.225385,-0.22029,-0.139295,-0.313206,-0.233803,-0.206238,-0.195633
8,2-i_l,healthy,l,i,0.264186,0.018319,24.200508,22.479712,9.092559,4.246562,...,0.387086,0.576704,0.113323,-0.122292,0.122009,0.128587,-0.201775,0.081214,-0.113203,0.040102
9,2-i_h,healthy,h,i,0.364832,0.030243,23.385832,23.690125,9.706376,4.815239,...,0.426536,0.29381,0.091869,-0.230152,-0.038644,0.02435,-0.308798,-0.034317,-0.159747,-0.025115


In [None]:
# Single example recording used by the exploratory cells below.
soundfile = "/".join((_path, "1-a_h.wav"))

In [None]:
# MFCC, d1, d2 for the single example file
# The samples are read into `wav_data` rather than `data`, so the list of feature
# rows that the loop cell stored in `data` is not overwritten when this cell runs.
samplerate, wav_data = wavfile.read(soundfile)
mfcc = speechpy.feature.mfcc(wav_data, samplerate, num_cepstral = 12)
mfcc = mfcc.T # transform to handle data easily
derivatives = speechpy.feature.extract_derivative_feature(mfcc)

In [None]:
# inspect the transposed MFCC array computed in the previous cell
mfcc

In [None]:
# inspect the output of extract_derivative_feature for the example file
derivatives

In [None]:
# length of the first axis - presumably one entry per cepstral coefficient (12 requested); confirm
len(derivatives)

In [None]:
# mfcc-0 list: for coefficient 0, take column 0 of its per-frame
# [mfcc, d1, d2] triples, i.e. the mfcc value of every frame
mfcc0_list = derivatives[0][:, 0]
mfcc0_list

In [None]:
# number of values collected for mfcc-0
len(mfcc0_list)

In [None]:
# Mean of mfcc-0 over all collected values. Stored under its own name so the
# `mfcc` array from the earlier cell is not replaced by a scalar (which would
# change what the cells above show when they are re-run).
mfcc0_mean = statistics.mean(mfcc0_list)
mfcc0_mean

In [None]:
# [c#][frame#][[mfcc, d1, d2]]

# get average of mfcc-0: collect the mfcc value (index 0) of every frame of
# coefficient 0 and average once. statistics.mean needs a sequence, so it cannot
# be called on the single per-frame value derivatives[0][j][0] (TypeError).
mfcc0_per_frame = [derivatives[0][j][0] for j in range(len(derivatives[0]))]
statistics.mean(mfcc0_per_frame)
    

In [None]:
# Process wav files to get Jitter, Shimmer, HNR, and MFCC and its derivatives

def get_voice_data(_path, num_cepstral=12):
    """Extract one row of voice features for every ``.wav`` file in ``_path``.

    Parameters
    ----------
    _path : str
        Directory containing the recordings. Its last component is used as the
        ``Type`` label (my_data, healthy, functional, ...); a trailing "/" is
        ignored for that purpose.
    num_cepstral : int, optional
        Number of cepstral coefficients requested from speechpy (default 12).

    Returns
    -------
    pandas.DataFrame
        Columns ``Name, Type, Tone, Syllab, Jitter, Shimmer, HNR`` followed by
        ``MFCC-<i>``, ``MFCC-<i>_d1`` and ``MFCC-<i>_d2`` for each coefficient
        ``i`` (grouped by kind). One row per file, in sorted file order; an
        empty frame with the same columns when no ``.wav`` file is found.

    Notes
    -----
    Jitter is multiplied by 100; shimmer is stored exactly as Praat returns it.
    NOTE(review): in the recorded output of the loop cell in this notebook the
    d1 means are roughly 0.3x the MFCC means (and d2 roughly 0.3x d1), so they
    may not be true temporal deltas - verify speechpy's
    extract_derivative_feature before relying on the ``_d1``/``_d2`` columns.
    """
    # identify type: my_data, healthy, functional etc...
    _type = _path.rstrip("/").split("/")[-1]

    columns = ["Name", "Type", "Tone", "Syllab", "Jitter", "Shimmer", "HNR"]
    for suffix in ("", "_d1", "_d2"):
        columns.extend("MFCC-" + str(i) + suffix for i in range(num_cepstral))

    # select .wav files only; sorted so the row order is reproducible
    wav_files = sorted(glob.glob(_path + "/*.wav"))

    rows = []
    for wav_file in tqdm(wav_files): # tqdm shows the progress bar
        sound = parselmouth.Sound(wav_file) # sound object from wav file
        pitch = sound.to_pitch()
        pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

        # name analysis: file name without directory and without extension
        name = os.path.basename(wav_file).split(".")[0]
        tone, syllab = _tone_and_syllable(name)

        # jitter
        jitter_local = parselmouth.praat.call(pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3) * 100

        # shimmer
        shimmer_local = parselmouth.praat.call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        # HNR
        harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)

        # MFCC, d1, d2 -> [c#][frame#][[mfcc, d1, d2]]
        samplerate, wav_data = wavfile.read(wav_file)
        mfcc = speechpy.feature.mfcc(wav_data, samplerate, num_cepstral=num_cepstral)
        derivatives = speechpy.feature.extract_derivative_feature(mfcc.T)

        # average over the frames (axis 1): one (mfcc, d1, d2) triple per coefficient
        frame_means = np.asarray(derivatives, dtype=np.float64).mean(axis=1)

        rows.append(
            [name, _type, tone, syllab, jitter_local, shimmer_local, hnr]
            + frame_means[:, 0].tolist()
            + frame_means[:, 1].tolist()
            + frame_means[:, 2].tolist()
        )

    # build the frame once, after every row is known
    return pd.DataFrame(rows, columns=columns)


def _tone_and_syllable(name):
    """Return ``(tone, syllab)`` codes parsed from a recording name such as "1-i_l"."""
    # Preferred: read the trailing "<vowel>_<tone>" fields by position, so letters
    # in the speaker id are never mistaken for a code.
    head, _sep, tone_field = name.rpartition("_")
    syllab_field = head.rpartition("-")[2]
    if tone_field in ("l", "n", "h") and syllab_field in ("a", "i", "u"):
        return tone_field, syllab_field
    # Fallback for other naming schemes: first code letter found anywhere, "" if none.
    tone = next((code for code in ("l", "n", "h") if code in name), "")
    syllab = next((code for code in ("a", "i", "u") if code in name), "")
    return tone, syllab
