In [47]:
# initialize
from tqdm import tqdm
from time import sleep

import glob
import parselmouth
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# filepath for the dataset
# root = "/Users/leochoo/dev/GP2-dev/SVD"
root = "./testSVD"

my_data_path = root + "/my_data"
healthy_path = root + "/healthy"
patho_path = root + "/pathological"

!ls {patho_path}

[1m[36mfunctional[m[m      [1m[36mhyperfunctional[m[m [1m[36morganic[m[m         [1m[36mpsychogenic[m[m


In [48]:
# select .wav files only
wav_files = glob.glob(healthy_path + "/*.wav")
sound = parselmouth.Sound(wav_files[0])
mfcc_object = sound.to_mfcc(number_of_coefficients=13)
mfcc_arr = mfcc_object.to_array()
mfcc_arr


array([[ 1.39613135e+03,  1.38619030e+03,  1.38684501e+03, ...,
         1.35356808e+03,  1.33500060e+03,  1.32359337e+03],
       [ 2.56702752e+02,  2.61810061e+02,  2.68087472e+02, ...,
         2.87938409e+02,  2.95965446e+02,  3.14273394e+02],
       [-1.48133340e+02, -1.48002875e+02, -1.46724574e+02, ...,
        -1.14477246e+02, -1.16706150e+02, -1.20875314e+02],
       ...,
       [-5.76328840e+00,  4.83712684e+00, -1.10068093e+00, ...,
        -1.88414058e+01, -1.21156526e+01, -2.26338062e+01],
       [ 9.04656916e+00, -2.33473876e+00,  5.64878956e+00, ...,
         2.49397951e+01,  2.74963185e+01,  2.20009511e+01],
       [-1.32063864e+01, -1.26460031e+01, -2.49337689e+01, ...,
        -1.97745245e+01, -1.54438861e+01, -5.84506151e-01]])

In [49]:
mfcc_arr.shape

(14, 375)

In [50]:
len(mfcc_arr)

14

In [51]:
mfcc_dic = {}
for i in range(len(mfcc_arr)):
    mfcc_dic["MFCC-"+str(i+1)] = mfcc_arr[i]
mfcc_df = pd.DataFrame.from_dict(mfcc_dic)
mfcc_df

Unnamed: 0,MFCC-1,MFCC-2,MFCC-3,MFCC-4,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12,MFCC-13,MFCC-14
0,1396.131346,256.702752,-148.133340,205.060101,111.282256,101.539802,-74.793621,-88.604220,18.203361,-31.728012,6.090303,-5.763288,9.046569,-13.206386
1,1386.190301,261.810061,-148.002875,193.000834,128.906446,88.901586,-72.260847,-85.285534,10.029533,-20.978374,-5.397301,4.837127,-2.334739,-12.646003
2,1386.845013,268.087472,-146.724574,181.467363,124.150890,98.697423,-74.296126,-85.761370,21.560042,-31.170035,-5.421529,-1.100681,5.648790,-24.933769
3,1393.481577,266.048210,-157.203746,192.061213,125.064207,92.477609,-68.649688,-90.702311,16.725571,-31.948416,0.843608,-5.656708,0.256675,-11.032325
4,1386.449619,275.535347,-162.343226,196.299865,131.201307,83.244074,-60.801208,-91.682097,10.257518,-19.305974,0.016578,-5.873233,0.637123,-11.448358
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,1305.493947,312.148992,-108.351660,126.880456,132.940716,94.637615,-86.706665,-62.726697,-2.576372,-16.609815,-14.548897,-38.314219,32.029325,-12.031402
371,1306.554946,315.169387,-119.628433,133.394685,140.986267,77.256996,-68.881929,-74.013040,-5.751746,-6.548424,-31.482480,-24.218555,30.917750,-17.587251
372,1353.568083,287.938409,-114.477246,149.510981,114.228398,95.472448,-70.324525,-79.538107,16.378728,-24.450151,-26.133150,-18.841406,24.939795,-19.774525
373,1335.000596,295.965446,-116.706150,147.106124,120.534923,97.923599,-71.058033,-79.394888,14.840639,-23.958165,-32.985326,-12.115653,27.496319,-15.443886


In [59]:
# include MFCC data as well

def get_voice_data(_path):
    # select .wav files only
    wav_files = glob.glob(_path + "/*.wav")

    n_list = []
    tone_list = []
    syllab_list = []

    j_list = []
    s_list = []
    h_list = []

    # for wav_file in wav_files:
    for wav_file in tqdm(wav_files): # tqdm shows the progress bar
        sound = parselmouth.Sound(wav_file) # sound object from wav file
        pitch = sound.to_pitch()
        pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

        # name analysis
        name = os.path.basename(wav_file).split(".")[0]  
        
        ## tone
        if "l" in name:
            tone_list.append("l")
        elif "n" in name:
            tone_list.append("n")
        elif "h" in name:
            tone_list.append("h")

        ## syllable
        if "a" in name:
            syllab_list.append("a")
        elif "i" in name:
            syllab_list.append("i")
        elif "u" in name:
            syllab_list.append("u")
        # jitter
        jitter_local = parselmouth.praat.call(pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3) * 100

        # shimmer
        shimmer_local = parselmouth.praat.call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        # HNR
        harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)
        
        # Append to numpy array
        n_list.append(name)
        j_list.append(jitter_local)
        s_list.append(shimmer_local)
        h_list.append(hnr)

        # MFCC
        mfcc_object = sound.to_mfcc(number_of_coefficients=13)
        mfcc_arr = mfcc_object.to_array()
        mfcc_dic = {}
        for i in range(1,len(mfcc_arr)):
            mfcc_dic["MFCC-"+str(i)] = mfcc_arr[i]
        mfcc_df = pd.DataFrame.from_dict(mfcc_dic)

    # create dataframe
    df = pd.DataFrame({"Name":pd.Series(n_list),
                        "Type": np.nan,
                        "Tone": pd.Series(tone_list),
                        "Syllab": pd.Series(syllab_list),
                           "Jitter":pd.Series(j_list),
                           "Shimmer":pd.Series(s_list),
                           "HNR":pd.Series(h_list)})
    df["Type"]= _path.split("/")[-1] # identify type: my_data, healthy, functional etc...
    new_df = pd.concat([df, mfcc_df], axis=1, sort=False)
    new_df = new_df.dropna() # some data are missing jitter, shimmer, hnr for some reason it seems..?
    return new_df


In [60]:
healthy_df = get_voice_data(healthy_path)
functional_df = get_voice_data(patho_path + "/functional")
hyperfunctional_df = get_voice_data(patho_path + "/hyperfunctional")
organic_df = get_voice_data(patho_path + "/organic")
psychogenic_df = get_voice_data(patho_path + "/psychogenic")

100%|██████████| 18/18 [00:01<00:00, 10.23it/s]
100%|██████████| 18/18 [00:01<00:00, 14.75it/s]
100%|██████████| 18/18 [00:01<00:00, 10.18it/s]
100%|██████████| 18/18 [00:01<00:00,  9.92it/s]
100%|██████████| 27/27 [00:02<00:00, 12.11it/s]


In [64]:
new_df = pd.concat(frames)
new_df = new_df.dropna()
new_df

Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-1,MFCC-2,MFCC-3,MFCC-4,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12,MFCC-13
0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768,401.683204,-106.091207,-16.204958,-177.451287,-80.432412,-167.325435,-70.950945,60.705756,-18.078235,-92.995773,-13.111788,-4.964332,-29.427566
1,2-u_h,healthy,h,u,0.349111,0.023441,28.655604,405.945214,-107.319951,-6.467390,-168.654549,-83.775023,-164.000390,-57.471754,48.452583,-6.290534,-82.880371,-21.278743,10.481437,-38.131368
2,1-i_n,healthy,n,i,0.209544,0.007423,26.996682,412.992202,-105.668520,4.367001,-173.748357,-77.660894,-151.323178,-68.017679,71.309964,-10.580220,-88.565720,-7.091164,3.198551,-33.208541
3,2-u_l,healthy,l,u,1.069854,0.041115,25.108378,414.991160,-97.516727,14.790359,-171.141081,-67.202782,-142.208466,-72.015559,82.295822,-4.728251,-89.755092,6.052225,-1.277937,-27.437695
4,2-u_n,healthy,n,u,0.413457,0.031538,24.573556,406.918197,-86.251020,10.266493,-182.298364,-60.985385,-148.072712,-66.949520,75.605251,-3.342546,-89.780277,-3.320846,8.403894,-32.433375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22,151-a_l,psychogenic,l,a,0.100698,0.010602,32.236174,542.278005,-8.265111,-30.852985,-106.179846,-30.792265,-22.565280,-42.890834,29.361151,26.303133,-40.181743,-47.681269,38.188908,-16.714789
23,366-u_n,psychogenic,n,u,0.201356,0.015767,31.531414,536.969358,-3.689975,-21.333714,-120.413892,-17.823602,-23.398328,-48.960628,31.284078,32.925703,-45.128236,-41.538127,34.274904,-24.623071
24,741-a_h,psychogenic,h,a,0.281613,0.026882,28.802297,536.136757,-8.291563,-18.436971,-128.908856,-12.679275,-27.813144,-46.305927,37.134118,20.319001,-47.234094,-41.778117,34.873738,-26.046758
25,366-u_l,psychogenic,l,u,0.389981,0.039966,23.895287,543.660395,0.800456,-29.287699,-121.155722,-10.923893,-32.041889,-46.673164,35.578120,20.406814,-39.499129,-40.591833,23.920612,-20.058146


In [65]:
new_df.to_csv ("./testMFCCdata.csv", index = False, header=True)

# Investing gating the cause of NaN

In [25]:
psychogenic_df

Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-1,MFCC-2,MFCC-3,MFCC-4,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12,MFCC-13
0,741-i_l,psychogenic,l,i,0.430327,0.010875,31.476183,500.852508,-42.229477,9.338521,-105.915162,-33.302194,-22.609324,-42.780447,36.257394,13.09893,-49.530096,-19.076714,4.464144,-35.553634
1,741-i_n,psychogenic,n,i,0.173764,0.008791,35.010031,509.732989,-33.354517,-8.461371,-98.688461,-26.044541,-46.293121,-25.604598,28.356222,12.804219,-50.011145,-19.701448,9.926725,-45.747181
2,151-i_h,psychogenic,h,i,0.078759,0.004855,37.420852,520.317858,-41.985792,-10.35601,-94.843118,-38.981324,-37.91815,-36.561927,29.849487,16.920229,-58.498787,-17.889718,5.76889,-48.209089
3,151-i_l,psychogenic,l,i,0.148556,0.00663,30.717961,518.85187,-43.611927,-7.159445,-95.731118,-34.720833,-32.352922,-47.498631,41.629133,12.156952,-59.096146,-11.404653,4.278653,-43.423203
4,741-i_h,psychogenic,h,i,0.173467,0.006333,32.859001,519.323305,-49.506652,1.783797,-107.091937,-35.902711,-34.177557,-49.125474,44.81025,11.434974,-51.971935,-24.161926,0.701134,-35.054246
5,151-i_n,psychogenic,n,i,0.167417,0.020019,26.521576,516.794956,-49.275001,-0.125593,-107.546622,-29.941505,-41.646402,-42.067728,40.35122,3.345454,-40.278048,-28.743083,-3.333465,-27.222582
6,366-a_l,psychogenic,l,a,0.508194,0.076313,20.207709,526.657351,-45.899978,1.069204,-101.127115,-32.599329,-37.78788,-38.752697,45.529162,7.969082,-39.951085,-30.075008,6.429265,-29.120442
7,741-u_h,psychogenic,h,u,0.220549,0.011138,35.7305,529.732134,-40.30172,6.193586,-104.572379,-32.613566,-27.906607,-46.329687,53.235656,11.767016,-46.616703,-23.13224,8.891499,-31.951181
8,151-u_n,psychogenic,n,u,0.335014,0.028482,31.699526,529.17815,-39.765737,5.020934,-103.895563,-36.120211,-28.076771,-41.05668,48.590235,12.770809,-49.695049,-20.606085,11.271466,-33.18111
9,151-u_l,psychogenic,l,u,0.073962,0.00706,38.761321,518.773427,-39.818959,0.874212,-104.771075,-36.661602,-35.759304,-45.38475,46.851395,15.805132,-55.082445,-21.259875,7.79478,-41.313929


In [10]:
# Combine to one dataframe
frames = [healthy_df, functional_df, hyperfunctional_df, organic_df, psychogenic_df]
total_df = pd.concat(frames, keys=["healthy_df", "functional", "hyperfunctional", "organic", "psychogenic"])

total_df.to_csv ("./SVDwithMFCC.csv", index = False, header=True)
total_df

Unnamed: 0,Unnamed: 1,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-1,MFCC-2,MFCC-3,...,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12,MFCC-13,MFCC-14
healthy_df,0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768,1619.318328,401.683204,-106.091207,...,-177.451287,-80.432412,-167.325435,-70.950945,60.705756,-18.078235,-92.995773,-13.111788,-4.964332,-29.427566
healthy_df,1,2-u_h,healthy,h,u,0.349111,0.023441,28.655604,1626.995527,405.945214,-107.319951,...,-168.654549,-83.775023,-164.000390,-57.471754,48.452583,-6.290534,-82.880371,-21.278743,10.481437,-38.131368
healthy_df,2,1-i_n,healthy,n,i,0.209544,0.007423,26.996682,1634.863523,412.992202,-105.668520,...,-173.748357,-77.660894,-151.323178,-68.017679,71.309964,-10.580220,-88.565720,-7.091164,3.198551,-33.208541
healthy_df,3,2-u_l,healthy,l,u,1.069854,0.041115,25.108378,1650.599494,414.991160,-97.516727,...,-171.141081,-67.202782,-142.208466,-72.015559,82.295822,-4.728251,-89.755092,6.052225,-1.277937,-27.437695
healthy_df,4,2-u_n,healthy,n,u,0.413457,0.031538,24.573556,1649.523806,406.918197,-86.251020,...,-182.298364,-60.985385,-148.072712,-66.949520,75.605251,-3.342546,-89.780277,-3.320846,8.403894,-32.433375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
psychogenic,219,,,,,,,,1425.268788,477.890387,77.901283,...,-131.193611,19.216965,-13.084615,-68.621546,52.837812,26.308861,-53.550530,-46.487608,7.025833,-13.183455
psychogenic,220,,,,,,,,1402.104634,484.697187,74.604136,...,-116.158126,0.560382,2.260587,-71.893943,36.841585,49.080442,-63.165426,-58.592670,12.776798,-8.807712
psychogenic,221,,,,,,,,1396.971122,483.266421,76.293911,...,-116.294730,4.737167,-6.699290,-87.502561,40.179559,52.509001,-58.939465,-61.573861,5.833538,-20.068425
psychogenic,222,,,,,,,,1394.240980,483.486214,75.728016,...,-110.087278,12.259885,-10.067334,-75.767620,26.598434,34.854771,-31.147879,-57.792766,-6.452486,-19.412881


In [34]:
from tqdm import tqdm
from time import sleep

def get_voice_data(_path):
    # select .wav files only
    wav_files = glob.glob(_path + "/*.wav")

    n_list = []
    tone_list = []
    syllab_list = []

    j_list = []
    s_list = []
    h_list = []

    # for wav_file in wav_files:
    for wav_file in tqdm(wav_files): # tqdm shows the progress bar
        sound = parselmouth.Sound(wav_file) # sound object from wav file
        pitch = sound.to_pitch()
        pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

        # name analysis
        name = os.path.basename(wav_file).split(".")[0]  
        
        ## tone
        if "l" in name:
            tone_list.append("l")
        elif "n" in name:
            tone_list.append("n")
        elif "h" in name:
            tone_list.append("h")

        ## syllable
        if "a" in name:
            syllab_list.append("a")
        elif "i" in name:
            syllab_list.append("i")
        elif "u" in name:
            syllab_list.append("u")
        # jitter
        jitter_local = parselmouth.praat.call(pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3) * 100

        # shimmer
        shimmer_local = parselmouth.praat.call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        # HNR
        harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)

        # Append to dataframe
        n_list.append(name)
        j_list.append(jitter_local)
        s_list.append(shimmer_local)
        h_list.append(hnr)


    # create dataframe
    df = pd.DataFrame({"Name":pd.Series(n_list),
                        "Type": np.nan,
                        "Tone": pd.Series(tone_list),
                        "Syllab": pd.Series(syllab_list),
                           "Jitter":pd.Series(j_list),
                           "Shimmer":pd.Series(s_list),
                           "HNR":pd.Series(h_list)})
    df["Type"]= _path.split("/")[-1] # identify type: my_data, healthy, functional etc...
    df = df.dropna()
    return df


In [15]:
healthy_df = get_voice_data(healthy_path)
functional_df = get_voice_data(patho_path + "/functional")
hyperfunctional_df = get_voice_data(patho_path + "/hyperfunctional")
organic_df = get_voice_data(patho_path + "/organic")
psychogenic_df = get_voice_data(patho_path + "/psychogenic")

# Combine to one dataframe
frames = [healthy_df, functional_df, hyperfunctional_df, organic_df, psychogenic_df]
total_df = pd.concat(frames, keys=["healthy_df", "functional", "hyperfunctional", "organic", "psychogenic"])

# total_df.to_csv ("./testSVD_data1.csv", index = False, header=True)
total_df

100%|██████████| 18/18 [00:01<00:00, 13.92it/s]
100%|██████████| 18/18 [00:00<00:00, 18.81it/s]
100%|██████████| 18/18 [00:01<00:00, 13.44it/s]
100%|██████████| 18/18 [00:01<00:00, 13.50it/s]
100%|██████████| 27/27 [00:01<00:00, 15.40it/s]


Unnamed: 0,Unnamed: 1,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR
healthy_df,0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768
healthy_df,1,2-u_h,healthy,h,u,0.349111,0.023441,28.655604
healthy_df,2,1-i_n,healthy,n,i,0.209544,0.007423,26.996682
healthy_df,3,2-u_l,healthy,l,u,1.069854,0.041115,25.108378
healthy_df,4,2-u_n,healthy,n,u,0.413457,0.031538,24.573556
...,...,...,...,...,...,...,...,...
psychogenic,22,151-a_l,psychogenic,l,a,0.100698,0.010602,32.236174
psychogenic,23,366-u_n,psychogenic,n,u,0.201356,0.015767,31.531414
psychogenic,24,741-a_h,psychogenic,h,a,0.281613,0.026882,28.802297
psychogenic,25,366-u_l,psychogenic,l,u,0.389981,0.039966,23.895287


In [36]:
healthy_df = get_voice_data(healthy_path)
healthy_df

100%|██████████| 18/18 [00:01<00:00, 13.40it/s]


Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR
0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768
1,2-u_h,healthy,h,u,0.349111,0.023441,28.655604
2,1-i_n,healthy,n,i,0.209544,0.007423,26.996682
3,2-u_l,healthy,l,u,1.069854,0.041115,25.108378
4,2-u_n,healthy,n,u,0.413457,0.031538,24.573556
5,1-i_h,healthy,h,i,0.108868,0.003879,36.252385
6,2-i_n,healthy,n,i,0.501351,0.030291,21.938383
7,1-u_h,healthy,h,u,0.108561,0.006418,39.652435
8,2-i_l,healthy,l,i,0.264186,0.018319,24.200508
9,2-i_h,healthy,h,i,0.364832,0.030243,23.385832


## 2020-07-28 
It is mysterious how I'm getting a bunch of NaN when I run the functiosn above. It seems that when i run without MFCC, healthy data is fine but when i run with MFCC it is not okay.

I need to look into this. But for today, I will just cut out all the NaN values and proceed with it.