In [1]:
# initialize
from tqdm import tqdm
from time import sleep

import glob
import parselmouth
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import statistics



In [2]:
# Process wav files to get Jitter, Shimmer, HNR, and MFCC

def get_voice_data(_path):
    # select .wav files only
    wav_files = glob.glob(_path + "/*.wav")

    n_list = []
    tone_list = []
    syllab_list = []

    j_list = []
    s_list = []
    h_list = []

    # for wav_file in wav_files:
    for wav_file in tqdm(wav_files): # tqdm shows the progress bar
        sound = parselmouth.Sound(wav_file) # sound object from wav file
        pitch = sound.to_pitch()
        pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

        # name analysis
        name = os.path.basename(wav_file).split(".")[0]  
        
        ## tone
        if "l" in name:
            tone_list.append("l")
        elif "n" in name:
            tone_list.append("n")
        elif "h" in name:
            tone_list.append("h")

        ## syllable
        if "a" in name:
            syllab_list.append("a")
        elif "i" in name:
            syllab_list.append("i")
        elif "u" in name:
            syllab_list.append("u")
        # jitter
        jitter_local = parselmouth.praat.call(pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3) * 100

        # shimmer
        shimmer_local = parselmouth.praat.call([sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)

        # HNR
        harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)
        
        # Append to numpy array
        n_list.append(name)
        j_list.append(jitter_local)
        s_list.append(shimmer_local)
        h_list.append(hnr)

        # MFCC
        mfcc_object = sound.to_mfcc(number_of_coefficients=13)
        mfcc_arr = mfcc_object.to_array()
        mfcc_dic = {}
        for i in range(1,len(mfcc_arr)):
            mfcc_dic["MFCC-"+str(i)] = [statistics.mean(mfcc_arr[i])]
        mfcc_df = pd.DataFrame.from_dict(mfcc_dic)

    # create dataframe
    df = pd.DataFrame({"Name":pd.Series(n_list),
                        "Type": np.nan,
                        "Tone": pd.Series(tone_list),
                        "Syllab": pd.Series(syllab_list),
                           "Jitter":pd.Series(j_list),
                           "Shimmer":pd.Series(s_list),
                           "HNR":pd.Series(h_list)})
    df["Type"]= _path.split("/")[-1] # identify type: my_data, healthy, functional etc...
    new_df = pd.concat([df, mfcc_df], axis=1, sort=False)
#     new_df = new_df.dropna() # some data are missing jitter, shimmer, hnr for some reason it seems..?
    return new_df


In [3]:
def generate_jshmfcc(dataset_type, dataset_path):
    healthy_df = get_voice_data(dataset_path + "/healthy")
    functional_df = get_voice_data(dataset_path + "/pathological/functional")
    hyperfunctional_df = get_voice_data(dataset_path + "/pathological/hyperfunctional")
    organic_df = get_voice_data(dataset_path + "/pathological/organic")
    psychogenic_df = get_voice_data(dataset_path + "/pathological/psychogenic")

    # Combine the results into one dataframe
    frames = [healthy_df, functional_df, hyperfunctional_df, organic_df, psychogenic_df]
    combined_df = pd.concat(frames)
    combined_df = combined_df.dropna()
    return combined_df


In [4]:
# filepath for the test and train datasets
test_path = "/Users/leochoo/dev/VoiceDisorderSVM/data/SVD/test_audio"
train_path = "/Users/leochoo/dev/VoiceDisorderSVM/data/SVD/train_audio"

In [5]:
# generate voice report for test dataset
test_report = generate_jshmfcc("test", test_path)
test_report

100%|██████████| 18/18 [00:01<00:00,  9.80it/s]
100%|██████████| 18/18 [00:01<00:00, 13.40it/s]
100%|██████████| 18/18 [00:01<00:00,  9.14it/s]
100%|██████████| 18/18 [00:01<00:00,  9.01it/s]
100%|██████████| 27/27 [00:02<00:00,  9.55it/s]


Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-1,MFCC-2,MFCC-3,MFCC-4,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12,MFCC-13
0,1-i_l,healthy,l,i,0.238779,0.019045,19.410768,396.361323,-116.619975,-21.260483,-203.293634,-91.003209,-165.913936,-92.061193,27.434481,12.731777,-78.369763,-37.069225,-9.464482,-21.454042
0,350-a_n,functional,n,a,0.149364,0.024658,28.264699,432.290058,144.986895,155.268071,26.666257,-35.264318,-77.632088,-32.772081,40.711723,-48.415133,-47.907517,4.998487,-21.403137,-42.813633
0,106-u_n,hyperfunctional,n,u,0.207256,0.026797,27.546843,294.025549,-25.849874,251.935085,66.566817,69.400784,-48.979465,-77.377201,-40.517057,29.949832,-14.281045,-45.914693,-28.561137,4.476441
0,445-i_l,organic,l,i,0.540254,0.018591,24.334806,436.088876,72.598253,33.070889,-160.105034,-59.053231,-84.042075,-9.538565,58.779513,-23.37605,-100.873218,3.757949,-40.562095,-55.067486
0,741-i_l,psychogenic,l,i,0.430327,0.010875,31.476183,513.125351,44.361501,-14.416312,-140.739681,1.11233,-29.635538,-74.153029,46.199038,26.294957,-44.223189,-30.63107,8.792338,-25.712919


In [6]:
# # generate voice report for train dataset
# train_report = generate_jshmfcc("train", train_path)
# train_report

In [7]:
# Save the outputs to the processed data directory
test_report.to_csv ("./data/processed/test_SVD_j_s_hnr_mfcc.csv", index = False, header=True)
print("Test data exported")
# train_report.to_csv ("./data/processed/train_SVD_j_s_hnr_mfcc.csv", index = False, header=True)
# print("Train data exported")




Test data exported


In [None]:
# 20201105 
# so i recognized the problem with mfcc calculation so I'm re-doing it correctly.