<a href="https://colab.research.google.com/github/leochoo/audiocnn/blob/main/notebooks/Generate_j%2C_s%2C_h%2C_mfcc.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install praat-parselmouth seaborn tqdm



In [None]:
# initialize
from tqdm import tqdm
from time import sleep

import glob
import parselmouth
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import statistics

In [None]:
# Process wav files to get Jitter, Shimmer, HNR, and MFCC

def get_voice_data(_path):
    """Extract jitter, shimmer, HNR and mean MFCCs for every .wav file in a directory.

    Parameters
    ----------
    _path : str
        Directory that directly contains the .wav recordings. Its last path
        component is stored in the "Type" column (e.g. "healthy", "functional").

    Returns
    -------
    pandas.DataFrame
        One row per .wav file with the columns "Name", "Type", "Tone",
        "Syllab", "Jitter", "Shimmer", "HNR", followed by one "MFCC-<i>" column
        per row of the MFCC array (the mean of that row over the recording).
        "Tone" / "Syllab" are None when the file name contains none of the
        expected letters. Jitter is multiplied by 100; shimmer is stored as
        returned by Praat. Rows with missing values are NOT dropped here, so
        the caller decides how to handle them. When the directory holds no
        .wav files, an empty frame with the seven base columns is returned.
    """
    base_columns = ["Name", "Type", "Tone", "Syllab", "Jitter", "Shimmer", "HNR"]

    # select .wav files only; sorted so that the row order is reproducible
    wav_files = sorted(glob.glob(os.path.join(_path, "*.wav")))
    if not wav_files:
        return pd.DataFrame(columns=base_columns)

    # identify type: my_data, healthy, functional etc. (tolerates a trailing "/")
    voice_type = _path.rstrip("/").split("/")[-1]

    # One dict per file: every feature of a recording is stored together, so
    # columns can never drift out of alignment and each file keeps its own MFCCs.
    records = []
    for wav_file in tqdm(wav_files):  # tqdm shows the progress bar
        sound = parselmouth.Sound(wav_file)  # sound object from wav file
        pitch = sound.to_pitch()
        pulses = parselmouth.praat.call([sound, pitch], "To PointProcess (cc)")

        # name analysis: file name without directory and extension
        name = os.path.basename(wav_file).split(".")[0]

        # First matching letter wins (same priority as before: l > n > h and
        # a > i > u); None keeps the row in place when nothing matches.
        tone = next((letter for letter in ("l", "n", "h") if letter in name), None)
        syllab = next((letter for letter in ("a", "i", "u") if letter in name), None)

        # jitter (scaled by 100)
        # args: time range (0, 0 = whole sound), shortest period, longest period,
        # maximum period factor
        jitter_local = parselmouth.praat.call(
            pulses, "Get jitter (local)", 0.0, 0.0, 0.0001, 0.02, 1.3
        ) * 100

        # shimmer; same arguments as jitter plus the maximum amplitude factor
        shimmer_local = parselmouth.praat.call(
            [sound, pulses], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6
        )

        # HNR: mean of the harmonicity contour over the whole recording
        # args: time step, minimum pitch, silence threshold, periods per window
        harmonicity = parselmouth.praat.call(sound, "To Harmonicity (cc)", 0.01, 75, 0.1, 1.0)
        hnr = parselmouth.praat.call(harmonicity, "Get mean", 0, 0)

        record = {
            "Name": name,
            "Type": voice_type,
            "Tone": tone,
            "Syllab": syllab,
            "Jitter": jitter_local,
            "Shimmer": shimmer_local,
            "HNR": hnr,
        }

        # MFCC: average every row of the MFCC array for THIS file
        mfcc_arr = np.asarray(sound.to_mfcc(number_of_coefficients=12).to_array())
        for i, coefficient_mean in enumerate(mfcc_arr.mean(axis=1)):
            record["MFCC-" + str(i)] = coefficient_mean

        records.append(record)

    # create dataframe (column order follows the insertion order of the records)
    return pd.DataFrame(records)


In [None]:
def generate_jshmfcc(dataset_path):
    """Build one feature table covering every voice category of the dataset.

    Parameters
    ----------
    dataset_path : str
        Root directory that contains "healthy" and
        "pathological/{functional,hyperfunctional,organic,psychogenic}".

    Returns
    -------
    pandas.DataFrame
        The per-category frames returned by ``get_voice_data`` stacked in the
        order listed below, with rows containing any missing value removed and
        a fresh 0..n-1 index (so index labels are unique across categories).
    """
    # Sub-directories to process, relative to dataset_path (order is preserved).
    category_dirs = [
        ("healthy",),
        ("pathological", "functional"),
        ("pathological", "hyperfunctional"),
        ("pathological", "organic"),
        ("pathological", "psychogenic"),
    ]

    # os.path.join avoids a doubled "/" when dataset_path ends with a slash.
    frames = [
        get_voice_data(os.path.join(dataset_path, *parts))
        for parts in category_dirs
    ]

    # Combine the results into one dataframe. Empty categories are left out of
    # the concat when at least one category has rows.
    non_empty = [frame for frame in frames if not frame.empty]
    combined_df = pd.concat(non_empty or frames, ignore_index=True)

    # Drop incomplete rows, then renumber so the index has no gaps.
    return combined_df.dropna().reset_index(drop=True)


# Mount Data

In [None]:
# Mount Google Drive at /content/gdrive so the notebook can reach files stored
# on Drive (Colab-specific import, kept in this cell next to its only use).
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
!ls /content/gdrive/MyDrive/CompressedData/

DysphoniaSVD-TAR.tar.gz  Test.tar.gz  UrbanSound8K.tar.gz


In [None]:
# !tar -xvzf /content/gdrive/MyDrive/CompressedData/DysphoniaSVD-TAR.tar.gz -C /content/

In [None]:
!ls /content/DysphoniaSVD/healthy/ | wc -l
!ls /content/DysphoniaSVD/pathological/hyperfunctional/ | wc -l
!ls /content/DysphoniaSVD/pathological/functional/ | wc -l
!ls /content/DysphoniaSVD/pathological/organic/ | wc -l
!ls /content/DysphoniaSVD/pathological/psychogenic/ | wc -l

3141
1916
1008
909
819


In [None]:
3141+1916+1008+909+819

7793

In [None]:
data_path = "/content/DysphoniaSVD/"

In [None]:
total_df = generate_jshmfcc(data_path)

100%|██████████| 3141/3141 [09:31<00:00,  5.50it/s]
100%|██████████| 1008/1008 [02:59<00:00,  5.60it/s]
100%|██████████| 1916/1916 [05:57<00:00,  5.36it/s]
100%|██████████| 909/909 [02:37<00:00,  5.79it/s]
100%|██████████| 819/819 [02:20<00:00,  5.82it/s]


In [None]:
total_df

Unnamed: 0,Name,Type,Tone,Syllab,Jitter,Shimmer,HNR,MFCC-0,MFCC-1,MFCC-2,MFCC-3,MFCC-4,MFCC-5,MFCC-6,MFCC-7,MFCC-8,MFCC-9,MFCC-10,MFCC-11,MFCC-12
0,831-a_h,healthy,h,a,0.197048,0.011417,30.370048,1055.041233,464.748232,253.672024,211.880523,-7.220064,-25.344796,-68.484316,-89.092073,27.53002,-0.209429,-12.265432,18.510297,29.035445
0,2417-a_n,functional,n,a,0.306778,0.014159,27.638194,1602.668213,289.84873,35.298471,219.578984,76.411743,94.112887,-7.842234,-56.415737,6.660792,-6.674514,-9.24702,-22.713022,-37.243954
0,669-a_n,hyperfunctional,n,a,0.48039,0.023566,22.16589,1304.143643,436.795862,211.83341,132.357304,-80.160507,-61.908708,-38.743092,-72.877078,3.77931,-38.499176,-10.272015,-22.381768,-29.016693
0,2000-a_l,organic,l,a,0.380039,0.038401,22.634162,1212.56007,331.769122,269.948353,301.313079,-55.776398,-14.284108,-38.795154,-92.874596,-29.121905,-37.768206,-25.329755,38.009249,28.506557
0,2310-i_l,psychogenic,l,i,0.251221,0.016296,28.772017,1482.613637,483.231693,-12.167514,84.282908,137.316927,115.62297,-13.810243,-96.603894,-28.00282,-41.461302,-28.367279,11.4906,-48.424881


In [None]:
total_df.shape

(5, 20)

In [None]:
# Save the outputs to the processed data directory
# NOTE(review): `test_report` (and `train_report` below) are not defined in any
# visible cell of this notebook, so this cell relies on leftover kernel state.
# Presumably they are test/train splits of `total_df` -- confirm and define
# them before this cell so that Restart & Run All works.
os.makedirs("./data/processed", exist_ok=True)  # to_csv does not create missing directories
test_report.to_csv("./data/processed/test_SVD_j_s_hnr_mfcc.csv", index=False, header=True)
print("Test data exported")
# train_report.to_csv("./data/processed/train_SVD_j_s_hnr_mfcc.csv", index=False, header=True)
# print("Train data exported")  # disabled together with the export it reports on




Test data exported
Train data exported


In [None]:
# 2020-11-05
# I found a problem with the MFCC calculation, so I am redoing it correctly.

# 11-05 09:02: now generating a new dataset with the correct average MFCC values. No d1/d2 included here.