In [6]:
import glob
import numpy as np
import pandas as pd
import parselmouth 
import statistics
import pickle
from parselmouth.praat import call
from scipy.stats.mstats import zscore
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

In [3]:


def measurePitch(voiceID, f0min, f0max, unit):
    sound = parselmouth.Sound(voiceID) # read the sound
    duration = call(sound, "Get total duration") # duration
    pitch = call(sound, "To Pitch", 0.0, f0min, f0max) #create a praat pitch object
    meanF0 = call(pitch, "Get mean", 0, 0, unit) # get mean pitch
    stdevF0 = call(pitch, "Get standard deviation", 0 ,0, unit) # get standard deviation
    harmonicity = call(sound, "To Harmonicity (cc)", 0.01, f0min, 0.1, 1.0)
    hnr = call(harmonicity, "Get mean", 0, 0)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    localJitter = call(pointProcess, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    localabsoluteJitter = call(pointProcess, "Get jitter (local, absolute)", 0, 0, 0.0001, 0.02, 1.3)
    rapJitter = call(pointProcess, "Get jitter (rap)", 0, 0, 0.0001, 0.02, 1.3)
    ppq5Jitter = call(pointProcess, "Get jitter (ppq5)", 0, 0, 0.0001, 0.02, 1.3)
    ddpJitter = call(pointProcess, "Get jitter (ddp)", 0, 0, 0.0001, 0.02, 1.3)
    localShimmer =  call([sound, pointProcess], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    localdbShimmer = call([sound, pointProcess], "Get shimmer (local_dB)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq3Shimmer = call([sound, pointProcess], "Get shimmer (apq3)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    aqpq5Shimmer = call([sound, pointProcess], "Get shimmer (apq5)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    apq11Shimmer =  call([sound, pointProcess], "Get shimmer (apq11)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    ddaShimmer = call([sound, pointProcess], "Get shimmer (dda)", 0, 0, 0.0001, 0.02, 1.3, 1.6)
    
    return duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer

In [4]:
# This function measures formants using Formant Position formula
def measureFormants(sound, wave_file, f0min,f0max):
    sound = parselmouth.Sound(sound) # read the sound
    pitch = call(sound, "To Pitch (cc)", 0, f0min, 15, 'no', 0.03, 0.45, 0.01, 0.35, 0.14, f0max)
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    
    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    f1_list = []
    f2_list = []
    f3_list = []
    f4_list = []
    
    # Measure formants only at glottal pulses
    for point in range(0, numPoints):
        point += 1
        t = call(pointProcess, "Get time from index", point)
        f1 = call(formants, "Get value at time", 1, t, 'Hertz', 'Linear')
        f2 = call(formants, "Get value at time", 2, t, 'Hertz', 'Linear')
        f3 = call(formants, "Get value at time", 3, t, 'Hertz', 'Linear')
        f4 = call(formants, "Get value at time", 4, t, 'Hertz', 'Linear')
        f1_list.append(f1)
        f2_list.append(f2)
        f3_list.append(f3)
        f4_list.append(f4)
    
    f1_list = [f1 for f1 in f1_list if str(f1) != 'nan']
    f2_list = [f2 for f2 in f2_list if str(f2) != 'nan']
    f3_list = [f3 for f3 in f3_list if str(f3) != 'nan']
    f4_list = [f4 for f4 in f4_list if str(f4) != 'nan']
    
    # calculate mean formants across pulses
    f1_mean = statistics.mean(f1_list)
    f2_mean = statistics.mean(f2_list)
    f3_mean = statistics.mean(f3_list)
    f4_mean = statistics.mean(f4_list)
    
    # calculate median formants across pulses, this is what is used in all subsequent calcualtions
    # you can use mean if you want, just edit the code in the boxes below to replace median with mean
    f1_median = statistics.median(f1_list)
    f2_median = statistics.median(f2_list)
    f3_median = statistics.median(f3_list)
    f4_median = statistics.median(f4_list)
    
    return f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median


This function runs a 2-factor Principle Components Analysis (PCA) on Jitter and Shimmer


In [16]:

def runPCA(df):
    # z-score the Jitter and Shimmer measurements
    measures = ['localJitter', 'localabsoluteJitter', 'rapJitter', 'ppq5Jitter', 'ddpJitter',
                'localShimmer', 'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 'apq11Shimmer', 'ddaShimmer']
    x = df.loc[:, measures].values
    print(x)
    x = StandardScaler().fit_transform(x)
    # PCA
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(x)
    pickle.dump(pca, open('models/pca_model.pkl', 'wb'))
    principalDf = pd.DataFrame(data = principalComponents, columns = ['JitterPCA', 'ShimmerPCA'])
    return principalDf

This block of code runs the above functions on all of the '.wav' files in the /audio folder


In [5]:

# create lists to put the results
file_list = []
duration_list = []
mean_F0_list = []
sd_F0_list = []
hnr_list = []
localJitter_list = []
localabsoluteJitter_list = []
rapJitter_list = []
ppq5Jitter_list = []
ddpJitter_list = []
localShimmer_list = []
localdbShimmer_list = []
apq3Shimmer_list = []
aqpq5Shimmer_list = []
apq11Shimmer_list = []
ddaShimmer_list = []
f1_mean_list = []
f2_mean_list = []
f3_mean_list = []
f4_mean_list = []
f1_median_list = []
f2_median_list = []
f3_median_list = []
f4_median_list = []



In [8]:
# Go through all the wave files in the folder and measure all the acoustics
for wave_file in glob.glob("data/Audio/*.wav"):
    print(f"Processing file {wave_file}")
    sound = parselmouth.Sound(wave_file)
    (duration, meanF0, stdevF0, hnr, localJitter, localabsoluteJitter, rapJitter, ppq5Jitter, ddpJitter, 
     localShimmer, localdbShimmer, apq3Shimmer, aqpq5Shimmer, apq11Shimmer, ddaShimmer) = measurePitch(
        sound, 75, 300, "Hertz")
    (f1_mean, f2_mean, f3_mean, f4_mean, f1_median, f2_median, f3_median, f4_median) = measureFormants(
        sound, wave_file, 75, 300)
    file_list.append(wave_file) # make an ID list
    duration_list.append(duration) # make duration list
    mean_F0_list.append(meanF0) # make a mean F0 list
    sd_F0_list.append(stdevF0) # make a sd F0 list
    hnr_list.append(hnr) #add HNR data
    
    # add raw jitter and shimmer measures
    localJitter_list.append(localJitter)
    localabsoluteJitter_list.append(localabsoluteJitter)
    rapJitter_list.append(rapJitter)
    ppq5Jitter_list.append(ppq5Jitter)
    ddpJitter_list.append(ddpJitter)
    localShimmer_list.append(localShimmer)
    localdbShimmer_list.append(localdbShimmer)
    apq3Shimmer_list.append(apq3Shimmer)
    aqpq5Shimmer_list.append(aqpq5Shimmer)
    apq11Shimmer_list.append(apq11Shimmer)
    ddaShimmer_list.append(ddaShimmer)
    
    # add the formant data
    f1_mean_list.append(f1_mean)
    f2_mean_list.append(f2_mean)
    f3_mean_list.append(f3_mean)
    f4_mean_list.append(f4_mean)
    f1_median_list.append(f1_median)
    f2_median_list.append(f2_median)
    f3_median_list.append(f3_median)
    f4_median_list.append(f4_median)

Processing file data/Audio/PP72.wav
Processing file data/Audio/PP70.wav
Processing file data/Audio/P10.wav
Processing file data/Audio/P25.wav
Processing file data/Audio/P47.wav
Processing file data/Audio/PP43.wav
Processing file data/Audio/PP24.wav
Processing file data/Audio/PP64.wav
Processing file data/Audio/PP13.wav
Processing file data/Audio/P64.wav
Processing file data/Audio/PP53.wav
Processing file data/Audio/PP77.wav
Processing file data/Audio/PP59.wav
Processing file data/Audio/PP60.wav
Processing file data/Audio/P12.wav
Processing file data/Audio/P83.wav
Processing file data/Audio/P1.wav
Processing file data/Audio/P61.wav
Processing file data/Audio/PP5.wav
Processing file data/Audio/P67.wav
Processing file data/Audio/PP89.wav
Processing file data/Audio/PP73.wav
Processing file data/Audio/P44.wav
Processing file data/Audio/P71.wav
Processing file data/Audio/PP15.wav
Processing file data/Audio/P72.wav
Processing file data/Audio/PP65.wav
Processing file data/Audio/PP37.wav
Proces

This block of code adds all of that data we just generated to a Pandas data frame



In [1]:

# Add the data to Pandas
df = pd.DataFrame(np.column_stack([file_list, duration_list, mean_F0_list, sd_F0_list, hnr_list, 
                                   localJitter_list, localabsoluteJitter_list, rapJitter_list, 
                                   ppq5Jitter_list, ddpJitter_list, localShimmer_list, 
                                   localdbShimmer_list, apq3Shimmer_list, aqpq5Shimmer_list, 
                                   apq11Shimmer_list, ddaShimmer_list, f1_mean_list, 
                                   f2_mean_list, f3_mean_list, f4_mean_list, 
                                   f1_median_list, f2_median_list, f3_median_list, 
                                   f4_median_list]),
                                   columns=['voiceID', 'duration', 'meanF0Hz', 'stdevF0Hz', 'HNR', 
                                            'localJitter', 'localabsoluteJitter', 'rapJitter', 
                                            'ppq5Jitter', 'ddpJitter', 'localShimmer', 
                                            'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
                                            'apq11Shimmer', 'ddaShimmer', 'f1_mean', 'f2_mean', 
                                            'f3_mean', 'f4_mean', 'f1_median', 
                                            'f2_median', 'f3_median', 'f4_median'])



NameError: name 'pd' is not defined

In [17]:
# df = pd.read_csv('pp_data/praat_features.csv')
# df = df[['voiceID', 'duration', 'meanF0Hz', 'stdevF0Hz', 'HNR', 
#                                             'localJitter', 'localabsoluteJitter', 'rapJitter', 
#                                             'ppq5Jitter', 'ddpJitter', 'localShimmer', 
#                                             'localdbShimmer', 'apq3Shimmer', 'apq5Shimmer', 
#                                             'apq11Shimmer', 'ddaShimmer', 'f1_mean', 'f2_mean', 
#                                             'f3_mean', 'f4_mean', 'f1_median', 
#                                             'f2_median', 'f3_median', 'f4_median']]

In [14]:
df.head()

Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,...,apq11Shimmer,ddaShimmer,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median
0,data/Audio/PP72.wav,348.936,207.001181,35.963224,11.362822,0.017873,8.6e-05,0.009466,0.010231,0.028397,...,0.158406,0.221373,567.187553,1677.305175,2605.826897,3798.854641,537.9948,1682.222813,2634.362501,3889.124583
1,data/Audio/PP70.wav,395.496,223.556389,40.975198,12.055452,0.017426,7.8e-05,0.009249,0.010249,0.027746,...,0.144471,0.202001,563.470554,1771.030775,2646.934228,3787.894565,524.207432,1784.035717,2672.958334,3897.344018
2,data/Audio/P10.wav,426.0,114.481739,26.141558,10.727324,0.017104,0.00015,0.00868,0.009942,0.02604,...,0.17186,0.204353,435.27641,1471.587486,2430.757947,3286.074487,422.740558,1461.087015,2432.324651,3233.344552
3,data/Audio/P25.wav,382.992,189.843925,59.738564,6.363512,0.030454,0.000159,0.015865,0.016503,0.047594,...,0.199536,0.255298,529.20239,1653.479034,2579.042035,3624.695193,497.86193,1631.144148,2623.216633,3662.797273
4,data/Audio/P47.wav,210.0,134.873856,36.376957,8.345053,0.017441,0.00013,0.008994,0.009639,0.026981,...,0.143249,0.242588,459.552242,1457.497049,2433.183331,3421.820345,434.99805,1443.451641,2420.409596,3405.182302


In [18]:
pcaData = runPCA(df) # Run jitter and shimmer PCA
df = pd.concat([df, pcaData], axis=1) # Add PCA data
# reload the data so it's all numbers
df.to_csv("processed_results.csv", index=False)
df = pd.read_csv('processed_results.csv', header=0)
df.sort_values('voiceID').head(20)

[[1.78733446e-02 8.63625646e-05 9.46571479e-03 ... 1.00186147e-01
  1.58406321e-01 2.21373079e-01]
 [1.74263280e-02 7.78887301e-05 9.24857027e-03 ... 9.22432114e-02
  1.44470958e-01 2.02001251e-01]
 [1.71041332e-02 1.49805971e-04 8.68012230e-03 ... 9.96810868e-02
  1.71860451e-01 2.04353178e-01]
 ...
 [1.57351455e-02 1.27224074e-04 7.90214134e-03 ... 9.81893267e-02
  1.61131401e-01 2.03133388e-01]
 [2.35761179e-02 1.83198561e-04 1.26545182e-02 ... 1.15307714e-01
  1.85625660e-01 2.45984247e-01]
 [2.69825042e-02 1.59979178e-04 1.43876009e-02 ... 1.26198153e-01
  2.13319315e-01 2.71286734e-01]]


Unnamed: 0,voiceID,duration,meanF0Hz,stdevF0Hz,HNR,localJitter,localabsoluteJitter,rapJitter,ppq5Jitter,ddpJitter,...,f1_mean,f2_mean,f3_mean,f4_mean,f1_median,f2_median,f3_median,f4_median,JitterPCA,ShimmerPCA
16,data/Audio/P1.wav,196.992,131.537716,26.486533,7.272724,0.022846,0.000174,0.012203,0.013365,0.036608,...,512.007052,1595.364075,2474.122424,3479.227742,497.039337,1606.457114,2497.217693,3493.359852,1.343619,-0.391921
2,data/Audio/P10.wav,426.0,114.481739,26.141558,10.727324,0.017104,0.00015,0.00868,0.009942,0.02604,...,435.27641,1471.587486,2430.757947,3286.074487,422.740558,1461.087015,2432.324651,3233.344552,-2.169249,-0.194072
35,data/Audio/P11.wav,271.992,187.218182,33.740344,9.663128,0.016613,8.9e-05,0.008864,0.009308,0.026593,...,570.71668,1691.548348,2629.5337,3756.141072,544.520348,1700.794941,2693.282896,3846.362806,-2.308809,-0.649668
14,data/Audio/P12.wav,204.984,184.71923,43.00665,11.951336,0.015756,8.5e-05,0.008078,0.008329,0.024234,...,553.466031,1500.138156,2454.89368,3511.267905,533.307504,1481.267748,2446.639662,3489.17779,-3.862268,0.097504
126,data/Audio/P13.wav,294.0,182.851936,41.577345,10.538509,0.022307,0.000122,0.011997,0.011932,0.035992,...,488.395889,1690.211383,2636.272989,3775.665557,464.790138,1740.770085,2656.798166,3831.363318,0.29213,-0.352857
59,data/Audio/P14.wav,246.0,190.062451,42.960465,9.946238,0.02086,0.00011,0.011046,0.011789,0.033137,...,543.202376,1663.696378,2631.252959,3726.273314,499.839587,1708.80598,2644.023735,3706.143916,-1.678878,0.773602
73,data/Audio/P15.wav,472.992,169.358208,37.866355,8.969828,0.019566,0.000115,0.010293,0.010716,0.03088,...,528.04401,1651.792915,2604.742924,3659.662773,496.540552,1633.616206,2631.183401,3707.866576,-1.615188,0.102637
52,data/Audio/P16.wav,490.008,214.466322,31.495178,10.544512,0.015887,7.4e-05,0.008345,0.009366,0.025036,...,559.90639,1694.283545,2711.277287,3676.468393,509.988253,1682.97932,2772.123842,3692.308487,-3.399962,-0.05788
61,data/Audio/P17.wav,270.0,183.554992,54.637379,9.304439,0.015676,8.5e-05,0.008278,0.008824,0.024833,...,535.372622,1622.341023,2666.474796,3678.732198,492.045604,1644.458158,2694.342063,3701.791338,-3.269884,-0.281538
120,data/Audio/P20.wav,234.0,200.408593,46.880651,15.674437,0.019877,9.9e-05,0.010826,0.011261,0.032479,...,505.462693,1643.881568,2693.313104,3737.380151,457.411973,1643.195073,2739.035614,3771.337988,-2.438169,1.01954



Next we calculate the vocal-tract length estimates
Formant position

In [12]:
df['pF'] = (zscore(df.f1_median) + zscore(df.f2_median) + zscore(df.f3_median) + zscore(df.f4_median)) / 4
df['fdisp'] = (df['f4_median'] - df['f1_median']) / 3

df['avgFormant'] = (df['f1_median'] + df['f2_median'] + df['f3_median'] + df['f4_median']) / 4

df['mff'] = (df['f1_median'] * df['f2_median'] * df['f3_median'] * df['f4_median']) ** 0.25
# reload the data again
df.to_csv("processed_results.csv", index=False)
df = pd.read_csv('processed_results.csv', header=0)

df['fitch_vtl'] = ((1 * (35000 / (4 * df['f1_median']))) +
                   (3 * (35000 / (4 * df['f2_median']))) + 
                   (5 * (35000 / (4 * df['f3_median']))) + 
                   (7 * (35000 / (4 * df['f4_median'])))) / 4
xysum = (0.5 * df['f1_median']) + (1.5 * df['f2_median']) + (2.5 * df['f3_median']) + (3.5 * df['f4_median'])
xsquaredsum = (0.5 ** 2) + (1.5 ** 2) + (2.5 ** 2) + (3.5 ** 2)
df['delta_f'] = xysum / xsquaredsum

df['vtl_delta_f'] = 35000 / (2 * df['delta_f'])
#Write out the final dataframe
df.to_csv("processed_results2.csv", index=False)


print("finished")


finished
