# calculate vocal tract lengths, deltaf, and formant ratios
Meg Cychosz & Keith Johnson

Note that this script is now sewn into 2b_vtl_results.Rmd

In [None]:
import os, sys, fnmatch
import re
import pandas as pd
from sys import argv
import numpy as np
#import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the cleaned vowel measurements: all formants from all speakers.
formants = pd.read_csv(
    '/Users/Meg/Box Sync/Dissertation/Experiment_1/analysis/analysis_scripts/cleaned_vowels.csv'
)

In [None]:
def get_vtl(s):
    """Estimate formant spacing (deltaf) and vocal tract length from F1-F4.

    Parameters
    ----------
    s : object exposing ``f1_midpt_med`` ... ``f4_midpt_med``
        The data for a single speaker (in this script, a DataFrame slice).

    Returns
    -------
    tuple
        ``(deltaf, vtl)``. ``deltaf`` is the average of the four mean formants
        after each is divided by 0.5, 1.5, 2.5 and 3.5 respectively;
        ``vtl`` is ``34000 / (2 * deltaf)``.
    """
    # Scale each mean formant by its divisor so that every term is an
    # independent estimate of the spacing between formants.
    f1_term = np.mean(s.f1_midpt_med) / 0.5
    f2_term = np.mean(s.f2_midpt_med) / 1.5
    f3_term = np.mean(s.f3_midpt_med) / 2.5
    f4_term = np.mean(s.f4_midpt_med) / 3.5
    deltaf = (f1_term + f2_term + f3_term + f4_term) / 4

    # NOTE(review): 34000 is presumably the speed of sound in cm/s, which
    # would make vtl a length in cm -- confirm.
    vtl = 34000 / (2 * deltaf)

    # Alternative estimate (not used), the Lammert & Narayanan formula:
    #   229 + 0.03F1 + 0.082*F2/3 + 0.124*F3/5 + 0.354*F4/7
    # i.e. phi = 229 + 0.030*mean(F1) + 0.02733*mean(F2)
    #            + 0.0248*mean(F3) + 0.05057*mean(F4)
    #      vtl = 34000/(4*phi); deltaf = phi*2

    return deltaf, vtl

### perform calculation for adults where we have F1-F4

In [None]:
# Adult speakers are the rows whose age_yrs value is the string 'adult'.
adults = formants.loc[(formants.age_yrs=='adult')]

slist = []        # speaker ids, in order of first appearance
deltaf_list = []  # per-speaker formant spacing
vtl_list = []     # per-speaker vocal tract length

for speaker in adults.spkr.unique():

    sub_df = adults.loc[(adults.spkr==speaker)]  # select speaker

    # get_vtl returns (deltaf, vtl); compute it once per speaker and unpack,
    # rather than re-running the calculation for every value that is needed.
    speaker_deltaf, speaker_vtl = get_vtl(sub_df)

    slist.append(speaker)
    deltaf_list.append(speaker_deltaf)
    vtl_list.append(speaker_vtl)

# put lists together: one row per speaker
adult_calculations = pd.DataFrame(
    {'spkr': slist,
     'deltaf': deltaf_list,
     'vtl': vtl_list
    })

# merge calculation results with original data, so every adult row
# carries its speaker's deltaf and vtl
adult_results = pd.merge(adults, adult_calculations, on='spkr')
adult_results.head()

### Now do the same for children, but just using F1-F3

In [None]:
def get_vtl_child(s):
    """Estimate formant spacing (deltaf) and vocal tract length from F1-F3.

    Same calculation as the four-formant version, but using only the first
    three formants.

    Parameters
    ----------
    s : object exposing ``f1_midpt_med``, ``f2_midpt_med``, ``f3_midpt_med``
        The data for a single speaker (in this script, a DataFrame slice).

    Returns
    -------
    tuple
        ``(deltaf, vtl)``. ``deltaf`` is the average of the three mean
        formants after each is divided by 0.5, 1.5 and 2.5 respectively;
        ``vtl`` is ``34000 / (2 * deltaf)``.
    """
    f1_term = np.mean(s.f1_midpt_med) / 0.5
    f2_term = np.mean(s.f2_midpt_med) / 1.5
    f3_term = np.mean(s.f3_midpt_med) / 2.5
    deltaf = (f1_term + f2_term + f3_term) / 3

    # NOTE(review): 34000 is presumably the speed of sound in cm/s, which
    # would make vtl a length in cm -- confirm.
    vtl = 34000 / (2 * deltaf)

    # Alternative estimate (not used), the Lammert & Narayanan formula:
    #   229 + 0.03F1 + 0.082*F2/3 + 0.124*F3/5 + 0.354*F4/7
    # i.e. phi = 229 + 0.030*mean(F1) + 0.02733*mean(F2)
    #            + 0.0248*mean(F3) + 0.05057*mean(F4)
    #      vtl = 34000/(4*phi); deltaf = phi*2

    return deltaf, vtl

In [None]:
# Child speakers are all rows whose age_yrs value is anything other than 'adult'.
children = formants.loc[(formants.age_yrs!='adult')]

cslist = []        # speaker ids, in order of first appearance
cdeltaf_list = []  # per-speaker formant spacing (from F1-F3)
cvtl_list = []     # per-speaker vocal tract length (from F1-F3)

for speaker in children.spkr.unique():

    sub_df = children.loc[(children.spkr==speaker)]  # select speaker

    # get_vtl_child returns (deltaf, vtl); compute it once per speaker and
    # unpack, rather than re-running the calculation for every value needed.
    speaker_deltaf, speaker_vtl = get_vtl_child(sub_df)

    cslist.append(speaker)
    cdeltaf_list.append(speaker_deltaf)
    cvtl_list.append(speaker_vtl)


# put lists together: one row per speaker
child_calculations = pd.DataFrame(
    {'spkr': cslist,
     'deltaf': cdeltaf_list,
     'vtl': cvtl_list
    })

# merge calculation results with original data, so every child row
# carries its speaker's deltaf and vtl
child_results = pd.merge(children, child_calculations, on='spkr')

# Measure cavity ratios and pharynx length

In [None]:
### First define a function to calculate the ratios of back cavity length
# to front cavity length
# we also measure F2 of [i] which is a standing wave in the pharynx
# and thus an indicator of pharynx length (in reality this measure of 
# pharynx length didn't work very well)

def get_bcl(s):
    """Compute per-vowel ratios of mean formant values for one speaker.

    Parameters
    ----------
    s : object exposing ``Phone``, ``f1_midpt_med``, ``f2_midpt_med`` and
        ``f3_midpt_med`` (in this script, a DataFrame slice for one speaker).

    Returns
    -------
    tuple
        ``(a_f1f2, a_f2f3, i_f1f2, i_f2f3, u_f2f3, u_f1f3)``, where e.g.
        ``a_f1f2`` is mean F1 over mean F2 across the rows with Phone 'a'.
    """
    def mean_ratio(vowel, numerator, denominator):
        # Ratio of the two formants' means, restricted to rows of `vowel`.
        top = np.mean(numerator[(s.Phone == vowel)])
        bottom = np.mean(denominator[(s.Phone == vowel)])
        return top / bottom

    a_f1f2_ratio = mean_ratio('a', s.f1_midpt_med, s.f2_midpt_med)
    a_f2f3_ratio = mean_ratio('a', s.f2_midpt_med, s.f3_midpt_med)
    i_f1f2_ratio = mean_ratio('i', s.f1_midpt_med, s.f2_midpt_med)
    i_f2f3_ratio = mean_ratio('i', s.f2_midpt_med, s.f3_midpt_med)
    u_f2f3_ratio = mean_ratio('u', s.f2_midpt_med, s.f3_midpt_med)
    u_f1f3_ratio = mean_ratio('u', s.f1_midpt_med, s.f3_midpt_med)

    return (a_f1f2_ratio, a_f2f3_ratio,
            i_f1f2_ratio, i_f2f3_ratio,
            u_f2f3_ratio, u_f1f3_ratio)
def get_pl(s):
    """Estimate pharynx length from the mean F2 of the rows in ``s``.

    Computes ``(3 * 34000) / (4 * mean(F2))``. In this script the caller
    passes only one speaker's [i] tokens.
    """
    # NOTE(review): 34000 is presumably the speed of sound in cm/s -- confirm.
    mean_f2 = np.mean(s.f2_midpt_med)
    return (3*34000) / (mean_f2*4)

In [None]:
# now calculate the ratios between formants for each speaker
# later, we'll use the ratios, by phone, to predict who is more child-like,
# and who is more adult-like, in their articulatory strategies

bcslist = [] # speaker list
a_f1f2_list = []
a_f2f3_list = []
i_f1f2_list = []
i_f2f3_list = []
u_f2f3_list = []
u_f1f3_list = []


for speaker in formants.spkr.unique():

    sub_df = formants.loc[(formants.spkr==speaker)]  # select speaker

    # get_bcl returns all six ratios as one tuple; compute it once per
    # speaker and unpack, rather than re-running it for every ratio.
    (a_f1f2, a_f2f3,
     i_f1f2, i_f2f3,
     u_f2f3, u_f1f3) = get_bcl(sub_df)

    bcslist.append(speaker)
    a_f1f2_list.append(a_f1f2)
    a_f2f3_list.append(a_f2f3)
    i_f1f2_list.append(i_f1f2)
    i_f2f3_list.append(i_f2f3)
    u_f2f3_list.append(u_f2f3)
    u_f1f3_list.append(u_f1f3)

# put lists together: one row per speaker
bcl_calculations = pd.DataFrame(
    {'spkr': bcslist,
     'a_ratiof1f2': a_f1f2_list,
     'a_ratiof2f3': a_f2f3_list,
     'i_ratiof1f2': i_f1f2_list,
     'i_ratiof2f3': i_f2f3_list,
     'u_ratiof2f3': u_f2f3_list,
     'u_ratiof1f3': u_f1f3_list
    })

In [None]:
# now calculate length of pharynx using F2 of [i]
# pharyngeal cavity should grow with age, and
# the ratio of pharynx:vtl should get larger with age
# as pharynx grows faster than oral cavity

# keep only the [i] tokens
ees = formants.loc[(formants.Phone=='i')]

plslist = []  # speaker ids, in order of first appearance
pl_list = []  # per-speaker pharynx length estimate

for speaker in ees.spkr.unique():

    sub_df = ees.loc[(ees.spkr==speaker)]  # select speaker

    # compute the estimate once per speaker and store it alongside the id
    plslist.append(speaker)
    pl_list.append(get_pl(sub_df))

# put lists together: one row per speaker
pl_calculations = pd.DataFrame(
    {'spkr': plslist,
     'phar_length': pl_list
    })

# and combine with cavity ratio results
# (pd.merge defaults to an inner join, so only speakers present in both
# tables are kept)
cavity_results = pd.merge(pl_calculations, bcl_calculations, on='spkr')

In [None]:
# Stack the child and adult tables (each row already carries its speaker's
# deltaf and vtl) into a single table.
almost_final_results = pd.concat((child_results, adult_results))

# Attach the per-speaker pharynx length and formant ratios to every row.
final_results = almost_final_results.merge(cavity_results, on='spkr')

In [None]:
# Write the combined measurements to disk (column header kept, row index dropped).
final_results.to_csv(
    '/Users/Meg/Box Sync/Dissertation/Experiment_1/analysis/analysis_scripts/formants_vtlength.csv',
    index=False,
    header=True,
)

In [None]:
# preview the first rows of the combined results
final_results.head()