In [3]:
import parselmouth
import glob
import os
import statistics
import numpy as np
import pandas as pd
from parselmouth.praat import call
from typing import Dict

In [61]:
# Return the paths matching a glob pattern, ordered by sortKey
def getFilePaths(path):
    """Expand the glob pattern `path` and return the matches sorted by sortKey.

    Args:
        path: glob pattern (e.g. "data/ramp_up/*_O_*.wav").

    Returns:
        list of matching paths, in ascending order of the value sortKey
        returns for each path.
    """
    return sorted(glob.glob(path), key=sortKey)

def sortKey(path):
    """Return the utterance number encoded in a recording's file name.

    The file name is expected to contain an underscore-delimited token of
    the form ``U<number>`` (e.g. ``..._F2444_U28_HECK_...`` gives 28).

    The token is searched for in the base name only, so the result does not
    depend on how many underscores the directory part of `path` contains.
    If the base name has no such token, the original positional rule is
    used instead: the 8th underscore-separated field of the full path with
    its first character dropped.

    Args:
        path: path of a recording.

    Returns:
        int: the utterance number.

    Raises:
        IndexError, ValueError: if neither rule yields an integer.
    """
    for token in os.path.basename(path).split('_'):
        # first token that is "U" followed only by digits
        if token[:1] == 'U' and token[1:].isdecimal():
            return int(token[1:])

    # Fallback: positional rule, kept for names without a U<number> token
    parts = path.split('_')
    return int(parts[7][1:])

# Get f0min and f0max
def getPitchRange(files):
    """Derive a pitch range from the mean F0 of a set of recordings.

    For each readable file a Praat pitch object is created (pitch floor
    80 Hz, ceiling 450 Hz) and its mean pitch in Hertz is collected.

    Args:
        files: iterable of sound-file paths.

    Returns:
        (f0min, f0max): the smallest and the largest per-file mean pitch.

    Raises:
        ValueError: if no file yields a usable mean pitch.
    """
    pitches = []
    for file in files:
        try:
            sound = parselmouth.Sound(file)
        except parselmouth.PraatError:
            # parselmouth surfaces Praat-side failures (e.g. an unreadable file) as PraatError
            print("Skipping: " + file + ", not a viable sound file")
            continue
        pitch = call(sound, "To Pitch", 0.0, 80, 450) # create a praat pitch object
        meanF0 = call(pitch, "Get mean", 0, 0, "Hertz") # mean pitch of this utterance
        if np.isnan(meanF0):
            # presumably Praat reported the mean as undefined (no voiced frames);
            # a NaN in the list would make min()/max() depend on file order
            print("Skipping: " + file + ", mean pitch undefined")
            continue
        pitches.append(meanF0)

    if not pitches:
        raise ValueError("no usable pitch measurements in the given files")
    return min(pitches), max(pitches)

# Get formants (from PraatScripts by https://github.com/drfeinberg/PraatScripts/, modified to remove median)
def measureFormants(file, f0min, f0max):
    """Return the mean of formants F1-F4 of a sound file, sampled at its glottal pulses.

    Args:
        file: path of the sound file to read.
        f0min: lower pitch bound passed to the point-process analysis.
        f0max: upper pitch bound passed to the point-process analysis.

    Returns:
        (f1_mean, f2_mean, f3_mean, f4_mean) in Hertz, or None (after
        printing a message) when any of the four formants has no defined
        value at any pulse.
    """
    sound = parselmouth.Sound(file) # read the sound
    pointProcess = call(sound, "To PointProcess (periodic, cc)", f0min, f0max)
    formants = call(sound, "To Formant (burg)", 0.0025, 5, 5000, 0.025, 50)
    numPoints = call(pointProcess, "Get number of points")

    # Measure formants only at glottal pulses (point indices start at 1)
    pulse_times = [
        call(pointProcess, "Get time from index", point)
        for point in range(1, numPoints + 1)
    ]

    formant_means = []
    for formant_number in (1, 2, 3, 4):
        values = [
            call(formants, "Get value at time", formant_number, t, 'Hertz', 'Linear')
            for t in pulse_times
        ]
        # drop pulses at which this formant is undefined
        values = [value for value in values if not np.isnan(value)]
        if not values:
            print("Skipping: " + file + ", no formants extracted")
            return None
        # mean across pulses
        formant_means.append(statistics.mean(values))

    return tuple(formant_means)

# (F1_actual + F1_feedback)/F1_actual
def getPerturbationValues(actual, feedback, f0min, f0max):
    """Compute (actual + feedback) / actual for formants F1-F4 of paired recordings.

    `actual[i]` and `feedback[i]` are treated as a pair. A pair is skipped
    when measureFormants returns None for either file.

    Args:
        actual: list of paths whose formants are the denominator.
        feedback: list of paths paired index-by-index with `actual`.
        f0min: lower pitch bound forwarded to measureFormants.
        f0max: upper pitch bound forwarded to measureFormants.

    Returns:
        pandas.DataFrame with columns "f1", "f2", "f3", "f4" and one row per
        pair that was measured successfully.

    Raises:
        IndexError: if `feedback` is shorter than `actual`.
    """
    formant_keys = ("f1", "f2", "f3", "f4")
    perturbation_values: Dict[str, list] = {key: [] for key in formant_keys}

    # NOTE(review): the loop starts at index 1, so the first pair is never
    # processed — confirm this is intended.
    for i in range(1, len(actual)):
        print("Fetching formants from " + actual[i])

        # Measure each file once; None means no formants could be extracted
        actual_formants = measureFormants(actual[i], f0min, f0max)
        if actual_formants is None:
            continue
        feedback_formants = measureFormants(feedback[i], f0min, f0max)
        if feedback_formants is None:
            continue

        for key, f_actual, f_feedback in zip(formant_keys, actual_formants, feedback_formants):
            perturbation_values[key].append((f_actual + f_feedback) / f_actual)

    return pd.DataFrame(perturbation_values)

# # (F1_actual + F1_feedback)/F1_actual
# def getPerturbationValue(actual, feedback, f0min, f0max):
#     perturbation_values: Dict[str, float] = {
#         "f1": [],
#         "f2": [],
#         "f3": [],
#         "f4": [],
#     }
    
#     first_trial = True
    
#     for i in len(files):
#         print("Fetching formants from " + file)
        
#         # Get formants
#         try: 
#             (f1_actual, f2, f3, f4) = measureFormants(file, f0min, f0max)
#         except TypeError:
#             continue
        
#         # Save baseline formant values
#         if first_trial == True:
#             (f1_baseline, f2_baseline, f3_baseline, f4_baseline) = (f1, f2, f3, f4)
#             first_trial = False
        
#         # Compute percentage change
#         f1_change = (f1 - f1_baseline)/f1_baseline
#         f2_change = (f2 - f2_baseline)/f2_baseline
#         f3_change = (f3 - f3_baseline)/f3_baseline
#         f4_change = (f4 - f4_baseline)/f4_baseline
        
#         perturbation_values["f1"].append(f1_change)
#         perturbation_values["f2"].append(f2_change)
#         perturbation_values["f3"].append(f3_change)
#         perturbation_values["f4"].append(f4_change)
        
#     perturbation_values = pd.DataFrame(perturbation_values)
#     return perturbation_values

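# Get mean pitch (value of "O") (col 2)
# Earlier draft, kept commented out: percentage change of each formant relative to the first-trial baseline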
# def getPerturbationValue(files, f0min, f0max):
#     perturbation_values: Dict[str, float] = {
#         "f1": [],
#         "f2": [],
#         "f3": [],
#         "f4": [],
#     }
    
#     first_trial = True
    
#     for file in files:
#         print("Fetching formants from " + file)
        
#         # Get formants
#         try: 
#             (f1, f2, f3, f4) = measureFormants(file, f0min, f0max)
#         except TypeError:
#             continue
        
#         # Save baseline formant values
#         if first_trial == True:
#             (f1_baseline, f2_baseline, f3_baseline, f4_baseline) = (f1, f2, f3, f4)
#             first_trial = False
        
#         # Compute percentage change
#         f1_change = (f1 - f1_baseline)/f1_baseline
#         f2_change = (f2 - f2_baseline)/f2_baseline
#         f3_change = (f3 - f3_baseline)/f3_baseline
#         f4_change = (f4 - f4_baseline)/f4_baseline
        
#         perturbation_values["f1"].append(f1_change)
#         perturbation_values["f2"].append(f2_change)
#         perturbation_values["f3"].append(f3_change)
#         perturbation_values["f4"].append(f4_change)
        
#     perturbation_values = pd.DataFrame(perturbation_values)
#     return perturbation_values

In [62]:
# Pitch range of the subject, taken from the "_O_" recordings under data/pre
pre_files = glob.glob("data/pre/*_O_*.wav")
f0min, f0max = getPitchRange(pre_files)

# Perturbation values from the ramp-up recordings:
# "_O_" files are passed as `actual`, "_I_" files as `feedback`
actual = getFilePaths("data/ramp_up/*_O_*.wav")
feedback = getFilePaths("data/ramp_up/*_I_*.wav")
perturbation_values = getPerturbationValues(actual, feedback, f0min, f0max)

# Get actual uttered formants

Fetching formants from data/ramp_up/S19_FIH_P5_R2_T1_F2444_U28_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R3_T1_F2445_U29_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R4_T1_F2446_U30_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R5_T1_F2447_U31_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R6_T1_F2448_U32_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R7_T1_F2449_U33_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R8_T1_F2450_U34_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R9_T1_F2451_U35_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R10_T1_F2452_U36_HECK_O_BSUBJECT19_20170130_230_[3].wav
Fetching formants from data/ramp_up/S19_FIH_P5_R11_T1_F2453_U37_HECK_O_BSUBJECT19