In [None]:
!pip install praat-parselmouth
import parselmouth
from parselmouth.praat import call


Collecting praat-parselmouth
  Downloading praat_parselmouth-0.4.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (10.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.7/10.7 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: praat-parselmouth
Successfully installed praat-parselmouth-0.4.3


In [None]:
import math
import numpy as np
import os
import pandas as pd
import torchaudio




"""## Acoustic Feature Methods"""

def _empty_syllable_result(originaldur, npauses):
    """Build the fallback result used when no syllable nuclei can be measured.

    Keys and their order match the full result of ``syllable_nuclei`` so rows
    from both paths can be stacked into one DataFrame.
    """
    return {
        'speechrate(nsyll / dur)': 0,
        'Average Syllable Duration (speakingtot/voicedcount)': np.nan,
        "articulation rate(nsyll / phonationtime)": np.nan,
        "Speech-to-pause ratio": np.nan,
        "Mean Syllable Period": np.nan,
        "Variability of Syllable Period": np.nan,
        "Mean Pause Duration": np.nan,
        "Variability of Pause Duration": np.nan,
        "Time (secs)": originaldur,
        "# of pauses": npauses,
    }


def syllable_nuclei(fileAddress, silencedb=-25, mindip=2, minpause=0.3):
    """Estimate syllable-nuclei based speech-rate features for one audio file.

    Intensity peaks above a threshold, preceded by a dip of more than
    ``mindip``, that fall in a "sounding" interval and have a defined pitch
    are counted as syllable nuclei.

    Parameters
    ----------
    fileAddress : path of the audio file, passed to ``parselmouth.Sound``.
    silencedb : offset (dB) added to the 0.99 intensity quantile to obtain the
        peak threshold; also the base of the silence-detection threshold.
    mindip : minimum intensity dip between two peaks for the earlier peak to
        be kept.
    minpause : minimum silent-interval duration passed to
        "To TextGrid (silences)".

    Returns
    -------
    dict with the keys 'speechrate(nsyll / dur)',
    'Average Syllable Duration (speakingtot/voicedcount)',
    'articulation rate(nsyll / phonationtime)', 'Speech-to-pause ratio',
    'Mean Syllable Period', 'Variability of Syllable Period',
    'Mean Pause Duration', 'Variability of Pause Duration', 'Time (secs)'
    and '# of pauses'. When no "sounding" table can be built or no peak
    exceeds the threshold, the rate is 0 and the other measures are NaN.

    NOTE(review): '# of pauses' is the row count of the table built from the
    "sounding" intervals, and 'Mean Pause Duration' divides the non-speaking
    time by that count -- confirm this is the intended definition.
    NOTE(review): 'Variability of Syllable Period' is the standard deviation
    of the (time-corrected) peak times, not of the gaps between them --
    confirm.
    """
    sound = parselmouth.Sound(fileAddress)
    originaldur = sound.get_total_duration()
    intensity = sound.to_intensity(50)
    min_intensity = call(intensity, "Get minimum", 0, 0, "Parabolic")
    max_intensity = call(intensity, "Get maximum", 0, 0, "Parabolic")

    # get .99 quantile to get maximum (without influence of non-speech sound bursts)
    max_99_intensity = call(intensity, "Get quantile", 0, 0, 0.99)

    # estimate Intensity threshold
    threshold = max_99_intensity + silencedb
    threshold2 = max_intensity - max_99_intensity
    threshold3 = silencedb - threshold2
    if threshold < min_intensity:
        threshold = min_intensity

    # get pauses (silences) and speakingtime
    textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")
    silencetier = call(textgrid, "Extract tier", 1)
    try:
        silencetable = call(silencetier, "Down to TableOfReal", "sounding")
    except parselmouth.PraatError:
        # Praat could not build the table (presumably no interval is labelled
        # "sounding"); report the segment as containing no speech.
        return _empty_syllable_result(originaldur, 0)

    npauses = call(silencetable, "Get number of rows")
    # Duration of every "sounding" row; fetched once and reused below.
    sounding_durations = []
    for row in range(1, npauses + 1):
        beginsound = call(silencetable, "Get value", row, 1)
        endsound = call(silencetable, "Get value", row, 2)
        sounding_durations.append(endsound - beginsound)
    speakingtot = sum(sounding_durations)

    intensity_matrix = call(intensity, "Down to Matrix")
    sound_from_intensity_matrix = call(intensity_matrix, "To Sound (slice)", 1)
    # use total duration, not end time, to find out duration of intdur (intensity_duration)
    # in order to allow nonzero starting times.
    intensity_duration = call(sound_from_intensity_matrix, "Get total duration")
    point_process = call(sound_from_intensity_matrix, "To PointProcess (extrema)", "Left", "yes", "no", "Sinc70")
    # estimate peak positions (all peaks)
    numpeaks = call(point_process, "Get number of points")
    t = [call(point_process, "Get time from index", i + 1) for i in range(numpeaks)]

    # keep only the peaks whose intensity exceeds the threshold
    timepeaks = []
    intensities = []
    for peak_time in t:
        value = call(sound_from_intensity_matrix, "Get value at time", peak_time, "Cubic")
        if value > threshold:
            intensities.append(value)
            timepeaks.append(peak_time)
    peakcount = len(timepeaks)

    if peakcount == 0:
        return _empty_syllable_result(originaldur, npauses)

    # fill array with valid peaks: only intensity values if preceding
    # dip in intensity is greater than mindip
    currenttime = timepeaks[0]
    currentint = intensities[0]
    validtime = []
    for p in range(peakcount - 1):
        following = p + 1
        dip = call(intensity, "Get minimum", currenttime, timepeaks[following], "None")
        if abs(currentint - dip) > mindip:
            validtime.append(timepeaks[p])
        currenttime = timepeaks[following]
        currentint = call(intensity, "Get value at time", timepeaks[following], "Cubic")

    # Look for only voiced parts
    pitch = sound.to_pitch_ac(0.02, 30, 4, False, 0.03, 0.25, 0.01, 0.35, 0.25, 450)
    voicedpeak = []
    for querytime in validtime:
        whichinterval = call(textgrid, "Get interval at time", 1, querytime)
        whichlabel = call(textgrid, "Get label of interval", 1, whichinterval)
        value = pitch.get_value_at_time(querytime)
        if not math.isnan(value) and whichlabel == "sounding":
            voicedpeak.append(querytime)
    voicedcount = len(voicedpeak)

    # calculate time correction due to shift in time for Sound object versus
    # intensity object
    timecorrection = originaldur / intensity_duration

    # Insert voiced peaks in TextGrid
    # NOTE(review): textgrid is local and never returned, so this tier does
    # not influence the result dictionary.
    call(textgrid, "Insert point tier", 1, "syllables")
    for peak_time in voicedpeak:
        call(textgrid, "Insert point", 1, peak_time * timecorrection, "")

    mean_syllable_period = originaldur / voicedcount if voicedcount != 0 else 0
    variability_syllable_period = np.std([peak_time * timecorrection for peak_time in voicedpeak]) if voicedcount > 1 else 0
    mean_pause_duration = (originaldur - speakingtot) / npauses if npauses != 0 else 0
    variability_pause_duration = np.std(sounding_durations) if npauses > 1 else 0

    speakingrate = voicedcount / originaldur
    # Guard the division like the other ratios: no speaking time means the
    # articulation rate is undefined rather than an exception.
    articulationrate = voicedcount / speakingtot if speakingtot != 0 else np.nan
    asd = speakingtot / voicedcount if voicedcount != 0 else np.nan
    nonspeaking = originaldur - speakingtot
    speech_to_pause_ratio = speakingtot / nonspeaking if nonspeaking != 0 else np.nan

    return {
        'speechrate(nsyll / dur)': speakingrate,
        'Average Syllable Duration (speakingtot/voicedcount)': asd,
        "articulation rate(nsyll / phonationtime)": articulationrate,
        "Speech-to-pause ratio": speech_to_pause_ratio,
        "Mean Syllable Period": mean_syllable_period,
        "Variability of Syllable Period": variability_syllable_period,
        "Mean Pause Duration": mean_pause_duration,
        "Variability of Pause Duration": variability_pause_duration,
        "Time (secs)": originaldur,
        "# of pauses": npauses,
    }


# Columns (and their order) kept by extractAndOutputAcousticFeatures; every
# entry is a key of the dictionary returned by syllable_nuclei.
finalAcousticFeatSet = [
    "Speech-to-pause ratio",
    "articulation rate(nsyll / phonationtime)",
    "speechrate(nsyll / dur)",
    "Average Syllable Duration (speakingtot/voicedcount)",
    "Mean Syllable Period",
    "Variability of Syllable Period",
    "Mean Pause Duration",
    "Variability of Pause Duration",
]




def extractAndOutputAcousticFeatures(fileAddress):
    """Return the selected acoustic features of one audio file as a one-row DataFrame.

    Parameters
    ----------
    fileAddress : path of the audio file, forwarded to ``syllable_nuclei``.

    Returns
    -------
    pandas.DataFrame with exactly one row and the columns listed in
    ``finalAcousticFeatSet``, in that order.
    """
    syllable_nuclei_dictionary = syllable_nuclei(fileAddress)
    # Wrap the dict in a list so it becomes one row. Assigning scalars column
    # by column to an empty DataFrame creates the columns but no rows.
    allAcousticFeatures_df = pd.DataFrame([syllable_nuclei_dictionary])
    return allAcousticFeatures_df[finalAcousticFeatSet]



In [None]:
!pip install pydub


Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
from pydub import AudioSegment
import os

def split_and_save_audio_segments(audio_file_path, output_folder,numSecondsPerSegment):
    """Cut an audio file into equal-length WAV segments.

    Segments are written to ``output_folder`` as ``segment_1.wav``,
    ``segment_2.wav``, ... Only whole segments are written; audio left over
    after the last whole segment is not exported.

    Parameters
    ----------
    audio_file_path : path of the audio file to split.
    output_folder : directory for the segments; created if missing.
    numSecondsPerSegment : segment length in seconds (int or float), must
        correspond to at least one millisecond.

    Returns
    -------
    int : the number of segments written (also printed).

    Raises
    ------
    ValueError : if the segment length is not positive.
    """
    # len(audio) is used below as the duration in milliseconds, so the segment
    # length is kept as an integer number of milliseconds. This also keeps
    # num_segments an int for range() when a float number of seconds is given.
    segment_length = int(round(numSecondsPerSegment * 1000))
    if segment_length <= 0:
        raise ValueError(
            f"numSecondsPerSegment must be positive, got {numSecondsPerSegment!r}"
        )

    # Load the audio file
    audio = AudioSegment.from_file(audio_file_path)

    # Get the duration of the audio in milliseconds
    duration = len(audio)

    # Calculate the number of whole segments
    num_segments = duration // segment_length
    print(num_segments)

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Split the audio into fixed-length segments and save each one as a separate file
    for i in range(num_segments):
        start_time = i * segment_length
        end_time = start_time + segment_length

        # Extract the segment
        segment = audio[start_time:end_time]

        # Create the output file path (segment numbering starts at 1)
        output_file_path = os.path.join(output_folder, f'segment_{i + 1}.wav')

        # Export the segment as a separate audio file
        segment.export(output_file_path, format='wav')
    return num_segments

In [None]:
!rm -r Sept9TwoSecondSegments

In [None]:
split_and_save_audio_segments('aDBS012_2022-09-09_audio_amplitude.wav.wav', 'Sept9TwoSecondSegments',2)

568


568

In [None]:
1136//2

568

In [None]:
split_and_save_audio_segments('aDBS012_2022-09-09_audio_amplitude.wav.wav', 'Sept9OneSecondSegments',1)

1136

In [None]:
def extractSecondLevelPraatFeatures(numSegments, secondSegmentFolder,numSecondsPerSegment):
    """Run ``syllable_nuclei`` on every numbered segment file in a folder.

    Reads ``segment_1.wav`` through ``segment_<numSegments>.wav`` from
    ``secondSegmentFolder`` in order and returns a DataFrame with one row of
    features per segment. ``numSecondsPerSegment`` is accepted but not used
    here.
    """
    segment_paths = (
        os.path.join(secondSegmentFolder, 'segment_' + str(segment_number) + '.wav')
        for segment_number in range(1, numSegments + 1)
    )
    per_segment_rows = [syllable_nuclei(path) for path in segment_paths]
    return pd.DataFrame(per_segment_rows)


In [None]:
def splitAndExtractSecondLevelPraatFeatures(audio_file_path, secondSegmentFolder,numSecondsPerSegment):
    """Split an audio file into segments, then extract features from each one.

    Calls ``split_and_save_audio_segments`` first and hands the segment count
    it returns to ``extractSecondLevelPraatFeatures``, whose result is
    returned unchanged.
    """
    segment_count = split_and_save_audio_segments(
        audio_file_path,
        secondSegmentFolder,
        numSecondsPerSegment,
    )
    features = extractSecondLevelPraatFeatures(
        segment_count,
        secondSegmentFolder,
        numSecondsPerSegment,
    )
    return features



In [None]:
!rm -r Sept9TwoSecondSegments/

In [None]:
sept9DF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2022-09-09_audio_amplitude.wav.wav', 'Sept9TwoSecondSegments',2)
sept9DF.head()

568


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,1
1,0.0,,,,,,,,2.0,1
2,0.0,,,,,,,,2.0,1
3,0.0,,,,,,,,2.0,1
4,0.0,,,,,,,,2.0,1


In [None]:
# File name spelled "noModeratorSpeech_..." to match the other per-session exports.
sept9DF.to_csv('noModeratorSpeech_Sept9PerTwoSecondPraatFeatures.csv')

In [None]:
sept19DF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2022-09-19_audio_amplitude.wav.wav', 'Sept19TwoSecondSegments',2)
sept19DF.head()

564


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,1
1,0.0,,,,,,,,2.0,1
2,0.0,,,,,,,,2.0,1
3,1.5,0.397333,2.516779,1.475248,0.666667,0.242323,0.808,0.0,2.0,1
4,0.0,,,,,,,,2.0,1


In [None]:
sept19DF.to_csv('noModeratorSpeech_Sept19PerTwoSecondPraatFeatures.csv')

In [None]:
nov15DF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2022-11-15_audio_amplitude.wav.wav', 'Nov15TwoSecondSegments',2)
nov15DF.head()

550


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,1
1,0.0,,,,,,,,2.0,1
2,0.0,,,,,,,,2.0,1
3,1.5,0.445333,2.245509,2.012048,0.666667,0.274689,0.664,0.0,2.0,1
4,3.0,0.230667,4.33526,2.246753,0.333333,0.254371,0.616,0.0,2.0,1


In [None]:
nov15DF.to_csv('noModeratorSpeech_Nov15PerTwoSecondPraatFeatures.csv')

In [None]:
oct4DF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2022_10_04_audio_amplitude.wav.wav', 'Oct4TwoSecondSegments',2)
oct4DF.head()

553


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,1
1,0.0,,,,,,,,2.0,1
2,0.0,,,,,,,,2.0,1
3,0.0,,,,,,,,2.0,1
4,1.5,0.224,4.464286,0.506024,0.666667,0.123033,1.328,0.0,2.0,1


In [None]:
oct4DF.to_csv('noModeratorSpeech_Oct4PerTwoSecondPraatFeatures.csv')

In [None]:
feb27ExperimentOneDF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2023-02-27_Experiment1_audio_amplitude.wav', 'Feb23ExperimentOneSecondSegments',2)
feb27ExperimentOneDF.head()

190


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,1
1,0.0,,,,,,,,2.0,1
2,0.0,,,,,,,,2.0,1
3,0.0,,,,,,,,2.0,1
4,0.0,,,,,,,,2.0,1


In [None]:
feb27ExperimentOneDF.to_csv('Feb27ExperimentOnePerTwoSecondPraatFeatures.csv')

In [None]:
feb27ExperimentFiveDF = splitAndExtractSecondLevelPraatFeatures('noModeratorSpeech_aDBS012_2023-02-27_Experiment5_audio_amplitude.wav', 'Feb23ExperimentFiveTwoSecondSegments',2)
feb27ExperimentFiveDF.head()

182


  textgrid = call(intensity, "To TextGrid (silences)", threshold3, minpause, 0.1, "silent", "sounding")


Unnamed: 0,speechrate(nsyll / dur),Average Syllable Duration (speakingtot/voicedcount),articulation rate(nsyll / phonationtime),Speech-to-pause ratio,Mean Syllable Period,Variability of Syllable Period,Mean Pause Duration,Variability of Pause Duration,Time (secs),# of pauses
0,0.0,,,,,,,,2.0,0
1,2.0,0.5,2.0,,0.5,0.393564,0.0,0.0,2.0,1
2,1.5,0.56,1.785714,5.25,0.666667,0.331196,0.16,0.32,2.0,2
3,0.0,,0.0,0.612903,0.0,0.0,1.24,0.0,2.0,1
4,2.5,0.4,2.5,,0.4,0.479806,0.0,0.0,2.0,1


In [None]:
feb27ExperimentFiveDF.to_csv('Feb27ExperimentFivePerTwoSecondPraatFeatures.csv')