## Extract words for speech perception stimuli

This script reads Praat textgrid files and extracts audio clips of selected words.

It uses the python wrapper for sox, and Ronald Sprouse's audiolabel library and dir2df function.

The script also converts the audio to a 22050 Hz sampling rate and a single channel,
and normalizes the amplitude so that the peak amplitude is 2 dB below the maximum:

    tfm.convert(samplerate=22050, n_channels=1)
    tfm.norm(-2)
 

In [None]:
from pathlib import Path
from sox import Transformer
from audiolabel import read_label   # Ronald Sprouse's audiolabel library - to read TextGrid files
from phonlab.utils import dir2df    # Ronald Sprouse's dir2df - to make a dataframe from a directory

## Context

Set some context variables

In [None]:
# ---- what to extract -------------------------------------------------
# Name of the tier in the TextGrids that holds the word labels.
word_tier = "Word"

# Words of interest, used only when the target-word version of the loop is
# selected; otherwise every labelled word is extracted.
target_words = ["PEOPLE","TOLD"]

# Extra time (in seconds) kept on each side of a word's interval.
pad = 0.01

# ---- where things live -----------------------------------------------
# Folder searched for TextGrid files.
tgdir = Path('/Users/kjohnson/Downloads/Sound files and text grids')
# Folder holding the matching wav files (the same folder in this setup).
wavdir = Path('/Users/kjohnson/Downloads/Sound files and text grids')
# Folder that receives the extracted word clips.
outdir = Path('/Users/kjohnson/Downloads/Sound files and text grids/words_for_perception_test')

In [None]:
# Build a dataframe with one row per TextGrid file found under tgdir.
# The pattern is a raw string so that the backslash reaches the regex engine
# unchanged ('\.' in a normal string literal is an invalid escape sequence).
# 'barename' is requested as an extra column; later cells use it to build the
# name of the matching wav file and of the output clips.

tgdf = dir2df(tgdir, fnpat=r'\.TextGrid$', addcols=['barename'])
print(f'Found {len(tgdf)} .TextGrid files.')

tgdf.head()

## Working on things

Debugging and making sure that this works okay on one example textgrid file
before letting the thing loose on all of the textgrids in the corpus

In [None]:
# Dry run on a single TextGrid: prints the clip that would be written for each
# word and sets up the sox transformer, but the build calls are left commented
# out so no audio files are produced by this cell.
row = tgdf.iloc[0]  # an example file

# read_label is asked for the word tier only; [0] takes the first item it returns.
wddf = read_label(str(tgdir / row.relpath / row.fname), ftype='praat', tiers=word_tier)[0]
input_audio = str(wavdir / row.relpath / row.barename) + ".wav"

word_list = {}  # word -> index of its most recent instance (first instance is 0)
word_label = None  # stays None if the tier has no matching labels

print(input_audio)

# two versions of this loop

for _, word_label in wddf[wddf[word_tier] != ''].iterrows():  # extract all words
#for _, word_label in wddf[wddf[word_tier].isin(target_words)].iterrows():  # extract target words

    word = word_label[word_tier]

    # Repeated instances of a word are numbered 0, 1, 2, ... within the file.
    word_list[word] = word_list.get(word, -1) + 1

    # Pad the interval on both sides, but never ask sox for a negative start
    # time (a word at the very beginning of the file would otherwise fail).
    start = max(0.0, word_label.t1 - pad)
    end = word_label.t2 + pad

    stem = '_'.join((row.barename, word, str(word_list[word])))
    output_wav = stem + ".wav"
    output_mp3 = stem + ".mp3"

    print(str(outdir / output_wav), start, end)

    # use sox to clip out the word
    tfm = Transformer()  # create a transformer object

    tfm.trim(start, end)   # set the parameters of the transformer
    tfm.convert(samplerate=22050, n_channels=1)
    tfm.norm(-2)

    # Uncomment to actually write the clips while debugging.
    #tfm.build(input_audio,str(outdir / output_wav))
    #tfm.build(input_audio,str(outdir / output_mp3))

wddf.head()
word_label  # the last label processed (None if there were none)


## Process all files.

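Now we can loop over all of the files in the corpus, extracting words. As written, the loop extracts every non-empty word label; use the commented-out loop header instead to extract only the target words.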


In [None]:
# Extract a wav and an mp3 clip for every word in every TextGrid of the corpus.
# Clips are named <barename>_<word>_<instance number> and written to outdir.

# sox cannot write into a folder that does not exist, so create it up front.
outdir.mkdir(parents=True, exist_ok=True)

for row in tgdf.itertuples():   # loop through all of the textgrids

    # read_label is asked for the word tier only; [0] takes the first item it returns.
    wddf = read_label(str(tgdir / row.relpath / row.fname), ftype='praat', tiers=word_tier)[0]
    input_audio = str(wavdir / row.relpath / row.barename) + ".wav"

    word_list = {}  # word -> index of its most recent instance in this file

    print(input_audio)

    # two versions of this loop

    for _, word_label in wddf[wddf[word_tier] != ''].iterrows():  # extract all words
    #for _, word_label in wddf[wddf[word_tier].isin(target_words)].iterrows():  # extract target words

        word = word_label[word_tier]

        # Repeated instances of a word are numbered 0, 1, 2, ... within the file.
        word_list[word] = word_list.get(word, -1) + 1

        # Pad the interval on both sides, but never ask sox for a negative
        # start time (a word at the very beginning of the file would fail).
        start = max(0.0, word_label.t1 - pad)
        end = word_label.t2 + pad

        stem = '_'.join((row.barename, word, str(word_list[word])))
        output_wav = stem + ".wav"
        output_mp3 = stem + ".mp3"

        #print(str(outdir / output_wav), start, end)

        # use sox to clip out the word
        tfm = Transformer()  # create a transformer object
        tfm.trim(start, end)   # set the parameters of the transformer
        tfm.convert(samplerate=22050, n_channels=1)
        tfm.norm(-2)

        # The same transformer settings are applied once per output format.
        tfm.build(input_audio, str(outdir / output_wav))
        tfm.build(input_audio, str(outdir / output_mp3))
    