In [1]:
import contextlib
import json
import os
import sys
import wave

import numpy as np
import pandas as pd

from a import textgrid as tgio

In [23]:
def gentle_fill_missing_words(alignment_fn):
    '''
    Fill in timings for words that were not successfully aligned.

    Loads the alignment JSON at ``alignment_fn`` and, for every entry of
    ``content['words']`` whose ``'case'`` is not ``'success'``, asks
    ``align_missing_word`` for an interpolated (start, end) pair. That entry
    is then updated in place with ``'start'``, ``'end'`` and
    ``'case': 'assumed'``.

    Parameters
    ----------
    alignment_fn : str
        Path to the alignment JSON file (read as UTF-8).

    Returns
    -------
    dict
        The loaded alignment, with the unaligned entries filled in.
    '''

    # load the alignment file
    with open(alignment_fn, encoding='utf-8') as f:
        content = json.load(f)

    for ix, word in enumerate(content['words']):
        if word['case'] != 'success':
            start, end = align_missing_word(content, ix)
            # Update the entry dict itself: word['word'] is the transcript
            # string and has no .update(). Plain floats (rather than numpy
            # scalars) keep the result serialisable with json.dump.
            word.update({'start': float(start), 'end': float(end), 'case': 'assumed'})

    return content

In [46]:
def df_preproc_to_textgrid(df_fn, audio_fn):
    """
    Build a TextGrid with a single 'word' tier from a preprocessed transcript CSV.

    Parameters
    ----------
    df_fn : str
        Path to a CSV with 'Onset', 'Offset' and 'Word_Written' columns.
    audio_fn : str
        Path to the matching WAV file; it is opened to read its length.

    Returns
    -------
    tgio.Textgrid
        TextGrid containing one IntervalTier named 'word'.
    """

    # Reading the WAV header also makes the call fail early when the audio
    # file is missing or unreadable.
    with contextlib.closing(wave.open(audio_fn, 'r')) as f:
        duration = f.getnframes() / float(f.getframerate())
    # NOTE(review): `duration` is not used below — presumably it was meant to
    # bound the tier; confirm before wiring it in.

    df = pd.read_csv(df_fn)

    # one (onset, offset, label) tuple per row, times rounded to 3 decimals
    rearranged_words = [
        (np.round(onset, 3), np.round(offset, 3), label)
        for onset, offset, label in zip(df['Onset'], df['Offset'], df['Word_Written'])
    ]

    # adjust for overlap in times: a word may not start before the
    # (already adjusted) end of the word preceding it
    for ix in range(1, len(rearranged_words)):
        _, prev_end, _ = rearranged_words[ix - 1]
        curr_start, curr_end, curr_word = rearranged_words[ix]
        if curr_start < prev_end:
            rearranged_words[ix] = (prev_end, curr_end, curr_word)

    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('word', rearranged_words))
    return tg

In [24]:
# Stimulus to process; every path below is derived from it.
task = 'black'

base_dir = '/dartfs/rc/lab/F/FinnLab/tommy/isc_asynchrony_behavioral/'
stim_dir = os.path.join(base_dir, 'stimuli')
gentle_dir = os.path.join(stim_dir, 'gentle')

# alignment JSON and audio file for this task
alignment_fn = os.path.join(gentle_dir, task, 'align.json')
audio_fn = os.path.join(gentle_dir, task, 'a.wav')

# Preprocessed transcript CSV. The file name follows `task` instead of a
# hard-coded 'black', so changing `task` above retargets every path.
preproc_fn = os.path.join(stim_dir, 'preprocessed', task, f'{task}_transcript_preprocessed.csv')


In [48]:
# Build the 'word'-tier TextGrid from the preprocessed transcript CSV and its audio file.
textgrid = df_preproc_to_textgrid(preproc_fn, audio_fn)

In [53]:
# Open a TextGrid from a path relative to the current working directory.
# NOTE(review): the positional False is presumably praatio's
# includeEmptyIntervals flag — confirm against the installed version.
tg = tgio.openTextgrid("black_longtext_minute_1_adjusted.TextGrid", False)

In [58]:
# Show the intervals stored in the 'word' tier of `tg`.
tg.getTier('word').entries

(Interval(start=0.24, end=0.5759313145352838, label='so'),
 Interval(start=0.68, end=1.26, label='i'),
 Interval(start=1.959897688436246, end=2.2141860510701776, label='was'),
 Interval(start=2.2141860510701776, end=2.45, label='a'),
 Interval(start=2.46, end=3.0969761681667007, label='junior'),
 Interval(start=3.0969761681667007, end=3.41, label='in'),
 Interval(start=3.41, end=4.2, label='college'),
 Interval(start=4.79, end=4.97097858805688, label='when'),
 Interval(start=4.97097858805688, end=5.09, label='i'),
 Interval(start=5.09, end=5.339080447147778, label='got'),
 Interval(start=5.35, end=5.59, label='my'),
 Interval(start=5.637829885931753, end=6.31216774544281, label='first'),
 Interval(start=7.11, end=7.661464030371316, label='paying'),
 Interval(start=7.661464030371316, end=8.3, label='job'),
 Interval(start=8.65, end=8.93, label='in'),
 Interval(start=8.93, end=9.2, label='my'),
 Interval(start=9.21, end=9.99, label='field'),
 Interval(start=10.29, end=10.490408521738907,

In [6]:
"""
This script will take a CSV of transcription, file path pairs and make textgrids for you
These are saved in a new folder (textgrids)
"""

import pandas as pd
from contextlib import contextmanager
import signal, json, wave, contextlib
from a import textgrid as tgio

def alignment_to_textgrid(alignment_fn, path):
    """
    Convert an alignment JSON file into a TextGrid with a single 'word' tier.

    Words whose 'case' is 'success' or 'assumed' use their own 'start'/'end'
    (rounded to 3 decimals). Every other word gets its times from
    ``align_missing_word``. Afterwards, any word that would start before the
    previous word's end is moved to start at that end.

    Parameters
    ----------
    alignment_fn : str
        Path to the alignment JSON file (read as UTF-8).
    path : str
        Path to the matching WAV file; it is opened to read its length.

    Returns
    -------
    (dict, tgio.Textgrid)
        The loaded alignment and the TextGrid built from it.
    """
    # Reading the WAV header also makes the call fail early when the audio
    # file is missing or unreadable.
    with contextlib.closing(wave.open(path, 'r')) as f:
        duration = f.getnframes() / float(f.getframerate())
    # NOTE(review): `duration` is not used below — presumably it was meant to
    # bound the tier; confirm before wiring it in.

    # load the alignment file
    with open(alignment_fn, encoding='utf-8') as f:
        content = json.load(f)

    rearranged_words = []
    for ix, word in enumerate(content['words']):
        if word['case'] in ('success', 'assumed'):
            word_ons = np.round(word['start'], 3)
            word_off = np.round(word['end'], 3)
            # Entries that were never aligned carry no 'alignedWord' key, so
            # an 'assumed' entry may lack it; fall back to the transcript word.
            target = word.get('alignedWord', word['word'])
        else:
            # interpolate between the neighbouring successfully aligned words
            word_ons, word_off = align_missing_word(content, ix)
            target = word['word']
        rearranged_words.append((word_ons, word_off, target))

    # adjust for overlap in times: a word may not start before the
    # (already adjusted) end of the word preceding it
    for ix in range(1, len(rearranged_words)):
        _, prev_end, _ = rearranged_words[ix - 1]
        curr_start, curr_end, curr_word = rearranged_words[ix]
        if curr_start < prev_end:
            rearranged_words[ix] = (prev_end, curr_end, curr_word)

    tg = tgio.Textgrid()
    tg.addTier(tgio.IntervalTier('word', rearranged_words))
    return content, tg

def align_missing_word(content, ix):
    '''
    Interpolate a (start, end) pair for a word that was not aligned.

    The word at ``ix`` belongs to a run of consecutive entries whose 'case'
    is not 'success'. The run is anchored by the 'end' of the nearest
    successful word before it and the 'start' of the nearest successful word
    after it. For a run of N words, N evenly spaced boundaries are placed
    strictly between the two anchors. Word k of the run (0-based) starts at
    the previous boundary (or at the left anchor for k == 0) and ends at
    boundary k. The last boundary falls before the right anchor, so a gap
    remains after the final word of the run.

    Parameters
    ----------
    content : dict
        Alignment with a 'words' list; each entry has a 'case', and
        successful entries have 'start' and 'end'.
    ix : int
        Index into ``content['words']`` of the word to place.

    Returns
    -------
    (start, end)
        Times rounded to 3 decimals.

    Raises
    ------
    IndexError
        If no successfully aligned word follows ``ix``, so there is no right
        anchor to interpolate towards.
    '''
    words = content['words']

    # search forward for the next successfully aligned word
    forward_ix = ix + 1
    while forward_ix < len(words) and words[forward_ix]['case'] != 'success':
        forward_ix += 1
    if forward_ix >= len(words):
        raise IndexError(
            'no successfully aligned word after index %d to interpolate towards' % ix)
    next_start = np.round(words[forward_ix]['start'], 3)
    forward_missing = forward_ix - ix - 1

    # Search backward, stopping at the first entry. Letting the index go
    # negative would wrap around and pick a word from the end of the list.
    back_ix = ix - 1
    while back_ix >= 0 and words[back_ix]['case'] != 'success':
        back_ix -= 1
    if back_ix >= 0:
        prev_end = np.round(words[back_ix]['end'], 3)
    else:
        # the run starts at the first word: anchor it at time 0
        prev_end = np.round(0.0, 3)
    back_missing = ix - back_ix - 1

    # space evenly between the number of missing items
    total_missing = back_missing + forward_missing + 1  # add one to include current item
    # total_missing + 2 points include both anchors; [1:-1] drops them
    x_vals = np.linspace(prev_end, next_start, total_missing + 2)[1:-1]

    # This word sits at position `back_missing` within the run.
    end = np.round(x_vals[back_missing], 3)
    if back_missing == 0:
        start = prev_end
    else:
        # start where the preceding missing word ends
        start = np.round(x_vals[back_missing - 1], 3)

    return start, end

def save_textgrid(textgrid, path, fmt='long_textgrid', include_blank_spaces=True):
    """
    Save ``textgrid`` to ``path``; do nothing when ``path`` is None.

    Parameters
    ----------
    textgrid : object with a ``save`` method
        The TextGrid to write.
    path : str or None
        Destination file. None skips the save.
    fmt : str
        Output format, passed as the second positional argument of
        ``textgrid.save``.
    include_blank_spaces : bool
        Passed as the third positional argument of ``textgrid.save``.

    The defaults mirror the direct save call used elsewhere in this
    notebook: ``tg.save('test', 'long_textgrid', True)``.
    """
    if path is None:
        return
    textgrid.save(path, fmt, include_blank_spaces)

In [7]:
# Paths to the alignment JSON and audio for the 'black' stimulus.
# NOTE(review): base_dir, gentle_dir, alignment_fn and audio_fn are also
# assigned in another cell of this notebook; keep the two in sync.
base_dir = '/dartfs/rc/lab/F/FinnLab/tommy/isc_asynchrony_behavioral/'
gentle_dir = os.path.join(base_dir, 'stimuli', 'gentle')

alignment_fn = os.path.join(gentle_dir, 'black', 'align.json')
audio_fn = os.path.join(gentle_dir, 'black', 'a.wav')

# Returns the loaded alignment dict and the TextGrid built from it.
aligned, rearranged = alignment_to_textgrid(alignment_fn, audio_fn)

In [15]:
# Display whatever entry `word` is currently bound to.
# NOTE(review): `word` is not assigned in this cell; it relies on a value
# left over from a previously executed cell.
word

{'alignedWord': 'you',
 'case': 'success',
 'end': 799.98,
 'endOffset': 7786,
 'phones': [{'duration': 0.01, 'phone': 'y_B'},
  {'duration': 0.01, 'phone': 'uw_E'}],
 'start': 799.96,
 'startOffset': 7783,
 'word': 'you'}

In [20]:
# Set 'start' to 0 on the entry bound to `missing` (mutates it in place).
# NOTE(review): `missing` is only assigned by the loop cell further down the
# notebook, so this cell fails on a fresh top-to-bottom run.
missing['start'] = 0

In [18]:
# Print every entry that was not successfully aligned; `missing` is left
# bound to the last such entry.
for word in aligned['words']:
    if word['case'] == 'success':
        continue
    missing = word
    print(word)

{'case': 'not-found-in-audio', 'endOffset': 3688, 'startOffset': 3683, 'word': 'black'}
{'case': 'not-found-in-audio', 'endOffset': 4310, 'startOffset': 4307, 'word': 'mic'}
{'case': 'not-found-in-audio', 'endOffset': 6891, 'startOffset': 6888, 'word': 'You'}
{'case': 'not-found-in-audio', 'endOffset': 6896, 'startOffset': 6892, 'word': 'know'}
{'case': 'not-found-in-audio', 'endOffset': 6899, 'startOffset': 6898, 'word': 'I'}


In [4]:
# Display `rearranged`, the TextGrid returned by alignment_to_textgrid.
rearranged

<praatio.data_classes.textgrid.Textgrid at 0x2aea239e4d10>

In [5]:
# Open a TextGrid from a path relative to the current working directory.
# NOTE(review): the positional False is presumably praatio's
# includeEmptyIntervals flag — confirm against the installed version.
tg = tgio.openTextgrid("black_longtext_minute_1_adjusted.TextGrid", False)

In [14]:
# List the tier names of `tg`.
tg.tierNames

('word',)

In [5]:
# Show the intervals stored in the 'word' tier of `rearranged`.
rearranged.getTier('word').entries

(Interval(start=0.24, end=0.63, label='so'),
 Interval(start=0.68, end=1.26, label='i'),
 Interval(start=1.96, end=2.3, label='was'),
 Interval(start=2.3, end=2.45, label='a'),
 Interval(start=2.46, end=3.14, label='junior'),
 Interval(start=3.14, end=3.41, label='in'),
 Interval(start=3.41, end=4.2, label='college'),
 Interval(start=4.79, end=5.02, label='when'),
 Interval(start=5.02, end=5.09, label='i'),
 Interval(start=5.09, end=5.33, label='got'),
 Interval(start=5.35, end=5.59, label='my'),
 Interval(start=5.61, end=6.34, label='first'),
 Interval(start=7.11, end=7.66, label='paying'),
 Interval(start=7.66, end=8.3, label='job'),
 Interval(start=8.65, end=8.93, label='in'),
 Interval(start=8.93, end=9.2, label='my'),
 Interval(start=9.21, end=9.99, label='field'),
 Interval(start=10.29, end=10.5, label='on'),
 Interval(start=10.5, end=10.62, label='the'),
 Interval(start=10.62, end=11.36, label='radio'),
 Interval(start=11.82, end=11.97, label='this'),
 Interval(start=11.97, end=

In [18]:
def textgrid_to_gentle(textgrid_fn, alignment_fn):
    '''
    Given adjusted alignment times from Praat, convert back into gentle format

    TODO: not implemented. The body consists of this docstring only, so a
    call returns None and neither argument is read.
    '''

In [44]:
# Show the intervals stored in the 'word' tier of `tg`.
tg.getTier('word').entries

(Interval(start=0.24, end=0.5759313145352838, label='so'),
 Interval(start=0.68, end=1.26, label='i'),
 Interval(start=1.959897688436246, end=2.2141860510701776, label='was'),
 Interval(start=2.2141860510701776, end=2.45, label='a'),
 Interval(start=2.46, end=3.0969761681667007, label='junior'),
 Interval(start=3.0969761681667007, end=3.41, label='in'),
 Interval(start=3.41, end=4.2, label='college'),
 Interval(start=4.79, end=4.97097858805688, label='when'),
 Interval(start=4.97097858805688, end=5.09, label='i'),
 Interval(start=5.09, end=5.339080447147778, label='got'),
 Interval(start=5.35, end=5.59, label='my'),
 Interval(start=5.637829885931753, end=6.31216774544281, label='first'),
 Interval(start=7.11, end=7.661464030371316, label='paying'),
 Interval(start=7.661464030371316, end=8.3, label='job'),
 Interval(start=8.65, end=8.93, label='in'),
 Interval(start=8.93, end=9.2, label='my'),
 Interval(start=9.21, end=9.99, label='field'),
 Interval(start=10.29, end=10.490408521738907,

In [None]:
# List the entries of the current working directory.
os.listdir()

In [None]:
# Rebind `tg` to a newly constructed Textgrid (no tiers are added here).
tg = tgio.Textgrid()
# tg.addTier(tgio.IntervalTier('word', rearranged_words))

In [None]:
# Add a 'word' IntervalTier built from `rearranged` to `tg`.
# NOTE(review): elsewhere in this notebook `rearranged` is bound to the
# Textgrid returned by alignment_to_textgrid, not to a list of
# (start, end, label) entries — confirm this is the intended input.
tg.addTier(tgio.IntervalTier('word', rearranged))

In [None]:

# json.loads(alignment_fn.to_json())

In [None]:
# NOTE(review): `data` is not defined anywhere in the visible notebook; this
# cell depends on leftover kernel state.
data.to_json()

In [None]:
# Write `tg` to a file named 'test' in the current working directory.
# NOTE(review): the arguments are presumably (filename, format,
# includeBlankSpaces) — confirm against praatio's Textgrid.save.
tg.save('test', 'long_textgrid', True)