In [1]:
import os
from glob import glob
import audiolabel as al
import re
import numpy as np
import pandas as pd
import subprocess
import math
from scipy.io import wavfile

import parselmouth as ps
import matplotlib.pyplot as plt

In [2]:
# Truthy values make burst() print the score of every burst candidate it evaluates.
debug = 0 # this will toggle printing if you want to debug

# A notebook does not need a shebang line or a utf-8 encoding line. If this code is saved as a
# standalone script, put those two lines first so the command line knows to run it with python, etc.

'''VOT_290.py

VOT_290.py  - measure VOT in all of the stops in a given sound file (as
              found in files used by ling290 fall 2015).

Usage: VOT_290.py soundfile_name
# in terminal, navigate to folder containing script and files
# ./VOT_290.py *.wav > data.txt

Arguments:
  soundfile_name   a soundfile to be analyzed.
  
Assumption:
    There is also a file soundfile_name.TextGrid that has a phone tier and a word tier.
    
'''

# Authors: Keith Johnson (keithjohnson@berkeley.edu)
# 
# Copyright (c) 2015, The Regents of the University of California
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 
# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
# 
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
# 
# * Neither the name of the University of California nor the names of its
#   contributors may be used to endorse or promote products derived from
#   this software without specific prior written permission.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


import subprocess
import math
from scipy.io import wavfile

#-----------------------------
# This script uses the following programs from the ESPS library for speech processing
#   - hditem: get information from a wav or fea file header
#   - fft: spectral analysis, producing a single spectrum or a spectrogram
#   - melspec: convert an fft spectrum into a "Mel transformed" auditory spectrum
#   - nodiff: difference a field across records (used below to measure spectral change)
#   - pplain: print values from fea files (spectra) to plain text

# Read about these and other ESPS routines in the Berkeley Phonetics Machine 
# using the 'man' command.   For example:
#       > man fft 
# will show the manual page for the fft routine
#-------------------------

# Shared top-three rankings. wave_burst() fills the w_* pair and spec_burst() the s_* pair,
# always in place, and burst() reads both: *_score holds scores, *_time the matching times.
w_score, w_time = [0.0] * 3, [0.0] * 3
s_score, s_time = [0.0] * 3, [0.0] * 3

# Analysis step in seconds; burst() converts it to a number of samples.
step = 0.005  # 5 ms steps

def usage():
    """Print a one-line reminder of the arguments that burst() expects."""
    reminder = "burst(): expected three arguments: sound_file, start_time, end_time"
    print(reminder)


# Reports which side of zero holds the larger-magnitude extreme: -1 if the deepest
# negative sample outweighs the highest positive sample, otherwise 1.
def polarity(d=()):
    """Return the dominant polarity of a run of samples.

    Args:
        d: iterable of numeric samples (plain numbers or NumPy scalars).
           Defaults to an empty sequence.

    Returns:
        -1 when the magnitude of the most negative sample is strictly greater
        than the most positive sample, otherwise 1. Ties and empty input
        return 1 because both extremes start at zero.
    """
    neg_peak = 0.0
    pos_peak = 0.0

    for sample in d:
        # Promote to a Python float first: negating a fixed-width integer such as
        # int16 -32768 wraps back to -32768 and would flip the answer.
        value = float(sample)
        if value < neg_peak:
            neg_peak = value
        if value > pos_peak:
            pos_peak = value

    return -1 if -neg_peak > pos_peak else 1
    
# True when the middle value is strictly greater than both of its neighbours.
def is_peak(i,j,k):
    """Tell whether ``j`` is a local maximum between ``i`` and ``k``."""
    rises_into_j = i < j
    falls_after_j = j > k
    # '&' rather than 'and': both comparisons are always evaluated and combined.
    return rises_into_j & falls_after_j

def is_valley(i,j,k):
    """Tell whether ``j`` is a local minimum, strictly below both ``i`` and ``k``."""
    drops_into_j = i > j
    climbs_after_j = j < k
    # '&' rather than 'and': both comparisons are always evaluated and combined.
    return drops_into_j & climbs_after_j

# Waveform-based burst search: scan the samples, keep the local peaks (pol > 0) or
# valleys (pol < 0), and remember the three that jump most abruptly relative to the
# signal just before them.
def wave_burst(t, sf, pol, d=()):
    """Rank the three most abrupt waveform extrema in ``d``.

    For every index ``loc`` where ``d[loc+1]`` is a peak (``pol > 0``) or a valley
    (``pol < 0``), the score is ``|d[loc] - d[loc+1]| / ave``. ``ave`` is the sum
    of absolute sample-to-sample differences over ``d[loc-(t+1)] .. d[loc-2]``
    divided by ``t``. The divisor is ``t`` although ``t - 1`` differences are
    summed; that scale is kept because burst() feeds these scores into fixed
    coefficients.

    Results are written in place into the module-level lists ``w_score`` (scores,
    largest first) and ``w_time`` (``loc / sf`` for each score). Both lists are
    reset to zeros on every call; slots that are never filled stay at 0.0.

    Args:
        t: number of samples in the look-back window.
        sf: sampling frequency used to convert a sample index into a time.
        pol: > 0 to score peaks, < 0 to score valleys; 0 scores nothing.
        d: sequence of scalar samples (plain numbers or NumPy scalars).

    Returns:
        None. Output is delivered through ``w_score`` and ``w_time``.
    """
    global w_score, w_time
    w_score[:] = [0.0, 0.0, 0.0]
    w_time[:] = [0.0, 0.0, 0.0]

    # With fewer than two samples of look-back there is no preceding difference
    # to normalise by (and t == 0 would divide by zero), so nothing can be scored.
    if t < 2:
        return

    # Work on Python floats: differences of fixed-width integer samples (e.g. the
    # int16 arrays returned by wavfile.read) can wrap around silently.
    samples = [float(x) for x in d]

    # The look-back window reaches index loc-(t+1). Starting at t+1 keeps that
    # index >= 0; starting at t would read samples[-1], i.e. the END of the data.
    for loc in range(t + 1, len(samples) - 2):
        before, here, after = samples[loc], samples[loc + 1], samples[loc + 2]
        wanted = ((pol > 0 and is_peak(before, here, after)) or
                  (pol < 0 and is_valley(before, here, after)))
        if not wanted:
            continue

        ave = 0.0
        for i in range(t, 1, -1):
            ave += math.fabs(samples[loc - i] - samples[loc - (i + 1)])
        ave /= t

        # A perfectly flat look-back window gives no scale to compare against;
        # skip the candidate instead of dividing by zero.
        if ave == 0:
            continue

        change = math.fabs(before - here) / ave

        # Insert into the descending top-three list, shifting lower entries down.
        for i in range(3):
            if change > w_score[i]:
                if i < 2:
                    w_score[2] = w_score[1]
                    w_time[2] = w_time[1]
                if i < 1:
                    w_score[1] = w_score[0]
                    w_time[1] = w_time[0]
                w_score[i] = change
                w_time[i] = float(loc) / sf
                break
                        
# Spectrum-based burst search: the external ESPS programs do the spectral analysis,
# then the three largest frame-to-frame spectral changes are remembered.
def spec_burst (s,e,t,sf,sd):
    """Rank the three largest spectral changes between samples ``s`` and ``e``.

    Runs four external commands in sequence, using fixed file names in the
    current working directory:

    1. ``fft``     reads ``sd`` over the range ``s:e``, writes ``temp1.fea``
    2. ``melspec`` reads ``temp1.fea``, writes ``temp2.fea``
    3. ``nodiff``  reads ``temp2.fea``, writes ``nodiff.fea`` (spectral change)
    4. ``pplain``  prints field ``re_spec_val_d1`` of ``nodiff.fea`` as text

    Each printed line is summed into one number per frame. The three largest
    positive sums are written in place into the module-level lists ``s_score``
    (largest first) and ``s_time`` (``frame_index * t / sf``). Both lists are
    reset to zeros on every call.

    Args:
        s: first sample of the range handed to ``fft``.
        e: last sample of the range handed to ``fft``.
        t: passed to ``fft`` as both ``-l`` and ``-S``; also the number of
           samples assumed between consecutive frames when computing times.
        sf: sampling frequency; ``sf/2`` is the upper limit given to ``melspec``.
        sd: path of the sound file to analyse.

    Returns:
        None. Output is delivered through ``s_score`` and ``s_time``.

    Raises:
        FileNotFoundError: if one of the external programs is not on PATH.
        subprocess.CalledProcessError: if one of them exits with a non-zero status.
    """
    global s_score, s_time
    s_score[:] = [0,0,0]
    s_time[:] = [0,0,0]
    nyquist = sf/2

    # Argument lists are built explicitly rather than by splitting a formatted
    # string, so a sound-file path containing whitespace stays a single argument.
    subprocess.check_call(["fft", "-z", "-wHamming",
                           "-l{}".format(t), "-S{}".format(t),
                           "-r{}:{}".format(s, e),
                           str(sd), "temp1.fea"])
    subprocess.check_call(["melspec", "-H300:{}".format(nyquist), "-n60",
                           "temp1.fea", "temp2.fea"])
    subprocess.check_call(["nodiff", "-o1", "-fre_spec_val",
                           "temp2.fea", "nodiff.fea"])  # spectral change
    diffstring = subprocess.check_output(["pplain", "-fre_spec_val_d1", "nodiff.fea"])

    # One output line per frame; collapse each line of numbers to its sum.
    lines = diffstring.decode().rstrip().split('\n')
    diff = [sum(map(float, line.split())) for line in lines]

    # Insert each frame into the descending top-three list, shifting lower entries down.
    for loc, d in enumerate(diff):
        for i in range(3):
            if d > s_score[i]:
                if i < 2:
                    s_score[2] = s_score[1]
                    s_time[2] = s_time[1]
                if i < 1:
                    s_score[1] = s_score[0]
                    s_time[1] = s_time[0]
                s_score[i] = d
                s_time[i] = float(loc)*t/sf
                break


def burst (soundfile, start_time, end_time):
    """Find the most burst-like event between two times in a sound file.

    The file is resampled to 16 kHz with ``sox`` into ``temp.wav`` in the current
    working directory. wave_burst() then ranks abrupt waveform jumps and
    spec_burst() ranks abrupt spectral changes within the requested interval.
    A waveform event and a spectral event that lie less than 4 ms apart are
    treated as the same event and scored together; the best score above the
    threshold wins.

    Args:
        soundfile: path of the sound file to analyse.
        start_time: start of the search interval (multiplied by the sampling
            frequency to obtain a sample index).
        end_time: end of the search interval, same unit as ``start_time``.

    Returns:
        ``(loc, maxb)``: ``loc`` is the time of the winning event in the same
        time base as ``start_time`` and ``maxb`` its score. When no candidate
        scores above 0 the result is ``(-1, 0)``.
        If ``start_time > end_time`` a usage message is printed and the bare
        integer ``-1`` is returned instead of a tuple.

    Raises:
        FileNotFoundError: if ``sox`` or one of the programs spec_burst() runs
            is not on PATH.
        subprocess.CalledProcessError: if one of those programs fails.
    """
    if (start_time>end_time):
        usage()
        return(-1)

    # 'sf' = sampling frequency
    # Resample to 16 kHz with sox. The argument list is built explicitly so a
    # sound-file path containing whitespace is passed as one argument.
    sf=16000
    subprocess.check_call(["sox", "-q", str(soundfile), "temp.wav", "rate", str(sf)])
    # The resampled wav file is analysed directly; no conversion to .sd is done.
    sd = 'temp.wav'

    # Only valleys (negative-going extrema) are scored by wave_burst().
    pol = -1

    # number of samples in the timestep (5ms)
    t=int(step*sf)

    # Interval boundaries expressed in samples of the resampled file.
    s_samp = int(start_time*sf)
    e_samp = int(end_time*sf)

    _, wavdata = wavfile.read(sd)
    data = wavdata[s_samp:e_samp+1]

    wave_burst(t,sf,pol,data) # looks for big jumps in waveform
    spec_burst(s_samp,e_samp,t,sf,sd) # looks for big jumps in spectra

    # Map each waveform slot to a spectral slot lying less than 4 ms away, which
    # indicates that the same event produced both. 0-3 entries result; when a
    # waveform slot matches several spectral slots the last match is kept.
    cand = {}
    for w in range(3):
        for s in range(3):
            if math.fabs(w_time[w] - s_time[s]) < 0.004:
                cand[w]=s

    maxb = 0 # set maxb lower (e.g. -10) if you want lower scores to still 'pass'

    loc = -1

    for (w,s) in cand.items():
        # Unfilled ranking slots keep score 0.0 and time 0.0, so an empty waveform
        # slot can pair with a spectral slot at time 0. It is not a real event and
        # log(0) is undefined, so skip it.
        if w_score[w] <= 0:
            continue
        # burst score - derived from lda over timit bursts
        b = -1.814 + 0.618*math.log(w_score[w]) + 0.003*s_score[s]
        if (b>maxb):
            maxb = b
            loc = w_time[w] + start_time
        if debug:
            print(b)

    return (loc,maxb)

In [3]:
# Example: search one recording for a burst between 1.35 and 1.47 (times are multiplied
# by the sampling frequency inside burst()).
# burst() runs the external programs `sox`, `fft`, `melspec`, `nodiff` and `pplain`, so they
# must be on PATH; the FileNotFoundError recorded below was raised because `fft` was not found.
burst('YO0010c_yoneshiro_europestory_20130424/unaligned/66.wav', 1.35, 1.47)

FileNotFoundError: [Errno 2] No such file or directory: 'fft': 'fft'