### Load some audio

In [None]:
import numpy as np

import scipy.io.wavfile
import matplotlib.pyplot as plt

#import sys

In [None]:
def spectrogram(wav_filepath):
    """Compute a log-magnitude spectrogram of a WAV file.

    The samples are mean-centred, scaled so the peak absolute value is 1,
    and cut into Hamming-windowed frames (30 ms window, 10 ms step).

    Based on:
    https://mail.python.org/pipermail/chicago/2010-December/007314.html

    Parameters
    ----------
    wav_filepath : str
        Path of the WAV file; read with ``scipy.io.wavfile.read``.
        Multi-channel files are averaged down to a single channel.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, nfft // 2)`` holding the natural log
        of the FFT magnitude of each frame.  Values are always finite:
        magnitudes are floored at the smallest positive normal float
        before the log is taken.
    """
    sample_rate, wav_data = scipy.io.wavfile.read(wav_filepath)

    wav_data = np.asarray(wav_data, dtype=float)
    if wav_data.ndim > 1:
        # scipy returns (n_samples, n_channels); the 1-D window below
        # cannot broadcast against that, so mix down to mono first.
        wav_data = wav_data.mean(axis=1)

    if wav_data.size:
        centred = wav_data - np.mean(wav_data)
        peak = np.max(np.absolute(centred))
    else:
        centred = wav_data
        peak = 0.0

    # A silent/constant recording has peak == 0; dividing would give NaN.
    sound_data = centred / peak if peak > 0 else centred  # within +/- 1

    ## Parameters: 10ms step, 30ms window
    nstep = int(sample_rate * 0.01)
    nwin = int(sample_rate * 0.03)
    nfft = 2 * int(nwin / 2)  # even FFT length (drops one sample if nwin is odd)
    nbins = nfft // 2

    window = np.hamming(nwin)

    # Each frame is sound_data[n - nwin:n]; n is the exclusive end index,
    # so every frame lies fully inside the waveform.
    frame_ends = range(nwin, len(sound_data), nstep)

    # log(0) is -inf, which would contaminate any later statistics.
    floor = np.finfo(float).tiny

    X = np.zeros((len(frame_ends), nbins))
    for i, n in enumerate(frame_ends):
        wav_segment = sound_data[n - nwin:n]
        z = np.fft.fft(window * wav_segment, nfft)
        magnitude = np.absolute(z[:nbins])
        X[i, :] = np.log(np.maximum(magnitude, floor))

    return X

In [None]:
# Recording to analyse
f = './data/audio 23.42.17 13.03.17x.wav'

# Spectrogram of the recording (one row per frame)
X = spectrogram(f)
print("X.shape=", X.shape)

# Per-row standard deviation, rescaled so it spans 0..1
Y = X.std(axis=1)
Y_min = Y.min()
Y_range = np.max(Y) - Y_min
Y = (Y - Y_min) / Y_range

print("Y.shape=", Y.shape)

# Binary mask: 1.0 where the rescaled deviation exceeds 0.25, else 0.0
Y_crop = (Y > 0.25).astype(float)
# Apply some smoothing

def smooth(x, window_len=21):
    """Smooth a 1-D signal with a unit-sum Hamming window.

    Adapted from
    http://scipy-cookbook.readthedocs.io/items/SignalSmooth.html
    but the signal is padded with zeros (not mirrored copies), so values
    near either end are pulled towards zero.

    Parameters
    ----------
    x : array_like
        1-D input signal.
    window_len : int, optional
        Number of taps in the Hamming window; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Smoothed signal with the same length as ``x``.

    Raises
    ------
    ValueError
        If ``window_len`` is smaller than 1.
    """
    if window_len < 1:
        raise ValueError("window_len must be at least 1")

    x = np.asarray(x, dtype=float)
    if x.size == 0:
        return x

    # Pad so that the 'valid' convolution returns exactly len(x) samples.
    # For odd windows both sides get (window_len - 1) // 2 zeros; for even
    # windows the extra zero goes on the right (symmetric padding would
    # make the result one sample short).
    pad_left = (window_len - 1) // 2
    pad_right = window_len - 1 - pad_left
    padded = np.pad(x, (pad_left, pad_right), mode="constant")

    w = np.hamming(window_len)
    return np.convolve(w / w.sum(), padded, mode='valid')

# Widen the mask: smoothing spreads each 1.0 into its neighbours, and the
# low 0.01 threshold then turns every touched frame back into 1.0.
Y_crop = smooth(Y_crop)
Y_crop = np.where(Y_crop>0.01, 1.0, 0.0)
print("Y_crop.shape=", Y_crop.shape)

# Spectrogram with frames along the horizontal axis
plt.imshow(X.T, interpolation='nearest',
    origin='lower',
    aspect='auto')

# Overlay both curves, scaled up to the image height
plt.plot(Y * X.shape[1])

plt.plot(Y_crop * X.shape[1])

plt.show()

# Row position of the global min / max of X: np.argmin/argmax return an
# index into the flattened array, so divide by the number of columns
# (previously a hard-coded 248, which is only right for one sample rate).
np.argmin(X)/X.shape[1], np.argmax(X)/X.shape[1]

In [None]:
# Split the file into voiced segments

#X.max()

# Clip off front and back where Y_crop is zero
#http://stackoverflow.com/questions/4494404/find-large-number-of-consecutive-values-fulfilling-condition-in-a-numpy-array
def contiguous_regions(condition):
    """Find the runs of consecutive truthy values in a 1-D sequence.

    Parameters
    ----------
    condition : array_like
        1-D sequence; each element is interpreted as a boolean.

    Returns
    -------
    list of [int, int]
        One ``[start, stop]`` pair per run, in order, where ``start`` is
        the index of the first truthy element of the run and ``stop`` is
        one past its last element.  Empty list if nothing is truthy.
    """
    mask = np.asarray(condition).astype(bool)

    # Surround the mask with False so that runs touching either end still
    # produce both a rising and a falling edge.
    padded = np.concatenate(([False], mask, [False]))

    # Positions where the value flips.  They alternate start, stop,
    # start, stop, ... and are already expressed as indices into `mask`.
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    return edges.reshape(-1, 2).tolist()

# Y_crop holds only 0.0 / 1.0, so `Y_crop<0.5` is True where the mask is 0.0:
# this lists the [start, stop] index ranges where the mask is off.
# NOTE(review): the cell heading mentions "voiced segments" -- confirm
# whether the mask-on ranges (Y_crop > 0.5) were intended instead.
contiguous_regions(Y_crop<0.5)

In [None]:
# Display column 1 of X for every row (spectrogram() fills one row per frame).
X[:,1]