In [1]:
## save variables
import pickle
## folder names
from glob import glob
## wav import
from scipy.io import wavfile as readWav
## standard libraries
import numpy as np
## MFCC
import sys
!{sys.executable} -m pip install python_speech_features --user
from python_speech_features import mfcc
from python_speech_features import logfbank



In [2]:
## Return the word between two string starting from left
def find_between( s, first, last ):
    try:
        start = s.index( first ) + len( first )
        end = s.index( last, start )
        return s[start:end]
    except ValueError:
        return ""

In [3]:
coreKey = ["Yes", "No", "Up", "Down", "Left", "Right", "On", "Off", "Stop", "Go", "Zero",
           "One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine"]
sampleRate = 16000

In [4]:
folders = glob("dataset/*/")
folders.remove('dataset/_background_noise_/')
print('SIGNALS')
rawDict = {}
for key in folders:
    print('Processing ', key)
    dictKey = find_between( key, '/', '/' )
    tmpFiles = glob(key+'*')
    array = []
    for file in tmpFiles:
        array.append(readWav.read(file)[1].copy())
    rawDict[dictKey] = np.array(array)
print('\nNOISE')
noiseDict = {}
folders = glob('dataset/_background_noise_/*.wav')
for key in folders:
    print('Processing ', key)
    noiseDict[key[key.rindex('/')+1:len(key)]] = readWav.read(key)[1]


SIGNALS
Processing  dataset/two/
Processing  dataset/yes/
Processing  dataset/stop/
Processing  dataset/up/
Processing  dataset/bird/
Processing  dataset/dog/
Processing  dataset/house/
Processing  dataset/seven/
Processing  dataset/off/
Processing  dataset/one/
Processing  dataset/on/
Processing  dataset/zero/
Processing  dataset/sheila/
Processing  dataset/five/
Processing  dataset/happy/
Processing  dataset/three/
Processing  dataset/nine/
Processing  dataset/go/
Processing  dataset/four/
Processing  dataset/left/
Processing  dataset/tree/
Processing  dataset/marvin/
Processing  dataset/no/
Processing  dataset/six/
Processing  dataset/bed/
Processing  dataset/wow/
Processing  dataset/cat/
Processing  dataset/right/
Processing  dataset/down/
Processing  dataset/eight/

NOISE
Processing  dataset/_background_noise_/doing_the_dishes.wav
Processing  dataset/_background_noise_/exercise_bike.wav
Processing  dataset/_background_noise_/white_noise.wav
Processing  dataset/_background_noise_/d



\begin{verbatim}
Parameter         Description
signal 	          the audio signal from which to compute features. Should be an N*1 array
samplerate 	      the samplerate of the signal we are working with.
winlen 	          the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
winstep 	      the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
numcep 	          the number of cepstrum to return, default 13
nfilt 	          the number of filters in the filterbank, default 26.
nfft 	          the FFT size. Default is 512
lowfreq 	      lowest band edge of mel filters. In Hz, default is 0
highfreq 	      highest band edge of mel filters. In Hz, default is samplerate/2\\
preemph 	      apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97\\
ceplifter 	      apply a lifter to final cepstral coefficients. 0 is no lifter. Default is 22\\
appendEnergy 	  if this is true, the zeroth cepstral coefficient is replaced with the log of the total frame energy.\\
returns 	      A numpy array of size (NUMFRAMES by numcep) containing features. Each row holds 1 feature vector.\\
\end{verbatim}

In [5]:
mfccMatDict = {}
print('MFCC features')
for key in rawDict:
    print('Processing ',key)
    array = []
    for sig in rawDict[key]:
        sig.resize(16000, refcheck=False)
        mfcc_feat = mfcc(sig,sampleRate)
        array.append(mfcc_feat)
    mfccMatDict[key] = np.array(array)

MFCC features
Processing  two
Processing  yes
Processing  stop
Processing  up
Processing  bird
Processing  dog
Processing  house
Processing  seven
Processing  off
Processing  one
Processing  on
Processing  zero
Processing  sheila
Processing  five
Processing  happy
Processing  three
Processing  nine
Processing  go
Processing  four
Processing  left
Processing  tree
Processing  marvin
Processing  no
Processing  six
Processing  bed
Processing  wow
Processing  cat
Processing  right
Processing  down
Processing  eight


In [6]:
fbankDict = {}
print('logFilterBanks features')
for key in rawDict:
    print('Processing ',key)
    array = []
    for sig in rawDict[key]:
        sig.resize(16000, refcheck=False)
        fbank_feat = logfbank(sig,sampleRate)
        array.append(fbank_feat.resize(fbank_feat.shape[0]*mfcc_feat.shape[1]))
    fbankDict[key] = np.vstack(array)
    

logFilterBanks features
Processing  two
Processing  yes
Processing  stop
Processing  up
Processing  bird
Processing  dog
Processing  house
Processing  seven
Processing  off
Processing  one
Processing  on
Processing  zero
Processing  sheila
Processing  five
Processing  happy
Processing  three
Processing  nine
Processing  go
Processing  four
Processing  left
Processing  tree
Processing  marvin
Processing  no
Processing  six
Processing  bed
Processing  wow
Processing  cat
Processing  right
Processing  down
Processing  eight


In [8]:
with open('variables/rawDict.pkl', 'wb') as f:  
    pickle.dump(rawDict, f)
with open('variables/noiseDict.pkl', 'wb') as f:  
    pickle.dump(noiseDict, f)    
with open('variables/mfccDict.pkl', 'wb') as f:  
    pickle.dump(mfccMatDict, f)
with open('variables/fbankDict.pkl', 'wb') as f:  
    pickle.dump(fbankDict, f)    