In [38]:
import warnings
warnings.filterwarnings('ignore')

from os import listdir
from os.path import isfile, join
import pickle

from hmmlearn import hmm

from python_speech_features import mfcc
import scipy.io.wavfile as wav
import matplotlib.pyplot as plot
import numpy

from sklearn.model_selection import train_test_split

In [39]:
# Per-word trained models and per-word train/test feature splits; both are
# filled in by later cells.
ghmms = {}
data = {}

# Each word class lives in its own sub-directory of data/.  Entries whose name
# contains a dot are skipped (files with extensions, hidden entries), and so
# are extension-less regular files, which would otherwise be mistaken for a
# word directory.  Sorting makes the class order independent of the arbitrary
# order in which listdir() returns entries.
datadirs = sorted(
    f for f in listdir("data/")
    if '.' not in f and not isfile(join("data/", f))
)
print(datadirs)

['dog', 'left', 'seven', 'sheila']


In [40]:
# Fixed seed so the train/test partition (and every number derived from it) is
# the same on each run.
SPLIT_SEED = 42

for directory in datadirs:
    curdir = join("data", directory)
    # Sorted so that, together with the fixed seed, the same files land in the
    # same split regardless of the order the filesystem lists them in.
    datafiles = sorted(f for f in listdir(curdir) if isfile(join(curdir, f)))

    # One MFCC feature array per recording, computed with a Hamming window.
    dataForWord = []
    for f in datafiles:
        rate, signal = wav.read(join(curdir, f))
        dataForWord.append(mfcc(signal, rate, winfunc=numpy.hamming))

    # Hold out 30% of each word's recordings for evaluation.
    train, test = train_test_split(dataForWord, test_size=0.3, random_state=SPLIT_SEED)
    data[directory] = {'train': train, 'test': test}
    print(directory, len(datafiles), len(train), len(test))


dog 1746 1222 524
left 2353 1647 706
seven 2377 1663 714
sheila 1734 1213 521


In [41]:
# Training input in the "stacked sequences + lengths" layout: for every word,
# all training sequences are stacked row-wise into one 2-D array, and the row
# count of each sequence is recorded so the boundaries can be recovered.
flatData = {}
lengths = {}
for directory in datadirs:
    trainSequences = data[directory]['train']

    # Stack the arrays directly.  This keeps whatever feature width the
    # extraction step produced instead of hard-coding 13 columns, and avoids
    # round-tripping every value through Python lists.
    flatData[directory] = numpy.concatenate(trainSequences, axis=0)
    lengths[directory] = [len(seq) for seq in trainSequences]

    # Every stacked row must be accounted for by exactly one sequence.
    assert flatData[directory].shape[0] == sum(lengths[directory])
    print(directory, flatData[directory].shape, sum(lengths[directory]))

dog (118913, 13) 118913
left (161159, 13) 161159
seven (162509, 13) 162509
sheila (118688, 13) 118688


In [42]:
# Number of hidden states per word model, and a fixed seed so that training
# produces the same models on every run.
N_STATES = 7
HMM_SEED = 42

for directory in datadirs:
    print(flatData[directory].shape)
    # One Gaussian-emission HMM per word, trained on that word's sequences
    # only; `lengths` tells fit() where each recording starts and ends within
    # the stacked rows.
    ghmm = hmm.GaussianHMM(n_components=N_STATES, random_state=HMM_SEED)
    ghmm.fit(flatData[directory], lengths=lengths[directory])
    ghmms[directory] = ghmm

(118913, 13)
(161159, 13)
(162509, 13)
(118688, 13)


In [43]:
# Persist the trained per-word models.  The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open("hmmset.p", "wb") as modelFile:
    pickle.dump(ghmms, modelFile)

In [51]:
def predict(fileData, models=None):
    """Return the label of the model that best explains one feature sequence.

    Parameters
    ----------
    fileData : array-like
        Feature matrix of a single recording; passed unchanged to each
        model's ``score`` method.
    models : mapping of label -> model, optional
        Candidates to compare.  Each value must provide ``score(X)`` returning
        a log-likelihood.  Defaults to the notebook-level ``ghmms`` dict.

    Returns
    -------
    The key of the highest-scoring model.  On an exact tie the first key in
    iteration order wins.

    Raises
    ------
    ValueError
        If there are no models to choose from.
    """
    if models is None:
        models = ghmms
    if not models:
        raise ValueError("predict() needs at least one trained model")

    # score() yields the same log-likelihood as score_samples()[0] but skips
    # computing the per-frame state posteriors, which were discarded anyway.
    logLikelihoods = {key: model.score(fileData) for key, model in models.items()}

    # Arg-max over labels.  Keying by label (rather than by score) means two
    # models with an identical score can no longer overwrite one another.
    return max(logLikelihoods, key=logLikelihoods.get)

In [45]:
# Flatten the per-word test splits into a single list of
# (feature matrix, expected word) pairs, word by word.
labeledTestData = [
    (fileData, key)
    for key in data
    for fileData in data[key]['test']
]

In [46]:
# Count the held-out recordings whose predicted word matches the true label.
successful = sum(
    1
    for test in labeledTestData
    if predict(test[0]) == test[1]
)

In [47]:
# Report the size of the evaluation set and the fraction classified correctly.
totalTests = len(labeledTestData)
print('# data points:', totalTests)
print('success rate:', float(successful) / totalTests)

# data points: 2465
success rate: 0.8081135902636917


In [52]:
# Load one standalone recording (it does not live under data/) and extract its
# MFCC features with the same window function used for the training data.
rate, signal = wav.read('kim-shiela.wav')
features = mfcc(signal, samplerate=rate, winfunc=numpy.hamming)

# Show the first row of the feature matrix as a quick sanity check.
firstFrame = features[0]
print(firstFrame)

[ 10.62156486 -10.50005401 -21.53243413  -6.26368371   7.66470925
  -2.74448373   1.94527387  11.52021881  -8.6896683  -12.0482188
   0.9109049    4.34333176   4.87513094]


In [53]:
# Classify the standalone recording with the trained word models.
# NOTE(review): the saved output of this cell also lists a score per word,
# which predict() as defined in this notebook does not print -- the output
# looks stale; re-run the notebook top to bottom to refresh it.
predictedWord = predict(features)
print(predictedWord)

dog
-3876.36464388
left
-3734.57576592
seven
-3691.87356977
sheila
-3723.60394865
seven
