In [137]:
import warnings
warnings.filterwarnings('ignore')

from os import listdir
from os.path import isfile, join
import pickle

from hmmlearn import hmm

from python_speech_features import mfcc
import scipy.io.wavfile as wav
import matplotlib.pyplot as plot
import numpy

from sklearn.model_selection import train_test_split

In [138]:
# Registries shared by the later cells: `ghmms` maps a word to its trained
# HMM, `data` maps a word to its train/test feature sequences.
ghmms = {}
data = {}

# Every sub-directory of data/ is treated as one word class. Names containing
# a dot are skipped (as before); extension-less regular files such as LICENSE
# or README are now skipped too, because they would otherwise be mistaken for
# word folders and make the later listdir() call on them fail.
datadirs = [
    f for f in listdir("data/")
    if '.' not in f and not isfile(join("data/", f))
]
print(datadirs)

['dog', 'left', 'sheila', 'seven']


In [139]:
# Fixed seed so the 70/30 split (and therefore the reported accuracy) can be
# reproduced after a kernel restart.
SPLIT_SEED = 0

for directory in datadirs:
    curdir = join("data/", directory)
    # listdir() returns entries in arbitrary order; sort them so the seeded
    # split below always sees the recordings in the same order.
    datafiles = sorted(f for f in listdir(curdir) if isfile(join(curdir, f)))

    # One MFCC feature array per recording of this word.
    dataForWord = []
    for f in datafiles:
        rate, signal = wav.read(join(curdir, f))
        dataForWord.append(mfcc(signal, rate, winfunc=numpy.hamming))

    # Hold out 30% of the recordings of each word for evaluation.
    train, test = train_test_split(
        dataForWord, test_size=0.3, random_state=SPLIT_SEED
    )
    data[directory] = {'train': train, 'test': test}
    print(directory, len(datafiles), len(data[directory]['train']), len(data[directory]['test']))


dog 1746 1222 524
left 2353 1647 706
sheila 1734 1213 521
seven 2377 1663 714


In [140]:
# hmmlearn's fit() takes all training sequences stacked into one 2-D array
# plus a list giving the number of rows contributed by each sequence.
flatData = {}
lengths = {}
for directory in datadirs:
    trainSequences = data[directory]['train']

    # Number of frames (rows) in each recording, in stacking order.
    lengths[directory] = [len(fileData) for fileData in trainSequences]

    if trainSequences:
        # Stack row-wise directly in numpy: this avoids the round trip through
        # Python lists and takes the feature width from the data instead of
        # assuming it.
        flatData[directory] = numpy.vstack(trainSequences)
    else:
        # Same result the list-based version produced for an empty training
        # set: zero rows of the default 13 cepstral coefficients.
        flatData[directory] = numpy.empty((0, 13))

    print(directory, flatData[directory].shape, sum(lengths[directory]))

dog (118788, 13) 118788
left (161256, 13) 161256
sheila (118636, 13) 118636
seven (162460, 13) 162460


In [141]:
# Number of hidden states used for every word model.
N_HMM_STATES = 7
# Fixed seed so parameter initialisation (and hence the trained models and the
# reported accuracy) is reproducible across runs.
HMM_SEED = 0

# Train one Gaussian HMM per word on that word's stacked training frames.
for directory in datadirs:
    print(flatData[directory].shape)
    ghmm = hmm.GaussianHMM(n_components=N_HMM_STATES, random_state=HMM_SEED)
    # `lengths` tells fit() where one recording ends and the next begins
    # inside the stacked array.
    ghmm.fit(flatData[directory], lengths=lengths[directory])
    ghmms[directory] = ghmm

(118788, 13)
(161256, 13)
(118636, 13)
(162460, 13)


In [142]:
# Persist the trained per-word models. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way.
with open("hmmset.p", "wb") as modelFile:
    pickle.dump(ghmms, modelFile)

In [143]:
def predict(fileData, models=None):
    """Return the key of the model that assigns `fileData` the highest score.

    Parameters
    ----------
    fileData : array-like
        Feature sequence for one recording, passed unchanged to each model's
        ``score`` method.
    models : dict, optional
        Mapping from label to a model exposing ``score(X)``. Defaults to the
        notebook-level ``ghmms`` dictionary.

    Returns
    -------
    The label (dictionary key) whose model returned the largest score.

    Raises
    ------
    ValueError
        If there are no models to choose from.
    """
    if models is None:
        models = ghmms
    if not models:
        raise ValueError("predict() needs at least one trained model")

    # score() returns the same log-likelihood as score_samples()[0] without
    # also computing the per-frame posteriors. Selecting with max(key=...)
    # avoids a dict keyed by score, in which two models with an identical
    # score would silently overwrite each other.
    return max(models, key=lambda label: models[label].score(fileData))

In [144]:
# Flatten the per-word held-out sets into a single list of
# (feature sequence, expected word) pairs.
labeledTestData = [
    (sample, word)
    for word, splits in data.items()
    for sample in splits['test']
]

In [145]:
# Count the held-out recordings whose predicted word matches the label.
successful = sum(
    1
    for sample, expected in labeledTestData
    if predict(sample) == expected
)

In [146]:
# Report the size of the evaluation set and the fraction classified correctly.
totalSamples = len(labeledTestData)
successRate = successful / totalSamples
print('# data points:', totalSamples)
print('success rate:', successRate)

# data points: 2465
success rate: 0.8356997971602435


In [147]:
# Extract features from a single recording outside the dataset, with the same
# MFCC settings used for the training data.
rate, signal = wav.read('brett-seven.wav')
features = mfcc(signal, samplerate=rate, winfunc=numpy.hamming)

# Show the coefficients of the first frame as a quick sanity check.
firstFrame = features[0]
print(firstFrame)

[ 16.66124922 -47.84035587  29.44517202 -16.87728624   0.07080559
   7.37416151 -18.61087704  -2.4980464  -23.90755594 -18.89497544
  19.90086434  -2.77875535  -7.35379118]


In [148]:
# Classify the recording loaded above and show the winning word.
predictedWord = predict(features)
print(predictedWord)

seven
