In [7]:
import warnings
warnings.filterwarnings('ignore')

from os import listdir
from os.path import isfile, join
import pickle

from hmmlearn import hmm

from python_speech_features import mfcc
import scipy.io.wavfile as wav
import matplotlib.pyplot as plot
import numpy

from sklearn.model_selection import train_test_split

import pomegranate as pom
from random import random

In [8]:
# Trained models and train/test feature splits, both keyed by word
# (i.e. by the name of the word's sub-directory under data/).
ghmms = {}
data = {}
# Keep only entries of data/ that can be word directories: names containing a
# dot are skipped as before, and regular files without an extension are now
# skipped too, since calling listdir() on them later would raise.
datadirs = [f for f in listdir("data/")
            if '.' not in f and not isfile(join("data/", f))]
print(datadirs)

['dog', 'left', 'sheila', 'seven']


In [9]:
# For every word directory: read each recording, convert it to MFCC features
# and split the recordings of that word 70/30 into train and test sets.
for directory in datadirs:
    curdir = join("data/", directory)
    # listdir() returns entries in arbitrary order; sorting them makes the
    # seeded split below select the same files on every run and machine.
    datafiles = sorted(f for f in listdir(curdir) if isfile(join(curdir, f)))

    dataForWord = []
    for f in datafiles:
        (rate, signal) = wav.read(join(curdir, f))
        # One MFCC array per recording, computed with a Hamming window.
        dataForWord.append(mfcc(signal, rate, winfunc=numpy.hamming))

    # Fixed random_state so the reported accuracy is reproducible.
    train, test = train_test_split(dataForWord, test_size=0.3, random_state=42)
    data[directory] = {'train': train, 'test': test}
    print(directory, len(datafiles), len(data[directory]['train']), len(data[directory]['test']))


dog 1746 1222 524
left 2353 1647 706
sheila 1734 1213 521
seven 2377 1663 714


In [10]:
# Per word: all training frames stacked into one 2-D array (flatData) and the
# number of frames contributed by each recording (lengths).
flatData = {}
lengths = {}
for directory in datadirs:
    trainSequences = data[directory]['train']
    lengths[directory] = [len(fileData) for fileData in trainSequences]
    # Stack along the frame axis directly instead of round-tripping through
    # Python lists; the feature width is taken from the arrays themselves
    # rather than being hard-coded.
    flatData[directory] = numpy.concatenate(trainSequences, axis=0)
    print(directory, flatData[directory].shape, sum(lengths[directory]))

dog (118759, 13) 118759
left (161159, 13) 161159
sheila (118576, 13) 118576
seven (162478, 13) 162478


In [66]:
class GMMBuilder(object):
    """Factory object exposing a ``from_samples`` method that builds a GMM.

    An instance is handed to ``pom.HiddenMarkovModel.from_samples`` in place
    of a distribution class, so that each emission built through it is a
    Gaussian mixture with ``n_mix`` components.
    """

    def __init__(self, n_mix = 1, n_jobs = 4):
        # n_mix: number of mixture components per GMM
        # n_jobs: forwarded to pomegranate as its n_jobs argument
        self.n_mix, self.n_jobs = n_mix, n_jobs

    def from_samples(self, X):
        """Build a ``pom.GeneralMixtureModel`` of multivariate Gaussians from X.

        ``max_iterations=0`` is passed through to pomegranate; presumably this
        leaves the mixture at its initial estimate so the enclosing HMM
        training does the refinement -- TODO confirm against pomegranate docs.
        """
        print('new Gaussian Mix Model')
        gmm_options = dict(max_iterations=0, n_jobs=self.n_jobs, verbose=True)
        return pom.GeneralMixtureModel.from_samples(
            pom.MultivariateGaussianDistribution, self.n_mix, X, **gmm_options)
        

class GMMHMM(object):
    """Hidden Markov model with Gaussian-mixture emissions for a single word.

    Thin wrapper around ``pom.HiddenMarkovModel``: ``fit`` trains the model
    and stores it in ``self.hmm``; ``log_probability`` and ``score_samples``
    delegate scoring to that fitted model.
    """

    def __init__(self, word, n_components=1, n_mix=1, n_jobs=4):
        # word: label this model represents
        # n_components: number of HMM states
        # n_mix: Gaussian mixture components per state
        # n_jobs: forwarded to pomegranate as its n_jobs argument
        self.word = word
        self.n_components=n_components
        self.n_mix=n_mix
        self.n_jobs=n_jobs
        self.hmm = None  # set by fit()

    def fit(self, X):
        """Train the HMM on X with Baum-Welch and return the pomegranate model.

        X is passed unchanged to ``pom.HiddenMarkovModel.from_samples``; the
        notebook supplies a list of per-recording feature arrays.
        """
        self.hmm = pom.HiddenMarkovModel.from_samples(
            GMMBuilder(self.n_mix, self.n_jobs), 
            self.n_components, X, n_jobs = self.n_jobs,
            algorithm='baum-welch', verbose=True
        )
        return self.hmm

    def _fitted(self):
        """Return the trained model, or raise RuntimeError if fit() was not run."""
        if self.hmm is None:
            raise RuntimeError(
                "GMMHMM for %r has not been fitted; call fit() first" % (self.word,))
        return self.hmm

    def log_probability(self, X):
        """Return the log-probability of sequence X under the fitted model."""
        return self._fitted().log_probability(X)

    def score_samples(self, X):
        """Return ``(log_probability, posteriors)`` for sequence X.

        Provided so callers written against an hmmlearn-style API, which index
        the result with ``[0]`` to obtain the log-probability, work with this
        wrapper.
        """
        model = self._fitted()
        return model.log_probability(X), model.predict_proba(X)

In [None]:
# Train one GMM-HMM per word (10 states, 3 mixture components per state) on
# that word's training recordings and register it in ghmms once fitted.
for word in datadirs:
    print(word, flatData[word].shape)
    trainingSequences = data[word]['train']
    wordModel = GMMHMM(word, n_components=10, n_mix=3)
    wordModel.fit(trainingSequences)
    ghmms[word] = wordModel

(118759, 13)
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1075
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1080
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1073
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1085
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1062
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1088
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1072
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1043
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1027
new Gaussian Mix Model
Total Improvement: 0
Total Time (s): 0.1045
[1] Improvement: 201263.70396616124	Time (s): 36.49
[2] Improvement: 52281.767341984436	Time (s): 42.83
[3] Improvement: 64235.565864954144	Time (s): 38.93
[4] Improvement: 9611.900646008551	Time (s): 45.46
[5] Improvement: 6629.737440981902	Time (s): 29.81
[6] Improvement: 5091.840945320204	Time (s): 24.4
[7] Impro

[144] Improvement: 2.5280730994418263	Time (s): 41.47
[145] Improvement: 3.215497429482639	Time (s): 41.46
[146] Improvement: 6.239691894501448	Time (s): 45.65
[147] Improvement: 7.362056911922991	Time (s): 42.48
[148] Improvement: 5.8147277645766735	Time (s): 53.08
[149] Improvement: 7.414914502762258	Time (s): 51.81
[150] Improvement: 9.153062922880054	Time (s): 44.7
[151] Improvement: 10.471362974494696	Time (s): 47.57
[152] Improvement: 12.558307328261435	Time (s): 38.89
[153] Improvement: 14.028937970288098	Time (s): 52.64
[154] Improvement: 15.243535195477307	Time (s): 59.05
[155] Improvement: 15.737909103743732	Time (s): 54.23
[156] Improvement: 15.345800250768661	Time (s): 50.89
[157] Improvement: 17.11968486569822	Time (s): 49.21
[158] Improvement: 19.0188996354118	Time (s): 49.71
[159] Improvement: 18.135401571169496	Time (s): 44.11
[160] Improvement: 17.793782949447632	Time (s): 56.77
[161] Improvement: 17.655752820894122	Time (s): 50.9
[162] Improvement: 19.308748342096806	

In [None]:
# Persist the trained models; the context manager guarantees the file is
# flushed and closed even if pickling fails part-way.
with open("hmmset.p", "wb") as modelFile:
    pickle.dump(ghmms, modelFile)

In [None]:
def predict(fileData, models=None):
    """Return the word whose model assigns the highest log-probability.

    Parameters
    ----------
    fileData : feature sequence of one recording (the notebook passes the
        array returned by ``mfcc``).
    models : optional mapping of word -> model; defaults to the notebook-level
        ``ghmms``. A value may be a ``GMMHMM`` wrapper (its ``.hmm`` is used)
        or any object exposing ``log_probability(sequence)``.

    Returns
    -------
    The key of the best-scoring model. On equal scores the first key in
    iteration order wins.

    Raises
    ------
    ValueError if there are no models, or a wrapper has not been fitted.
    """
    if models is None:
        models = ghmms
    if not models:
        raise ValueError("no trained models available for prediction")

    bestKey = None
    bestScore = None
    for key, model in models.items():
        # GMMHMM keeps the trained pomegranate model in `.hmm`; the wrapper
        # itself has no scoring method, so score through the wrapped model.
        scorer = getattr(model, 'hmm', model)
        if scorer is None:
            raise ValueError("model for %r has not been fitted" % (key,))
        logProbability = scorer.log_probability(fileData)
        # Track the best (key, score) pair directly rather than keying a dict
        # by score, which silently drops words that tie on score.
        if bestKey is None or logProbability > bestScore:
            bestKey, bestScore = key, logProbability
    return bestKey

In [None]:
# Flatten the per-word test splits into a single list of
# (features, expected word) pairs.
labeledTestData = [
    (fileData, key)
    for key in data
    for fileData in data[key]['test']
]

In [None]:
# Count the test recordings whose predicted word matches their label.
successful = sum(
    1 for sampleFeatures, expectedWord in labeledTestData
    if predict(sampleFeatures) == expectedWord
)

In [None]:
# Report the size of the test set and the fraction classified correctly.
totalTested = len(labeledTestData)
print('# data points:', totalTested)
print('success rate:', successful / totalTested)

In [None]:
# Extract MFCC features from a separate recording, using the same Hamming
# window as for the training data, and show the first feature frame.
sampleRate, samples = wav.read('kim-shiela.wav')
features = mfcc(samples, sampleRate, winfunc=numpy.hamming)
print(features[0])

In [None]:
# Classify the ad-hoc recording with the trained word models.
predictedWord = predict(features)
print(predictedWord)