In [1]:
# p.292 12.7 発話語の認識
import os
import numpy as np
from scipy.io import wavfile

from hmmlearn import hmm
from python_speech_features import mfcc

class ModelHMM(object):
    """Thin wrapper around a single hmmlearn ``GaussianHMM``.

    Args:
        n_components: Number of hidden states of the HMM.
        covariance_type: Covariance parameterisation passed to ``GaussianHMM``.
        n_iter: Maximum number of training iterations passed to ``GaussianHMM``.
        random_state: Optional seed forwarded to ``GaussianHMM`` so that
            training can be made repeatable. The default ``None`` leaves the
            estimator unseeded.
    """

    def __init__(self, n_components=4, covariance_type='diag', n_iter=1000,
                 random_state=None):
        # History of the objects returned by fit(); one entry per train() call.
        self.models = []
        self.model = hmm.GaussianHMM(n_components=n_components,
                                     covariance_type=covariance_type,
                                     n_iter=n_iter,
                                     random_state=random_state)

    # Train the model
    def train(self, training_data, lengths=None):
        """Fit the wrapped HMM and record the fitted model in ``self.models``.

        Args:
            training_data: Feature matrix handed to ``GaussianHMM.fit``.
            lengths: Optional per-sequence lengths forwarded to
                ``GaussianHMM.fit``; when ``None`` the data is treated as a
                single sequence.
        """
        cur_model = self.model.fit(training_data, lengths=lengths)
        self.models.append(cur_model)

    # Compute the score of the input data
    def compute_score(self, input_data):
        """Return ``self.model.score(input_data)`` (higher means a better fit)."""
        return self.model.score(input_data)

In [2]:
def train_model(training_files):
    """Train one ``ModelHMM`` on the MFCC features of several WAV files.

    Args:
        training_files: Iterable of paths to WAV files.

    Returns:
        A ``ModelHMM`` fitted on the features of all files.

    Raises:
        ValueError: If ``training_files`` yields no files, since there is
            nothing to fit the model on.
    """
    feature_blocks = []
    for file in training_files:
        sampling_freq, signal = wavfile.read(file)
        feature_blocks.append(mfcc(signal, sampling_freq))

    if not feature_blocks:
        raise ValueError('train_model needs at least one training file')

    # Stack all per-file feature matrices row-wise in a single copy instead of
    # re-allocating the accumulated array once per file.
    # Note: the files are fitted as one continuous observation sequence;
    # utterance boundaries are not passed to the HMM.
    X = np.concatenate(feature_blocks, axis=0)

    model = ModelHMM()
    model.train(X)
    return model

In [3]:
def build_models(wav_files):
    """Train one model per label.

    Args:
        wav_files: Mapping from a label to the list of WAV paths for it.

    Returns:
        A list of ``(model, label)`` tuples in the mapping's iteration order.
        The last path of every label is left out of training.
    """
    return [(train_model(paths[:-1]), label)
            for label, paths in wav_files.items()]

In [4]:
def speech_recognition(speech_models, test_file):
    """Return the label whose model scores the given recording highest.

    Args:
        speech_models: Sequence of ``(model, label)`` tuples; each model must
            provide ``compute_score``.
        test_file: Path of the WAV file to classify.

    Returns:
        The label paired with the highest-scoring model.
    """
    rate, samples = wavfile.read(test_file)
    features = mfcc(samples, rate)

    # Score the recording against every candidate model, keeping list order so
    # the winning position maps straight back into speech_models.
    scores = []
    for candidate, _label in speech_models:
        scores.append(candidate.compute_score(features))

    best = np.argmax(scores)
    return speech_models[best][1]

In [5]:
def run_tests(speech_models, wav_files):
    """Classify the last file of every label and print expected vs. predicted.

    Args:
        speech_models: Sequence of ``(model, label)`` tuples.
        wav_files: Mapping from a label to the list of WAV paths for it.
    """
    for original_label in wav_files:
        # The final path of each label is the one used for testing.
        held_out_file = wav_files[original_label][-1]
        predicted_label = speech_recognition(speech_models, held_out_file)
        print('\nOriginal: ', original_label)
        print('Predicted:', predicted_label)

In [6]:
input_folder = 'data'

# Map each label to the paths of the .wav files found for it.
wav_files = {}
for root, dirs, files in os.walk(input_folder):
    # os.walk yields file names in arbitrary (filesystem-dependent) order.
    # Sort them so that the file used for the label (files[0]) and the file
    # held out for testing (files[-1]) are the same on every machine and run.
    files = sorted(file for file in files if file.endswith('.wav'))
    if not files:
        continue
    # The label is the first file name minus its last six characters
    # (presumably a two-digit index plus '.wav', e.g. 'apple01.wav' -> 'apple';
    # verify against the actual data layout).
    label = files[0][:-6]
    wav_files[label] = [os.path.join(root, file) for file in files]

speech_models = build_models(wav_files)
run_tests(speech_models, wav_files)


Original:  apple
Predicted: apple

Original:  kiwi
Predicted: kiwi

Original:  lime
Predicted: lime

Original:  banana
Predicted: banana

Original:  pineapple
Predicted: pineapple

Original:  orange
Predicted: orange

Original:  peach
Predicted: peach
