<a href="https://colab.research.google.com/github/MaharshiYeluri01/SpeechRecognition/blob/master/Speech_recognition_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
from pathlib import Path

import librosa
import numpy as np
from hmmlearn import hmm
from librosa.feature import mfcc, chroma_stft

In [2]:
class HMMTrainer:
    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components, 
                    covariance_type=self.cov_type, n_iter=self.n_iter)
        else:
            raise TypeError('Invalid model type')

    # X is a 2D numpy array where each row is 13D
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    # Run the model on input data
    def get_score(self, input_data):
        return self.model.score(input_data)

In [3]:
input_folder = Path('~/Downloads/hmm-speech-recognition-0.1/audio').expanduser()

In [4]:
for dirname in os.listdir(input_folder):
    # Get the name of the subfolder 
    subfolder = os.path.join(input_folder, dirname)
    #print(subfolder)
    label = subfolder[subfolder.rfind('/') + 1:]
    print(label)

apple
.DS_Store
kiwi
lime
banana
pineapple
orange
peach


In [5]:
hmm_models = []
for dirname in os.listdir(input_folder):
    subfolder = os.path.join(input_folder, dirname)
    if not os.path.isdir(subfolder): 
         continue
    label = subfolder[subfolder.rfind('/') + 1:]
    X = np.array([])
    y_words = []
    for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]:
        filepath = os.path.join(subfolder, filename)
        # print(f"filename: {filename}")
        y, sr = librosa.load(filepath)            
        mfcc_features = mfcc(y=y, sr=sr)
        if len(X) == 0:
            X = mfcc_features[:,:15]
        else:
            X = np.append(X, mfcc_features[:,:15], axis=0)            
        y_words.append(label)
    print('X.shape =', X.shape)
    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, label))
    hmm_trainer = None

filename: apple07.wav
filename: apple13.wav
filename: apple12.wav
filename: apple06.wav
filename: apple10.wav
filename: apple04.wav
filename: apple05.wav
filename: apple11.wav
filename: apple15.wav
filename: apple01.wav
filename: apple14.wav
filename: apple02.wav
filename: apple03.wav
filename: apple08.wav
X.shape = (280, 15)
filename: kiwi08.wav
filename: kiwi09.wav
filename: kiwi01.wav
filename: kiwi15.wav
filename: kiwi14.wav
filename: kiwi02.wav
filename: kiwi03.wav
filename: kiwi13.wav
filename: kiwi07.wav
filename: kiwi06.wav
filename: kiwi12.wav
filename: kiwi04.wav
filename: kiwi10.wav
filename: kiwi11.wav
X.shape = (280, 15)
filename: lime07.wav
filename: lime13.wav
filename: lime12.wav
filename: lime06.wav
filename: lime10.wav
filename: lime04.wav
filename: lime05.wav
filename: lime11.wav
filename: lime15.wav
filename: lime01.wav
filename: lime14.wav
filename: lime02.wav
filename: lime03.wav
filename: lime08.wav
X.shape = (280, 15)
filename: banana03.wav
filename: banana02.wa

In [6]:
# Test files
input_files = [
    input_folder / 'apple' / 'apple15.wav',
    input_folder / 'banana' / 'banana15.wav',
    input_folder / 'kiwi' / 'kiwi15.wav',
    input_folder / 'orange' / 'orange15.wav',
    input_folder / 'pineapple' / 'pineapple15.wav',
    input_folder / 'lime' / 'lime15.wav',
    input_folder / 'peach' / 'peach15.wav',
]

In [7]:
for input_file in input_files:
    y, sr = librosa.load(input_file)

    # Extract MFCC features
    mfcc_features = mfcc(y=y, sr=sr)
    mfcc_features = mfcc_features[:, :15]

    scores = []
    for item in hmm_models:
        hmm_model, label = item

        score = hmm_model.get_score(mfcc_features)
        scores.append(score)
    index = np.array(scores).argmax()
    print("\nTrue:", input_file.parts[-2])
    print("Predicted:", hmm_models[index][1])


True: apple
Predicted: apple

True: banana
Predicted: banana

True: kiwi
Predicted: kiwi

True: orange
Predicted: orange

True: pineapple
Predicted: pineapple

True: lime
Predicted: lime

True: peach
Predicted: peach
