In [1]:
import os
import argparse 

import numpy as np
from scipy.io import wavfile 
from hmmlearn import hmm
import librosa

In [2]:
from librosa.feature import mfcc

In [3]:
class HMMTrainer(object):
    def __init__(self, model_name='GaussianHMM', n_components=4, cov_type='diag', n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []

        if self.model_name == 'GaussianHMM':
            self.model = hmm.GaussianHMM(n_components=self.n_components, 
                    covariance_type=self.cov_type, n_iter=self.n_iter)
        else:
            raise TypeError('Invalid model type')

    # X is a 2D numpy array where each row is 13D
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))

    # Run the model on input data
    def get_score(self, input_data):
        return self.model.score(input_data)

In [4]:
input_folder='audio'

In [5]:
for dirname in os.listdir(input_folder):
    # Get the name of the subfolder 
    if dirname != ".DS_Store":
        subfolder = os.path.join(input_folder, dirname)
        #print(subfolder)
        label = subfolder[subfolder.rfind('/') + 1:]
        print(label)

apple
kiwi
lime
banana
pineapple
orange
peach


In [6]:
hmm_models = []
for dirname in os.listdir(input_folder):
    if dirname != ".DS_Store":
        subfolder = os.path.join(input_folder, dirname)
        if not os.path.isdir(subfolder): 
             continue
        label = subfolder[subfolder.rfind('/') + 1:]
        X = np.array([])
        y_words = []
        for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]:
            filepath = os.path.join(subfolder, filename)
            sampling_freq, audio = librosa.load(filepath)            
            mfcc_features = mfcc(y=sampling_freq, sr=audio)
            print(mfcc_features)
            if len(X) == 0:
                X = mfcc_features[:,:15]
            else:
                X = np.append(X, mfcc_features[:,:15], axis=0)            
            y_words.append(label)
        print('X.shape =', X.shape)
        hmm_trainer = HMMTrainer()
        hmm_trainer.train(X)
        hmm_models.append((hmm_trainer, label))
        hmm_trainer = None


[[-4.43639282e+02 -3.49186676e+02 -2.57997406e+02 -2.26729706e+02
  -2.58248138e+02 -3.50571320e+02 -3.79025238e+02 -3.92805664e+02
  -3.98346436e+02 -3.48705719e+02 -3.34747711e+02 -3.64468140e+02
  -3.77009430e+02 -3.83602325e+02 -4.09540100e+02 -4.30369293e+02
  -4.52430847e+02 -4.90488525e+02]
 [ 1.73118988e+02  2.12677795e+02  2.51574066e+02  2.66749939e+02
   2.55671173e+02  2.24943802e+02  2.04164413e+02  1.96736816e+02
   1.95439774e+02  2.23034286e+02  2.38780792e+02  2.42782104e+02
   2.45849670e+02  2.48211533e+02  2.32676453e+02  2.17913742e+02
   1.93025177e+02  1.67377838e+02]
 [-7.24142303e+01 -1.25769470e+02 -1.60059387e+02 -1.63723984e+02
  -1.41792999e+02 -9.89223175e+01 -1.07735550e+02 -1.06600113e+02
  -9.66536560e+01 -9.29454041e+01 -8.35502472e+01 -5.10754280e+01
  -3.68221779e+01 -2.27837830e+01 -1.22448921e+01 -7.03858232e+00
  -1.80121441e+01 -1.45386248e+01]
 [-4.02107697e+01 -4.83083878e+01 -4.16290474e+01 -2.73921547e+01
  -1.77278633e+01 -3.33775940e+01 -5.

In [7]:
# Test files
input_files = [
            'audio/pineapple/pineapple15.wav',
            'audio/orange/orange15.wav',
            'audio/apple/apple15.wav',
            'audio/kiwi/kiwi15.wav',
            'pnp_test.wav'
            ]

In [8]:
for input_file in input_files:
      sampling_freq, audio = librosa.load(input_file)

        # Extract MFCC features
      mfcc_features = mfcc(y=sampling_freq, sr=audio)
      mfcc_features=mfcc_features[:,:15]

      scores=[]
      for item in hmm_models:
          hmm_model, label = item
            
          score = hmm_model.get_score(mfcc_features)
          scores.append(score)
      index=np.array(scores).argmax()
      print("\nTrue:", input_file[input_file.find('/')+1:input_file.rfind('/')])
      print("Predicted:", hmm_models[index][1]) 




True: pineapple
Predicted: pineapple

True: orange
Predicted: orange

True: apple
Predicted: apple

True: kiwi
Predicted: kiwi

True: pnp_test.wa
Predicted: apple
