# Practical 10

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import os

import numpy as np

from scipy.io import wavfile
from hmmlearn import hmm

import librosa
from librosa.feature import mfcc

## Gaussian Hmm Model

In [12]:
class HMMTrainer(object):
    def __init__(self, model_name="GaussianHMM", n_components=4, cov_type="diag", n_iter=1000):
        self.model_name = model_name
        self.n_components = n_components
        self.cov_type = cov_type
        self.n_iter = n_iter
        self.models = []
        
        self.model = hmm.GaussianHMM(n_components=self.n_components, covariance_type=self.cov_type, n_iter=self.n_iter)
        
    # X is a 2D numpy array where each row is 130
    def train(self, X):
        np.seterr(all='ignore')
        self.models.append(self.model.fit(X))
        
    def get_score(self, input_data):
        return self.model.score(input_data)

In [6]:
input_folder = "Audio_data"
ls = os.listdir(input_folder)
ls = ls[:-1]
ls

['kiwi', 'banana', 'lime', 'pineapple', 'orange', 'apple', 'peach']

In [7]:
for dirname in ls:
    subfolder = os.path.join(input_folder, dirname)
    print("subfolder: ", subfolder)
    label = dirname
    print("label: ", label)

subfolder:  Audio_data/kiwi
label:  kiwi
subfolder:  Audio_data/banana
label:  banana
subfolder:  Audio_data/lime
label:  lime
subfolder:  Audio_data/pineapple
label:  pineapple
subfolder:  Audio_data/orange
label:  orange
subfolder:  Audio_data/apple
label:  apple
subfolder:  Audio_data/peach
label:  peach


In [13]:
hmm_models = []

for dirname in ls:
    subfolder = os.path.join(input_folder, dirname)
    
    X = np.array([])
    y_words = []
    
    for filename in [x for x in os.listdir(subfolder) if x.endswith('.wav')][:-1]:
        filepath = os.path.join(subfolder, filename)
        sampling_freq, audio = librosa.load(filepath)
        mfcc_features = mfcc(sampling_freq, audio)
        
        if len(X) == 0:
            X = mfcc_features[:, :15]
        else:
            X = np.append(X, mfcc_features[:,:15], axis=0)
            
        y_words.append(dirname)
        
    print("X shape: ", X.shape)

    hmm_trainer = HMMTrainer()
    hmm_trainer.train(X)
    hmm_models.append((hmm_trainer, dirname))
    hmm_trainer = None

X shape:  (280, 15)
X shape:  (280, 15)
X shape:  (280, 15)
X shape:  (280, 15)
X shape:  (280, 15)
X shape:  (280, 15)
X shape:  (280, 15)


In [17]:
# Text files
input_files = [
    "Audio_data/pineapple/pineapple15.wav",
    "Audio_data/orange/orange15.wav",
    "Audio_data/apple/apple15.wav",
    "Audio_data/kiwi/kiwi15.wav"
]

In [21]:
for input_file in input_files:
    sampling_freq, audio = librosa.load(input_file)
    
    # Extract MFCC features
    mfcc_features = mfcc(sampling_freq, audio)
    mfcc_features = mfcc_features[:, :15]
    
    scores = []
    for item in hmm_models:
        hmm_model, label = item
        
        score = hmm_model.get_score(mfcc_features)
        scores.append(score)
        
    index = np.array(scores).argmax()
    print("True file path: ", input_file)
    print("True name: ", input_file[input_file.find('/')+1:input_file.rfind('/')])
    
    print("Predicated: ", hmm_models[index][1])
    print("")

True file path:  Audio_data/pineapple/pineapple15.wav
True name:  pineapple
Predicated:  pineapple

True file path:  Audio_data/orange/orange15.wav
True name:  orange
Predicated:  orange

True file path:  Audio_data/apple/apple15.wav
True name:  apple
Predicated:  apple

True file path:  Audio_data/kiwi/kiwi15.wav
True name:  kiwi
Predicated:  kiwi

