# HMM

In [1]:
# import thư viện
import librosa
import librosa.display
import matplotlib.pyplot as plt
import IPython.display as dsp
import numpy as np
import os
import shutil
import subprocess
import random
from hmmlearn.base import ConvergenceMonitor
from hmmlearn import hmm
from sklearn import preprocessing
from scipy.io.wavfile import read

In [2]:
# Hàm tính đặc trưng MFCC
def mfcc(wav_path, delta=2):
    y, sr = librosa.load(wav_path)
    # MEL frequency cepstrum coefficient
    mfcc_feat = librosa.feature.mfcc(y = y, sr = sr, n_mfcc = 13)
    ans = [mfcc_feat]
    # Calculate the 1st derivative
    if delta >= 1:
        mfcc_delta1 = librosa.feature.delta(mfcc_feat, order = 1, mode ='nearest')
        ans.append(mfcc_delta1)
    # Calculate the 2nd derivative
    if delta >= 2:
        mfcc_delta2 = librosa.feature.delta(mfcc_feat, order = 2, mode ='nearest')
        ans.append(mfcc_delta2)

    return np.transpose(np.concatenate(ans, axis = 0),[1,0])

In [3]:
input_folder = "D:\\xltn\\dataset\\"

In [4]:
# Hàm train model cho từng nhãn 
def train_model_hmm(train_dir):
    train_files = [x for x in os.listdir(train_dir) if x.endswith('.wav')]
    X = np.array([])
    for file_name in train_files:
        file_path = os.path.join(train_dir, file_name)
        try:
            features_mfcc = mfcc(file_path)
            if len(X) == 0:
                X = features_mfcc
            else:
                try:
                    X = np.append(X, features_mfcc, axis=0)
                except:
                    pass
        except:
            print(file_path)
            pass
    model = hmm.GaussianHMM(n_components=4, covariance_type='diag', n_iter=1800)
    # fit hmm model
    np.seterr(all='ignore')
    model.fit(X)
    return model

In [5]:
# mỗi nhãn tạo một model 
hmm_models = []
for digit in os.listdir(input_folder):
    label = digit
    path = input_folder+ digit
    print(path)
    hmm_models.append((train_model_hmm(path), label))

D:\xltn\dataset\a
D:\xltn\dataset\b
D:\xltn\dataset\ban
D:\xltn\dataset\len
D:\xltn\dataset\nhay
D:\xltn\dataset\phai
D:\xltn\dataset\trai
D:\xltn\dataset\xuong


In [6]:
# Hàm dự đoán
def predict_hmm(hmm_models, test_file):
    features_mfcc_test = mfcc(test_file)
    max_score = -float('inf')
    predicted_label = ""
    for item in hmm_models:
        model, label = item
        score = model.score(features_mfcc_test)
        if score > max_score:
            max_score = score
            predicted_label = label
    return predicted_label

In [17]:
# test độ chính xác của model
test_paths = []
for path in os.listdir(input_folder):
    label = path
    arr = os.listdir(input_folder+path)
    test_arr = np.random.choice(arr,100)
    for test_file in test_arr:
        test_paths.append({
            "test_path" : input_folder+path+"\\"+test_file,
            "label": label})

In [18]:
predict_true = 0
predict_false = {'a':0, 'b':0, 'len':0, 'xuong':0, 'trai':0, 'phai':0, 'ban':0, 'nhay':0}
for test in test_paths:
    predict_label = predict_hmm(hmm_models,test["test_path"])
    if predict_label == test["label"]:
        predict_true +=1
    else:
        predict_false[test["label"]]+=1

In [15]:
predict_false

{'a': 3,
 'b': 3,
 'len': 4,
 'xuong': 0,
 'trai': 1,
 'phai': 5,
 'ban': 4,
 'nhay': 4}

In [21]:
print('độ chính xác: {:.2f}%\n'.format(predict_true/len(test_paths)*100))
print("Số lần nhận diện sai của từng nhãn:")
print(predict_false)

độ chính xác: 93.50%

Số lần nhận diện sai của từng nhãn:
{'a': 3, 'b': 7, 'len': 8, 'xuong': 0, 'trai': 1, 'phai': 14, 'ban': 14, 'nhay': 5}


In [78]:
import time
import sounddevice as sd
from scipy.io.wavfile import write
import wavio as wv
  
# Sampling frequency
freq = 22050
  
# Recording duration
duration = 1

time.sleep(3)
# Start recorder with the given values 
# of duration and sample frequency
print("start recording")
recording = sd.rec(int(duration * freq), 
                   samplerate=freq, channels=2)
  
# Record audio for the given number of seconds
sd.wait()
# This will convert the NumPy array to an audio
# file with the given sampling frequency
write("recording0.wav", freq, recording)

print("end recording")
predict_hmm(hmm_models,"D:\\xltn\\recording0.wav")

start recording
end recording


'nhay'

'nhay'