In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import IPython.display as dsp
import numpy as np
import os
import shutil
import subprocess
import random
from hmmlearn.base import ConvergenceMonitor
from hmmlearn import hmm
from sklearn import preprocessing
from scipy.io.wavfile import read
from python_speech_features import mfcc
from python_speech_features import delta

In [None]:
# Path templates for individual recordings, filled in later with str.format().
# Raw strings keep the Windows separators readable.
path1 = r"D:\xltn\data\{}\{}\file{}.wav"
path2 = r"D:\xltn\dataset\{}\file{}.wav"

In [None]:
def mfcc(wav_path, delta=2):
    """
    Load an audio file and build its MFCC feature matrix.

    The 13 base coefficients are optionally followed by their first- and
    second-order deltas, stacked along the coefficient axis.

    :param wav_path: path of the audio file to load
    :param delta: highest derivative order to append (0, 1 or 2), default 2
    :return: 2-D array with one row per frame and 13 * (1 + orders appended) columns
    """
    signal, sample_rate = librosa.load(wav_path)
    base_coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)

    stacked = [base_coeffs]
    # Append the derivative of each order up to the one requested.
    for order in (1, 2):
        if delta >= order:
            stacked.append(
                librosa.feature.delta(base_coeffs, order=order, mode='nearest')
            )

    # librosa yields (coefficients, frames); flip to (frames, coefficients).
    coeffs_by_frame = np.concatenate(stacked, axis=0)
    return np.transpose(coeffs_by_frame, (1, 0))

In [None]:
# Root folder of the dataset. Ends with a path separator so that a
# sub-folder name can be appended to it directly.
input_folder = "D:\\xltn\\dataset\\"

In [None]:
def train_model_hmm(train_dir, n_components=4, covariance_type='diag',
                    n_iter=1800, random_state=None):
    """
    Train one Gaussian HMM on every .wav file found in a directory.

    Each file contributes one observation sequence (its MFCC matrix). The
    sequences are stacked into a single array and their individual lengths
    are passed to ``fit`` so the model treats them as separate utterances
    instead of one long recording.

    :param train_dir: directory containing the .wav files of a single label
    :param n_components: number of hidden states, default 4
    :param covariance_type: covariance type passed to GaussianHMM, default 'diag'
    :param n_iter: maximum number of EM iterations, default 1800
    :param random_state: seed passed to GaussianHMM; set it for reproducible training
    :return: the fitted hmm.GaussianHMM
    :raises ValueError: if no usable feature matrix could be extracted from train_dir
    """
    # Sort so the training data order does not depend on os.listdir ordering.
    wav_names = sorted(
        name for name in os.listdir(train_dir) if name.endswith('.wav')
    )

    sequences = []
    for file_name in wav_names:
        file_path = os.path.join(train_dir, file_name)
        try:
            features_mfcc = np.asarray(mfcc(file_path))
        except Exception as exc:
            # Best effort: report the unreadable file and keep going.
            print("{}: {!r}".format(file_path, exc))
            continue

        if len(features_mfcc) == 0:
            # A file with no frames carries no information for training.
            continue
        if sequences and features_mfcc.shape[1:] != sequences[0].shape[1:]:
            # Cannot be stacked with the others; report instead of dropping silently.
            print("{}: skipped, feature shape {} does not match {}".format(
                file_path, features_mfcc.shape, sequences[0].shape))
            continue
        sequences.append(features_mfcc)

    if not sequences:
        raise ValueError(
            "no usable .wav training data found in {!r}".format(train_dir))

    # One concatenate instead of repeated np.append (which recopies every time).
    X = np.concatenate(sequences, axis=0)
    lengths = [len(seq) for seq in sequences]

    model = hmm.GaussianHMM(n_components=n_components,
                            covariance_type=covariance_type,
                            n_iter=n_iter,
                            random_state=random_state)
    # Silence numpy floating-point warnings during EM.
    # NOTE: this changes numpy's error handling globally, not just for this call.
    np.seterr(all='ignore')
    # `lengths` marks where each file's sequence ends inside X.
    model.fit(X, lengths=lengths)
    return model

In [None]:
# Train one model per label: every sub-folder of input_folder is one label.
hmm_models = []
for digit in sorted(os.listdir(input_folder)):
    path = os.path.join(input_folder, digit)
    # Skip stray files (e.g. Thumbs.db); only directories hold training clips.
    if not os.path.isdir(path):
        continue
    label = digit
    print(path)
    hmm_models.append((train_model_hmm(path), label))

D:\xltn\dataset\a
D:\xltn\dataset\b
D:\xltn\dataset\ban
D:\xltn\dataset\len
D:\xltn\dataset\nhay
D:\xltn\dataset\phai
D:\xltn\dataset\trai
D:\xltn\dataset\xuong


In [None]:
def predict_hmm(hmm_models, test_file):
    """
    Classify an audio file by picking the best-scoring model.

    :param hmm_models: iterable of (model, label) pairs; each model exposes score()
    :param test_file: path of the audio file to classify
    :return: label of the model with the strictly highest score, or "" when
             no model scores above negative infinity (e.g. hmm_models is empty)
    """
    features = mfcc(test_file)

    best_label, best_score = "", float('-inf')
    for candidate, candidate_label in hmm_models:
        current = candidate.score(features)
        # Strict comparison: on a tie the earlier model keeps the win.
        if current > best_score:
            best_score, best_label = current, candidate_label
    return best_label

In [None]:
# Build the evaluation set: up to 10 distinct .wav files per label folder.
# NOTE(review): the files are drawn from the same folders the models are
# trained on, so the resulting accuracy is measured on training data.
# NOTE(review): np.random is not seeded here, so the sample changes per run.
test_paths = []
for path in sorted(os.listdir(input_folder)):
    label_dir = os.path.join(input_folder, path)
    # Skip stray files; only directories are labels.
    if not os.path.isdir(label_dir):
        continue
    label = path
    arr = [name for name in os.listdir(label_dir) if name.endswith('.wav')]
    if not arr:
        # np.random.choice raises on an empty population.
        continue
    # replace=False: never evaluate the same clip twice.
    test_arr = np.random.choice(arr, min(10, len(arr)), replace=False)
    for test_file in test_arr:
        test_paths.append({
            "test_path": os.path.join(label_dir, test_file),
            "label": label})

In [None]:
# Count how many sampled test files receive the correct label.
predict_true = 0
for test in test_paths:
    predict_label = predict_hmm(hmm_models, test["test_path"])
    # A correct prediction adds 1, a wrong one adds 0.
    predict_true += int(predict_label == test["label"])

In [None]:
# Accuracy: fraction of the sampled test files whose predicted label matches
# their true label.
# NOTE(review): the test files are sampled from the training folders, so this
# is an optimistic estimate.
predict_true/len(test_paths)

0.9375