In [None]:
import os
import warnings
import numpy as np
import scipy.io.wavfile as wf
import python_speech_features as sf
import matplotlib.pyplot as mp
import hmmlearn.hmm as hl

# Hide DeprecationWarning messages so they do not clutter the cell outputs.
warnings.filterwarnings(
    'ignore',category=DeprecationWarning
)
# Tell NumPy to ignore every floating-point error category.
# The function is np.seterr; the previous spelling np.setter does not exist
# and raised AttributeError as soon as this cell ran.
np.seterr(all='ignore')

In [None]:
def search_speeches(directory,speeches):
    """Recursively collect ``.wav`` files under ``directory``, grouped by folder.

    Each file is filed under the name of the folder that directly contains
    it, so the folder name acts as the label.

    Parameters
    ----------
    directory : str
        Folder to scan; sub-folders are visited recursively.
    speeches : dict
        Mapping ``label -> list of file paths``. It is updated in place.

    Returns
    -------
    dict
        The same ``speeches`` object that was passed in.

    Raises
    ------
    IOError
        If ``directory`` is not an existing folder.
    """
    if not os.path.isdir(directory):
        raise IOError("path " + directory + " is not a folder")
    # The label depends only on the directory, so compute it once.
    # normpath drops a trailing separator, which would otherwise yield an
    # empty label.
    label = os.path.basename(os.path.normpath(directory))
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for entry in sorted(os.listdir(directory)):
        path = os.path.join(directory,entry)
        if os.path.isdir(path):
            search_speeches(path,speeches)
        elif os.path.isfile(path) and path.endswith('.wav'):
            speeches.setdefault(label,[]).append(path)
    return speeches

In [None]:
# Build the MFCC matrices and the label list for a data set

def gen_matrix(speeches):
    """Build one stacked MFCC matrix per label.

    Every file of a label is read with ``wf.read`` and converted with
    ``sf.mfcc``; the per-file results are stacked along axis 0.

    Parameters
    ----------
    speeches : dict
        Mapping ``label -> list of wav file paths``.

    Returns
    -------
    tuple of (list, list)
        ``path_x`` holds one stacked matrix per label and ``path_y`` the
        matching labels, in the iteration order of ``speeches``. A label
        with no files gets an empty 1-D array.
    """
    path_x,path_y = [],[]
    for label,filenames in speeches.items():
        per_file = []
        for filename in filenames:
            sample_rate,sigs = wf.read(filename)
            per_file.append(sf.mfcc(sigs,sample_rate))
        # Stack once per label: np.append inside the loop recopies the whole
        # accumulated matrix for every file.
        if per_file:
            mfccs = np.concatenate(per_file,axis=0)
        else:
            mfccs = np.array([])
        path_x.append(mfccs)
        path_y.append(label)
    return path_x,path_y
            

In [None]:
# Train the models
def model_train(path_x,path_y,n_components=4,n_iter=1000,random_state=None):
    """Fit one Gaussian HMM per label.

    Parameters
    ----------
    path_x : list
        Feature matrices, one per label.
    path_y : list
        Labels, aligned with ``path_x``.
    n_components : int, optional
        Number of hidden states for every model (default 4).
    n_iter : int, optional
        Maximum number of training iterations (default 1000).
    random_state : optional
        Forwarded to the model constructor; pass a fixed value for
        reproducible training.

    Returns
    -------
    dict
        Mapping ``label -> fitted model``.
    """
    models={}
    for mfccs,label in zip(path_x,path_y):
        # The hmmlearn class is GaussianHMM; "HGaussianHMM" does not exist and
        # raised AttributeError.
        model=hl.GaussianHMM(
            n_components=n_components,covariance_type='diag',
            n_iter=n_iter,random_state=random_state
        )
        # NOTE(review): mfccs appears to be several recordings stacked into one
        # matrix, yet fit() is called without `lengths`, so they are treated as
        # one sequence -- confirm this is intended.
        models[label]=model.fit(mfccs)
    return models

In [None]:
# predict
def model_pred(path_x,path_y,models):
    """Predict a label for each feature matrix by picking the best-scoring model.

    Parameters
    ----------
    path_x : iterable
        Feature matrices to classify.
    path_y : list
        Unused; kept so existing callers keep working.
    models : dict
        Mapping ``label -> model`` where each model has a ``score(features)``
        method; a higher score means a better match.

    Returns
    -------
    list
        One predicted label per element of ``path_x``, in order. An entry is
        ``None`` when ``models`` is empty.
    """
    pred_test_y = []
    for mfccs in path_x:
        best_score,best_label = None,None
        for label,model in models.items():
            score = model.score(mfccs)
            # Remember the running best; previously it was never updated.
            if best_score is None or best_score < score:
                best_score,best_label = score,label
        # Append exactly one prediction per sample, after all models are scored.
        pred_test_y.append(best_label)
    return pred_test_y

In [None]:
def visualize(path_x,path_y):
    """Show each feature matrix as a colour-mapped image, one figure per label.

    Parameters
    ----------
    path_x : iterable
        Feature matrices; each is transposed before plotting.
    path_y : iterable
        Labels, aligned with ``path_x``; used as figure id and title.
    """
    for mfcc,label in zip(path_x,path_y):
        mp.matshow(mfcc.T,cmap='jet',fignum=label)
        mp.title(label,fontsize=20)
        mp.xlabel("Sample",fontsize=14)
        mp.ylabel("Feature",fontsize=14)
        # Use real booleans: the string "False" is truthy. The keyword is
        # `labelbottom`; the misspelt `labelbbottom` is not a valid option.
        mp.tick_params(which="both",top=False,labeltop=False,labelbottom=True,labelsize=10)
        mp.show()

In [None]:
# Train: index the training recordings, extract features, fit one model per label.
train_path = "speeches/training"
train_speeches = search_speeches(train_path, {})
train_x, train_y = gen_matrix(train_speeches)
models = model_train(train_x, train_y)

In [None]:
# Test: index the test recordings, extract features and predict a label for each.
# The folder was misspelt "speechses"; it must match the "speeches" root used
# for training.
test_path = "speeches/testing"
test_speeches = {}
test_speeches=search_speeches(
    test_path,test_speeches
)

test_x,test_y = gen_matrix(test_speeches)
pred_test_y = model_pred(
    test_x,test_y,models
)

# The two headings were swapped: test_y holds the true labels and
# pred_test_y the predictions.
print("True Value\n",test_y)
print("predict value\n",pred_test_y)

In [None]:
# Plot the feature matrix of every test label.
visualize(path_x=test_x, path_y=test_y)

In [None]:
# Reference: https://blog.csdn.net/weixin_43409302/article/details/88317065?ops_request_misc=&request_id=&biz_id=102&utm_term=%E8%AF%AD%E9%9F%B3%E8%AF%86%E5%88%AB%20%E7%94%BB%E9%9F%B3%E9%A2%91%E5%9B%BE&utm_medium=distribute.pc_search_result.none-task-blog-2~all~sobaiduweb~default-3-88317065.142^v77^wechat,201^v4^add_ask,239^v2^insert_chatgpt&spm=1018.2226.3001.4187