In [None]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict

# Root folder that holds one sub-folder of .wav recordings per word class.
path_to_data = "./Data_Filtered"

# Scratch mapping whose missing keys start out as empty lists.
result = defaultdict(list)

In [None]:
def get_mfcc(file_path):
    """Load an audio file and return its frame-level MFCC + delta features.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, 36): for each of the T frames, 12 mean-normalised
        MFCCs followed by their first- and second-order deltas.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame
    # mfcc is a 12 x T matrix. The signal and the sample rate are passed by
    # keyword because librosa >= 0.10 no longer accepts them positionally.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract the per-coefficient mean over time --> normalised mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # 1st- and 2nd-order delta features
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # hmmlearn expects one row per frame, so return the T x 36 transpose
    return X.T

def get_class_data(data_dir, seed=None):
    """Return the MFCC features of every .wav file in ``data_dir``, shuffled.

    Parameters
    ----------
    data_dir : str
        Directory whose ``.wav`` files are loaded with ``get_mfcc``.
    seed : int or None, optional
        When given, the shuffle uses a private ``random.Random(seed)`` so the
        order is reproducible. When ``None`` (default) the module-level
        ``random.shuffle`` is used, as before.

    Returns
    -------
    list
        One ``get_mfcc`` result per file, in shuffled order.
    """
    # os.listdir returns entries in arbitrary order; sort first so that a
    # seeded shuffle starts from the same sequence on every machine.
    files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    if seed is None:
        random.shuffle(files)
    else:
        random.Random(seed).shuffle(files)
    return [get_mfcc(os.path.join(data_dir, f)) for f in files]

def clustering(X, n_clusters=10):
    """Fit K-Means on ``X`` and return the fitted estimator.

    The estimator is built with ``n_init=100``, ``random_state=0`` and
    ``verbose=0``; the shape of the resulting centre matrix is printed.
    """
    model = KMeans(
        n_clusters=n_clusters,
        n_init=100,
        random_state=0,
        verbose=0,
    )
    model.fit(X)
    centers_shape = model.cluster_centers_.shape
    print("centers", centers_shape)
    return model

In [None]:
class_names = ["ThanhPho", "Nha", "Me", "YTe", "Hoc"]
# Number of recordings per class held out for testing. They are taken from the
# tail of the shuffled list returned by get_class_data. (An alternative is to
# load dedicated "test_<class>" folders instead.)
n_test_per_class = 20

# datas: class name -> list of per-utterance feature matrices; the held-out
# utterances of class X are stored under the key "test_X".
datas = {}
# dataset: filled by a later cell with the cluster-index sequences.
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    class_features = get_class_data(os.path.join(path_to_data, cname))
    print(len(class_features))
    # Fail early with a clear message: slicing a too-short list would leave
    # an empty training set that only breaks later inside np.concatenate.
    if len(class_features) <= n_test_per_class:
        raise ValueError(
            f"class {cname!r} has {len(class_features)} recordings; more than "
            f"{n_test_per_class} are needed to keep a non-empty training set"
        )
    datas[f"test_{cname}"] = class_features[-n_test_per_class:]
    datas[cname] = class_features[:-n_test_per_class]

print("Done!!!")

In [None]:
# Stack every training frame (all classes, all utterances) into one matrix;
# keys starting with "test" hold the held-out utterances and are left out.
train_frames = [
    np.concatenate(utterances, axis=0)
    for key, utterances in datas.items()
    if not key.startswith("test")
]
all_vectors = np.concatenate(train_frames, axis=0)
print("vectors", all_vectors.shape)
# Fit the shared K-Means codebook on those frames.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

In [None]:
# Number of phonemes in each word's pronunciation; every phoneme is given
# 3 HMM states, which yields the state counts stored in dict_components.
_phonemes_per_word = {
    "ThanhPho": 5,  # tʰa̤jŋ˨˩ fo˧˥
    "Me": 2,        # mɛ̰ʔ˨˩
    "YTe": 3,       # i˧˧ te˧˥
    "Hoc": 3,       # ha̰ʔwk˨
    "Nha": 3,       # ɲa̤ː˨˩
}
dict_components = {
    word: 3 * n_phonemes for word, n_phonemes in _phonemes_per_word.items()
}

# Hard-coded transition matrices, one square list-of-lists per word; each has
# as many rows as that word's state count in dict_components (15, 9, 6, 9, 9).
# In the visible notebook this table is only referenced from a commented-out
# line of the training cell.
# NOTE(review): several rows do not sum to 1 -- e.g. (0-based) "Nha" row 5
# sums to 0.9, "YTe" row 5 to 1.1 and "Hoc" row 7 to 1.1. Normalise the rows
# before using them as transition probabilities.
dict_transmat = {
    "ThanhPho": [[0.7, 0.,  0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0.,  0. , 0. , 0.2,],
                 [0. , 1.,  0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.,  0. , 0. , 0. ,],
                 [0.2, 0.,  0.1, 0.1, 0. , 0.1, 0.2, 0. , 0. , 0.1, 0.2, 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0.9, 0. , 0. , 0. , 0. , 0. , 0. , 0.1, 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0.1, 0.9, 0. , 0. , 0. , 0. , 0. , 0. , 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 1. , 0. , 0. , 0. , 0. , 0. , 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0.8, 0. , 0.1, 0. , 0. , 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0. , 1. , 0. , 0. , 0. , 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0.1, 0. , 0. , 0. , 0. , 0. , 0.7, 0. , 0.1, 0.,  0.1, 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0.1, 0. , 0. , 0.9, 0. , 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0.1, 0. , 0. , 0. , 0.1, 0. , 0.8, 0.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 1.,  0. , 0. , 0. ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.,  0.9, 0. , 0.1 ,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0. , 0.1, 0. , 0. , 0. , 0.,  0. , 0.3, 0.7,],
                 [0. , 0.,  0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.,  0. , 0. , 1. ,],],


    "Nha":  [[0.,  0. , 0.6, 0.4, 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0.7, 0. , 0. , 0.2, 0.1, 0. , 0.,  0. ,],
             [0.,  0. , 0.9, 0. , 0. , 0. , 0. , 0.,  0.1,],
             [0.,  0. , 0. , 1. , 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 1. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0. , 0.9, 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0. , 0. , 0.9, 0.,  0.1,],
             [0.,  0. , 0. , 0. , 0. , 0. , 0. , 1.,  0. ,],
             [0.,  0. , 0.1, 0. , 0. , 0. , 0. , 0.,  0.8,],],


    "Me":   [[0.867, 0.   , 0.   , 0.133, 0.   , 0.   ,],
             [0.   , 0.906, 0.   , 0.046, 0.049, 0.   ,],
             [0.056, 0.   , 0.944, 0.   , 0.   , 0.   ,],
             [0., 1., 0., 0., 0.   , 0.   ,],
             [0., 0., 0.   , 0.0, 1., 0.,],
             [0.   , 0.   , 0.013, 0.   , 0.018, 0.97 ,],],
 
    "YTe":   [[0.,  0. , 0. , 0.5, 0. , 0. , 0.5, 0.,  0. ,],
             [0.,  1. , 0. , 0. , 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0.9, 0. , 0. , 0. , 0. , 0.,  0.1,],
             [0.,  0. , 0. , 1. , 0. , 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0.9, 0. , 0. , 0.,  0. ,],
             [0.,  0. , 0.1, 0. , 0. , 0.9, 0.1, 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0. , 0. , 0.9, 0.,  0. ,],
             [0.,  0.6, 0. , 0. , 0. , 0. , 0.4, 0.,  0. ,],
             [0.,  0. , 0. , 0. , 0.1, 0. , 0. , 0.,  0.8,],],
 
    "Hoc":  [[0.3, 0. , 0.2, 0.4, 0. , 0.,  0. , 0.1, 0. ,],
             [0. , 0.7, 0. , 0.2, 0.1, 0.,  0. , 0. , 0. ,],
             [0.2, 0. , 0.4, 0.1, 0. , 0.,  0.2, 0.1, 0. ,],
             [0.1, 0. , 0. , 0.8, 0. , 0.,  0. , 0. , 0. ,],
             [0. , 0.1, 0. , 0. , 0.9, 0.,  0. , 0. , 0. ,],
             [0. , 0. , 0. , 0. , 0. , 1.,  0. , 0. , 0. ,],
             [0. , 0. , 0.1, 0. , 0. , 0.,  0.9, 0. , 0. ,],
             [0.3, 0. , 0.2, 0.3, 0. , 0.,  0.1, 0.2, 0. ,],
             [0. , 0. , 0. , 0. , 0.1, 0.,  0. , 0. , 0.9,],],
}

In [None]:
# Discrete-observation HMM class. In newer hmmlearn releases (0.2.8+) the
# symbol-index model used here is called CategoricalHMM, while MultinomialHMM
# expects per-frame count vectors; older releases only have MultinomialHMM.
discrete_hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)

models = {}
for cname in class_names:
    # Convert every feature frame to the index of its nearest K-Means centre.
    # dataset[cname] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT), stored as a T x 1 column
    dataset[cname] = [kmeans.predict(v).reshape((-1, 1)) for v in datas[cname]]
    dataset[f"test_{cname}"] = [
        kmeans.predict(v).reshape((-1, 1)) for v in datas[f"test_{cname}"]
    ]

    if cname[:4] != 'test':
        n = dict_components[cname]
        # Priors handed to the HMM: all start mass on state 0 and an identity
        # matrix for the transitions.
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat = np.diag(np.full(n, 1))
        #transmat = np.array(dict_transmat[cname])

        hmm = discrete_hmm_cls(
            n_components=n, random_state=0, n_iter=1000, verbose=True,
            startprob_prior=startprob,
            transmat_prior=transmat,
            init_params='e', params='te'
        )

        # hmmlearn takes all sequences concatenated plus their lengths.
        X = np.concatenate(dataset[cname])
        lengths = [len(x) for x in dataset[cname]]
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm
print("Training done")

In [None]:
def max_score(score):
    """Return the key of ``score`` that has the highest value.

    Parameters
    ----------
    score : dict
        Mapping from label to a comparable score.

    Returns
    -------
    The label with the largest score; on ties the label that comes first in
    iteration order. The string "None" is returned for an empty mapping.
    """
    # max() keeps the first maximal item, matching the strict ">" comparison
    # of a manual scan, and needs no sentinel value that could collide with a
    # real label.
    return max(score, key=lambda label: score[label], default="None")
    
print("Testing")
percent = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    n_utterances = len(datas[true_cname])
    print(true_cname, n_utterances)

    dc = 0  # number of correctly classified utterances of this class
    for O in dataset[true_cname]:
        # Score of this utterance under every trained word model.
        score = {
            name: round(model.score(O, [len(O)]), 3)
            for name, model in models.items()
        }
        is_correct = max_score(score) == cname
        if is_correct:
            dc += 1
        print(true_cname, score, is_correct)
    print()
    percent[true_cname] = f"{dc}/{n_utterances}"

# Per-class summary: correct / total.
for k, v in percent.items():
    print(k, v)

In [None]:
# Print the transition matrix of every trained word model, rounded to three
# decimals and without scientific notation.
np.set_printoptions(suppress=True, precision=3)
for word, word_model in models.items():
    print(word, word_model.transmat_)

In [None]:
import pickle

# Load the saved model sets models_v1.pkl ... models_v11.pkl from the working
# directory, keyed by version number.
# NOTE: pickle.load can execute code embedded in the file -- only load files
# from a trusted source.
model_ver = {}
for i in range(1, 12):
    # The context manager closes each file as soon as it has been read.
    with open(f"models_v{i}.pkl", "rb") as model_file:
        model_ver[i] = pickle.load(model_file)



In [None]:
# Transition matrix and start probabilities of the version-1 "Nha" model,
# separated by one blank line.
print(
    model_ver[1]["Nha"].transmat_,
    model_ver[1]["Nha"].startprob_,
    sep="\n\n",
)

In [None]:
# Transition matrix and start probabilities of the version-1 "ThanhPho" model,
# separated by one blank line.
print(
    model_ver[1]["ThanhPho"].transmat_,
    model_ver[1]["ThanhPho"].startprob_,
    sep="\n\n",
)

In [None]:
# Transition matrix and start probabilities of the version-1 "YTe" model,
# separated by one blank line.
print(
    model_ver[1]["YTe"].transmat_,
    model_ver[1]["YTe"].startprob_,
    sep="\n\n",
)

In [None]:
# Transition matrix and start probabilities of the version-1 "Hoc" model,
# separated by one blank line.
print(
    model_ver[1]["Hoc"].transmat_,
    model_ver[1]["Hoc"].startprob_,
    sep="\n\n",
)

In [None]:
# Transition matrix and start probabilities of the version-11 "Me" model,
# separated by one blank line.
# NOTE(review): this cell reads version 11 while the other inspection cells
# read version 1 -- confirm that is intentional.
print(
    model_ver[11]["Me"].transmat_,
    model_ver[11]["Me"].startprob_,
    sep="\n\n",
)

In [44]:
def find_true_predict(pre_list):
    """Return the majority label among the predictions in ``pre_list``.

    Parameters
    ----------
    pre_list : iterable
        Predicted labels, one per voter. Any hashable label is accepted.

    Returns
    -------
    The label with the most votes. When several labels share the highest
    count, the one whose last occurrence comes latest in ``pre_list`` wins.
    The string "None" is returned for an empty input.
    """
    labels = list(pre_list)
    if not labels:
        return "None"

    # Tally votes without assuming a fixed set of class names.
    votes = {}
    for label in labels:
        votes[label] = votes.get(label, 0) + 1
    top = max(votes.values())

    # Scan from the end so that a tie goes to the latest tied label.
    for label in reversed(labels):
        if votes[label] == top:
            return label

# Ensemble evaluation: every saved model version votes for a word and the
# majority label is compared with the true class.
# NOTE(review): the output saved with this cell reports 20/20 for
# test_ThanhPho and 0/20 for every other class, i.e. every utterance was
# labelled ThanhPho -- confirm that the pickled models were trained on the
# same kind of observation sequences as the ones stored in `dataset`.
print("Testing")
percent = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    n_utterances = len(datas[true_cname])
    print(true_cname, n_utterances)

    dc = 0  # number of correctly classified utterances of this class
    for O in dataset[true_cname]:
        predict_list = []
        # Use whatever versions were loaded, in ascending order, instead of
        # repeating the hard-coded 1..11 range.
        for version in sorted(model_ver):
            score = {
                name: round(model.score(O, [len(O)]), 3)
                for name, model in model_ver[version].items()
            }
            predict_list.append(max_score(score))

        if find_true_predict(predict_list) == cname:
            dc += 1
    # Record the tally once per class, so a class without test utterances
    # still gets a "0/0" entry.
    percent[true_cname] = f"{dc}/{n_utterances}"

for k, v in percent.items():
    print(k, v)

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing
test_ThanhPho 20


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

test_Nha 20


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

test_Me 20


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

test_YTe 20


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

test_Hoc 20


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

test_ThanhPho 20/20
test_Nha 0/20
test_Me 0/20
test_YTe 0/20
test_Hoc 0/20
