In [59]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

## lấy mfcc của file wav

In [60]:
# Extract per-frame MFCC + delta features from one audio file
def get_mfcc(file_path):
    """Compute a (T, 36) feature matrix for a single audio file.

    The 36 columns are 12 mean-normalised MFCCs followed by their first-
    and second-order deltas; T is the number of analysis frames.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, 36), one row per frame (the layout hmmlearn
        and scikit-learn expect: samples x features).
    """
    y, sr = librosa.load(file_path)  # waveform and its sampling rate
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms analysis window
    # Pass y/sr by keyword: librosa >= 0.10 made them keyword-only, so the
    # positional form raises TypeError there. Keywords work on old releases too.
    # mfcc is a 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # Subtract each coefficient's mean over time (cepstral mean normalisation)
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # First- and second-order delta features
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # Stack to 36 x T, then transpose so rows are frames
    X = np.concatenate([mfcc, delta1, delta2], axis=0)
    return X.T

## lấy mfcc của tất cả các file trong dir

In [61]:
# Collect the MFCC features of every .wav file in a directory
def get_class_data(data_dir):
    """Return a list of feature matrices, one per ``.wav`` file in ``data_dir``.

    File names are sorted before loading because ``os.listdir`` returns
    entries in arbitrary order; callers split the returned list by position
    into train/test, so a stable order keeps that split reproducible.

    Parameters
    ----------
    data_dir : str
        Directory to scan (non-recursive). Entries not ending in ".wav"
        are ignored.

    Returns
    -------
    list
        ``get_mfcc`` results, ordered by file name.
    """
    wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, f)) for f in wav_files]

## Hàm Clustering

In [62]:
def clustering(X, n_clusters=14):
    """Fit a K-Means codebook on the rows of ``X`` and return the fitted model.

    Uses 50 restarts (``n_init=50``) with a fixed seed, and prints the shape
    of the resulting centre matrix before returning.
    """
    model = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0).fit(X)
    print("centers", model.cluster_centers_.shape)
    return model

## Data

In [63]:
# Each class label is also the name of the directory (relative to the
# working directory) that holds that class's recordings.
class_names = ['bệnh nhân', 'chúng ta', 'có thể', 'người','Việt Nam']
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    # A one-argument os.path.join just returns its argument, so the class
    # name is passed directly as the directory path.
    dataset[cname] = get_class_data(cname)

Load bệnh nhân dataset
Load chúng ta dataset
Load có thể dataset
Load người dataset
Load Việt Nam dataset


In [64]:
# Sanity check: number of utterances loaded for the 'Việt Nam' class.
print(len(dataset['Việt Nam']))

100


## Split train / test (first 70% of each class for training, the rest for testing)

In [65]:
# Positional 70/30 split per class: the first floor(0.7 * n) utterances are
# used for training, the remainder for testing. n_test records the test
# count per class for the accuracy computation later on.
trainset, testset = {}, {}
n_test = dict.fromkeys(class_names, 0)
for cname in class_names:
    utterances = dataset[cname]
    n_train = math.floor(len(utterances) * 0.7)
    trainset[cname], testset[cname] = utterances[:n_train], utterances[n_train:]
    n_test[cname] += len(testset[cname])

print(len(trainset['Việt Nam']))

70


## Fit kmeans trên tập train

In [66]:
# Stack every training frame of every class into one (N, 36) matrix;
# the codebook is learned from training data only.
all_vectors = np.concatenate(
    [np.concatenate(utterances, axis=0) for utterances in trainset.values()],
    axis=0,
)
print("vectors", all_vectors.shape)
# Learn the K-Means codebook used to turn frames into discrete symbols
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print(kmeans)

vectors (12727, 36)
centers (14, 36)
centers (14, 36)
KMeans(n_clusters=14, n_init=50, random_state=0)


In [67]:
# Vector-quantise every utterance: each frame is replaced by the index of
# its nearest K-Means centre, giving a (T, 1) column of integer symbols.
# NOTE: this overwrites trainset/testset in place, so the cell must not be
# re-run without first re-running the split cell (the stored (T, 1) label
# arrays would be fed back into kmeans.predict).
for split in (trainset, testset):
    for cname in class_names:
        split[cname] = [kmeans.predict(frames).reshape(-1, 1) for frames in split[cname]]

    

# Train

In [68]:
# Maps class name -> trained HMM; filled in by the per-class training cells below.
models = {}

## Models cho 'bệnh nhân' 6x3

In [69]:
# Left-to-right discrete HMM for 'bệnh nhân' with 6*3 = 18 hidden states.
n_states = 6*3
# hmmlearn >= 0.3 redefined MultinomialHMM to model count vectors; sequences
# of integer symbols (what kmeans.predict produces) need CategoricalHMM there.
# Older releases only ship MultinomialHMM, which has the categorical
# behaviour, so fall back to it when CategoricalHMM is missing.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only emissions are auto-initialised, so the start and
# transition values assigned below are the starting point of training;
# params='ste' lets training update start, transition and emission probabilities.
hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# Sequences start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = (0.7, 0.2, 0.1)
hmm.startprob_ = startprob

# Each state may stay (0.7), advance to the next state (0.2) or skip one
# state (0.1). The last two rows are truncated so each row still sums to 1.
transmat = np.zeros((n_states, n_states))
for i in range(n_states - 2):
    transmat[i, i:i + 3] = (0.7, 0.2, 0.1)
transmat[-2, -2:] = (0.7, 0.3)
transmat[-1, -1] = 1.0
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the list of their lengths.
Xbn = np.concatenate(trainset['bệnh nhân'])
lengths = [len(x) for x in trainset['bệnh nhân']]
print("training class", 'bệnh nhân')
print(Xbn.shape, lengths, len(lengths))
hmm.fit(Xbn, lengths=lengths)
models['bệnh nhân'] = hmm
print("Training done")

training class bệnh nhân
(2832, 1) [37, 37, 40, 38, 47, 35, 41, 41, 44, 32, 35, 28, 32, 44, 47, 33, 34, 45, 51, 41, 62, 40, 44, 47, 52, 39, 54, 35, 41, 43, 47, 41, 51, 39, 38, 38, 43, 44, 55, 45, 26, 29, 43, 33, 30, 37, 30, 45, 42, 42, 45, 41, 44, 52, 41, 43, 41, 36, 38, 46, 46, 34, 33, 35, 37, 28, 30, 45, 36, 44] 70


         1       -7223.0905             +nan
         2       -5472.7995       +1750.2910
         3       -4642.8698        +829.9297
         4       -4276.3727        +366.4972
         5       -4117.9940        +158.3787
         6       -4029.8556         +88.1384
         7       -3968.7423         +61.1133
         8       -3930.0567         +38.6856
         9       -3908.5784         +21.4783
        10       -3896.7074         +11.8710
        11       -3889.0705          +7.6370
        12       -3883.3960          +5.6745
        13       -3878.7992          +4.5968
        14       -3874.6894          +4.1098
        15       -3870.5049          +4.1845
        16       -3865.8640          +4.6409
        17       -3859.5637          +6.3003
        18       -3852.1993          +7.3644
        19       -3848.1732          +4.0262
        20       -3845.8315          +2.3417
        21       -3844.3200          +1.5115
        22       -3843.2445          +1.0755
        23

Training done


        38       -3840.1294          +0.0093


## Models cho 'chúng ta' 5x3

In [70]:
# Left-to-right discrete HMM for 'chúng ta' with 5*3 = 15 hidden states.
n_states = 5*3
# hmmlearn >= 0.3 redefined MultinomialHMM to model count vectors; sequences
# of integer symbols (what kmeans.predict produces) need CategoricalHMM there.
# Older releases only ship MultinomialHMM, which has the categorical
# behaviour, so fall back to it when CategoricalHMM is missing.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only emissions are auto-initialised, so the start and
# transition values assigned below are the starting point of training;
# params='ste' lets training update start, transition and emission probabilities.
hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# Sequences start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = (0.7, 0.2, 0.1)
hmm.startprob_ = startprob

# Each state may stay (0.7), advance to the next state (0.2) or skip one
# state (0.1). The last two rows are truncated so each row still sums to 1.
transmat = np.zeros((n_states, n_states))
for i in range(n_states - 2):
    transmat[i, i:i + 3] = (0.7, 0.2, 0.1)
transmat[-2, -2:] = (0.7, 0.3)
transmat[-1, -1] = 1.0
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the list of their lengths.
Xct = np.concatenate(trainset['chúng ta'])
lengths = [len(x) for x in trainset['chúng ta']]
print("training class", 'chúng ta')
print(Xct.shape, lengths, len(lengths))
hmm.fit(Xct, lengths=lengths)
models['chúng ta'] = hmm
print("Training done")

         1       -7044.3207             +nan
         2       -5159.1727       +1885.1480


training class chúng ta
(2759, 1) [41, 38, 28, 46, 47, 36, 43, 34, 36, 42, 36, 40, 36, 37, 30, 40, 36, 41, 36, 37, 36, 42, 40, 35, 39, 38, 38, 41, 36, 44, 38, 59, 49, 37, 31, 43, 52, 42, 50, 48, 46, 54, 55, 47, 45, 35, 38, 38, 42, 38, 43, 33, 35, 32, 42, 32, 39, 41, 40, 37, 35, 37, 47, 31, 34, 36, 34, 34, 31, 30] 70


         3       -4601.1399        +558.0328
         4       -4036.8634        +564.2765
         5       -3869.1592        +167.7043
         6       -3810.2204         +58.9388
         7       -3765.1377         +45.0827
         8       -3723.3165         +41.8212
         9       -3685.1682         +38.1483
        10       -3641.4592         +43.7090
        11       -3582.1072         +59.3521
        12       -3532.5856         +49.5216
        13       -3506.7393         +25.8463
        14       -3494.8012         +11.9381
        15       -3489.1477          +5.6535
        16       -3486.2527          +2.8950
        17       -3476.9297          +9.3230
        18       -3466.6734         +10.2563
        19       -3464.3143          +2.3592
        20       -3463.1775          +1.1367
        21       -3462.3207          +0.8569
        22       -3461.5882          +0.7325
        23       -3460.9540          +0.6341
        24       -3460.3728          +0.5812
        25

Training done


        72       -3385.7990          +0.0082


## Models cho 'có thể' 4x3

In [71]:
# Left-to-right discrete HMM for 'có thể' with 4*3 = 12 hidden states.
n_states = 4*3
# hmmlearn >= 0.3 redefined MultinomialHMM to model count vectors; sequences
# of integer symbols (what kmeans.predict produces) need CategoricalHMM there.
# Older releases only ship MultinomialHMM, which has the categorical
# behaviour, so fall back to it when CategoricalHMM is missing.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only emissions are auto-initialised, so the start and
# transition values assigned below are the starting point of training;
# params='ste' lets training update start, transition and emission probabilities.
hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# Sequences start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = (0.7, 0.2, 0.1)
hmm.startprob_ = startprob

# Each state may stay (0.7), advance to the next state (0.2) or skip one
# state (0.1). The last two rows are truncated so each row still sums to 1.
transmat = np.zeros((n_states, n_states))
for i in range(n_states - 2):
    transmat[i, i:i + 3] = (0.7, 0.2, 0.1)
transmat[-2, -2:] = (0.7, 0.3)
transmat[-1, -1] = 1.0
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the list of their lengths.
Xcth = np.concatenate(trainset['có thể'])
lengths = [len(x) for x in trainset['có thể']]
print("training class", 'có thể')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['có thể'] = hmm
print("Training done")

         1       -6530.9960             +nan
         2       -4602.2109       +1928.7851
         3       -3927.9620        +674.2489

training class có thể
(2537, 1) [34, 31, 56, 39, 44, 40, 46, 35, 45, 47, 37, 56, 36, 31, 39, 35, 42, 30, 30, 37, 36, 39, 26, 34, 47, 36, 41, 34, 26, 30, 25, 27, 16, 39, 27, 52, 25, 27, 20, 22, 36, 53, 44, 44, 28, 57, 53, 36, 37, 30, 34, 38, 51, 43, 40, 47, 38, 44, 102, 44, 52, 56, 35, 37, 39] 65



         4       -3624.4522        +303.5098
         5       -3511.2653        +113.1868
         6       -3460.2929         +50.9724
         7       -3425.6037         +34.6892
         8       -3403.3312         +22.2725
         9       -3390.8124         +12.5188
        10       -3383.1510          +7.6614
        11       -3378.2240          +4.9271
        12       -3375.0501          +3.1739
        13       -3372.8605          +2.1896
        14       -3371.2031          +1.6575
        15       -3369.7849          +1.4182
        16       -3368.4206          +1.3643
        17       -3367.0384          +1.3822
        18       -3365.7054          +1.3330
        19       -3364.5623          +1.1431
        20       -3363.6784          +0.8839
        21       -3363.0213          +0.6571
        22       -3362.5166          +0.5047
        23       -3362.0957          +0.4209
        24       -3361.7113          +0.3845
        25       -3361.3368          +0.3744
        2

Training done


        80       -3345.3045          +0.0094


## Models cho 'người' 3x3

In [72]:
# Left-to-right discrete HMM for 'người' with 3*3 = 9 hidden states.
n_states = 3*3
# hmmlearn >= 0.3 redefined MultinomialHMM to model count vectors; sequences
# of integer symbols (what kmeans.predict produces) need CategoricalHMM there.
# Older releases only ship MultinomialHMM, which has the categorical
# behaviour, so fall back to it when CategoricalHMM is missing.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only emissions are auto-initialised, so the start and
# transition values assigned below are the starting point of training;
# params='ste' lets training update start, transition and emission probabilities.
hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# Sequences start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = (0.7, 0.2, 0.1)
hmm.startprob_ = startprob

# Each state may stay (0.7), advance to the next state (0.2) or skip one
# state (0.1). The last two rows are truncated so each row still sums to 1.
transmat = np.zeros((n_states, n_states))
for i in range(n_states - 2):
    transmat[i, i:i + 3] = (0.7, 0.2, 0.1)
transmat[-2, -2:] = (0.7, 0.3)
transmat[-1, -1] = 1.0
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the list of their lengths.
# Stored under its own name (Xng) so the 'có thể' training matrix is not
# overwritten by this class's data.
Xng = np.concatenate(trainset['người'])
lengths = [len(x) for x in trainset['người']]
print("training class", 'người')
print(Xng.shape, lengths, len(lengths))
hmm.fit(Xng, lengths=lengths)
models['người'] = hmm
print("Training done")

         1       -5065.2120             +nan
         2       -3665.5691       +1399.6428
         3       -3265.0447        +400.5244


training class người
(1951, 1) [29, 27, 25, 18, 18, 19, 20, 22, 26, 23, 31, 30, 21, 19, 18, 22, 15, 29, 26, 19, 22, 51, 37, 42, 37, 18, 32, 62, 26, 32, 21, 28, 43, 23, 25, 36, 42, 20, 18, 20, 21, 38, 22, 22, 50, 18, 18, 21, 16, 19, 23, 19, 23, 17, 24, 27, 21, 21, 13, 22, 36, 35, 26, 30, 35, 28, 18, 58, 23, 35, 25, 27, 28] 73


         4       -3154.9046        +110.1401
         5       -3108.6905         +46.2142
         6       -3083.4153         +25.2752
         7       -3070.8691         +12.5461
         8       -3064.7982          +6.0710
         9       -3061.0689          +3.7293
        10       -3058.0502          +3.0187
        11       -3054.6938          +3.3564
        12       -3050.2241          +4.4697
        13       -3045.6388          +4.5853
        14       -3043.0837          +2.5550
        15       -3041.8205          +1.2633
        16       -3040.8378          +0.9826
        17       -3039.8340          +1.0039
        18       -3038.6254          +1.2086
        19       -3036.9917          +1.6337
        20       -3034.5894          +2.4023
        21       -3030.8041          +3.7852
        22       -3024.4828          +6.3213
        23       -3013.6116         +10.8712
        24       -2994.5073         +19.1043
        25       -2959.1134         +35.3939
        26

Training done


        71       -2775.5528          +0.0221
        72       -2775.5381          +0.0147
        73       -2775.5281          +0.0100


## Models cho 'Việt Nam' 6x3

In [73]:
# Left-to-right discrete HMM for 'Việt Nam' with 6*3 = 18 hidden states.
n_states = 6*3
# hmmlearn >= 0.3 redefined MultinomialHMM to model count vectors; sequences
# of integer symbols (what kmeans.predict produces) need CategoricalHMM there.
# Older releases only ship MultinomialHMM, which has the categorical
# behaviour, so fall back to it when CategoricalHMM is missing.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only emissions are auto-initialised, so the start and
# transition values assigned below are the starting point of training;
# params='ste' lets training update start, transition and emission probabilities.
hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# Sequences start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = (0.7, 0.2, 0.1)
hmm.startprob_ = startprob

# Each state may stay (0.7), advance to the next state (0.2) or skip one
# state (0.1). The last two rows are truncated so each row still sums to 1.
transmat = np.zeros((n_states, n_states))
for i in range(n_states - 2):
    transmat[i, i:i + 3] = (0.7, 0.2, 0.1)
transmat[-2, -2:] = (0.7, 0.3)
transmat[-1, -1] = 1.0
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the list of their lengths.
Xvn = np.concatenate(trainset['Việt Nam'])
lengths = [len(x) for x in trainset['Việt Nam']]
print("training class", 'Việt Nam')
print(Xvn.shape, lengths, len(lengths))
hmm.fit(Xvn, lengths=lengths)
models['Việt Nam'] = hmm
print("Training done")

         1       -6700.7900             +nan


training class Việt Nam
(2648, 1) [42, 34, 48, 36, 42, 34, 37, 42, 32, 38, 35, 40, 44, 37, 38, 37, 31, 27, 29, 32, 28, 22, 26, 32, 37, 39, 27, 33, 29, 41, 34, 30, 31, 36, 26, 27, 40, 27, 37, 26, 59, 53, 46, 32, 35, 30, 57, 36, 35, 60, 34, 57, 35, 44, 33, 39, 31, 28, 35, 43, 57, 56, 47, 44, 44, 47, 42, 41, 44, 41] 70


         2       -4929.0747       +1771.7152
         3       -4203.8622        +725.2126
         4       -3891.2967        +312.5655
         5       -3744.2032        +147.0935
         6       -3648.6890         +95.5142
         7       -3580.4008         +68.2881
         8       -3509.2174         +71.1834
         9       -3452.3371         +56.8803
        10       -3426.6850         +25.6521
        11       -3417.5570          +9.1280
        12       -3411.3071          +6.2499
        13       -3406.8691          +4.4380
        14       -3403.6226          +3.2465
        15       -3400.6910          +2.9316
        16       -3397.3727          +3.3183
        17       -3392.8389          +4.5339
        18       -3385.3553          +7.4836
        19       -3376.5732          +8.7821
        20       -3371.4160          +5.1572
        21       -3368.1726          +3.2434
        22       -3359.9211          +8.2514
        23       -3352.6704          +7.2507
        24

Training done


       110       -3314.9886          +0.0072


# Test

In [74]:
# Classify every held-out utterance: score it under each class model and
# pick the class whose model gives the highest score.
print("Testing")
n_correct = dict.fromkeys(class_names, 0)
for true_cname in class_names:
    for O in testset[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        predicted = max(score, key=score.get)
        if predicted == true_cname:
            n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', predicted)

Testing
bệnh nhân {'bệnh nhân': -49.58661772664467, 'chúng ta': -188.53183651398777, 'có thể': -86.42677862368798, 'người': -367.9746586590195, 'Việt Nam': -135.2271050626424} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -49.0426219928118, 'chúng ta': -176.2720638594681, 'có thể': -86.27025790202997, 'người': -249.0644487606346, 'Việt Nam': -229.04633553555564} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -80.64786691783327, 'chúng ta': -1170.7533651831202, 'có thể': -707.378005350463, 'người': -820.9324272053892, 'Việt Nam': -755.9780220549138} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -79.65551706229904, 'chúng ta': -inf, 'có thể': -597.2042376568389, 'người': -694.8286309344271, 'Việt Nam': -inf} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -85.38552642759966, 'chúng ta': -inf, 'có thể': -423.50639178900315, 'người': -1095.3055146519039, 'Việt Nam': -inf} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -66.08848752660948, 'chúng ta': -412.5394151686034, 'có thể': -141.47483080923942, 'người':

In [75]:
# Per-class accuracy on the held-out split: correct predictions / test utterances.
for cname in class_names:
    hits, total = n_correct[cname], n_test[cname]
    print('Accuracy:', cname, hits/total)

Accuracy: bệnh nhân 0.9333333333333333
Accuracy: chúng ta 0.9666666666666667
Accuracy: có thể 0.896551724137931
Accuracy: người 0.9375
Accuracy: Việt Nam 0.9666666666666667


In [76]:
# Overall accuracy: total correct predictions over total test utterances (all classes pooled).
print('All Accuracy:', sum(n_correct.values())/sum(n_test.values()))

All Accuracy: 0.9403973509933775


In [77]:
# Number of held-out test utterances per class.
print(n_test)

{'bệnh nhân': 30, 'chúng ta': 30, 'có thể': 29, 'người': 32, 'Việt Nam': 30}


In [78]:
# Inspect the learned transition matrix of the 'người' model, rounded to 2 decimals.
np.around(models['người'].transmat_, 2)

array([[0.76, 0.12, 0.11, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.81, 0.18, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.81, 0.19, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.72, 0.05, 0.23, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.88, 0.1 , 0.02, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.78, 0.  , 0.22, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.77, 0.23],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

## Test dữ liệu nói từ micro

In [79]:
# Load the microphone recordings: one directory per class at
# test/<class name>_test, and remember how many utterances each contains.
testset1, n_test1 = {}, {}
for cname in class_names:
    print(f"Load {cname} dataset")
    mic_dir = os.path.join('test', cname+'_test')
    testset1[cname] = get_class_data(mic_dir)
    n_test1[cname] = len(testset1[cname])

Load bệnh nhân dataset
Load chúng ta dataset
Load có thể dataset
Load người dataset
Load Việt Nam dataset


In [80]:
# Vector-quantise the microphone utterances with the same K-Means codebook.
# NOTE: overwrites testset1 in place, so re-run the loading cell before
# running this cell a second time.
for cname in class_names:
    testset1[cname] = [kmeans.predict(frames).reshape(-1, 1) for frames in testset1[cname]]

In [81]:
# Classify every microphone utterance: score it under each class model and
# pick the class whose model gives the highest score.
print("Testing")
n_correct1 = dict.fromkeys(class_names, 0)
for true_cname in class_names:
    for O in testset1[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        predicted = max(score, key=score.get)
        if predicted == true_cname:
            n_correct1[true_cname] += 1
        print(true_cname, score, 'predict:', predicted)

Testing
bệnh nhân {'bệnh nhân': -50.77025130652266, 'chúng ta': -608.8803512490299, 'có thể': -564.8924229053771, 'người': -90.77514570476872, 'Việt Nam': -120.19128588972897} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -77.95194918849823, 'chúng ta': -278.48909238408663, 'có thể': -147.53313741482265, 'người': -196.61702199664532, 'Việt Nam': -200.4668795333769} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -81.03682522154311, 'chúng ta': -197.4698218203992, 'có thể': -223.85823153191578, 'người': -94.84705306865798, 'Việt Nam': -179.6560911155945} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -66.12225144065061, 'chúng ta': -767.8995443091987, 'có thể': -115.50747104763099, 'người': -110.75724900017272, 'Việt Nam': -146.1024010995987} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -98.09565704146492, 'chúng ta': -514.3037685307767, 'có thể': -181.4526260048142, 'người': -114.1012306801615, 'Việt Nam': -485.3247313153303} predict: bệnh nhân
chúng ta {'bệnh nhân': -4088.8480266768734, 'chúng ta':

In [82]:
# Per-class accuracy on the microphone recordings: correct predictions / utterances.
for cname in class_names:
    hits, total = n_correct1[cname], n_test1[cname]
    print('Accuracy:', cname, hits/total)

Accuracy: bệnh nhân 1.0
Accuracy: chúng ta 0.8333333333333334
Accuracy: có thể 0.6
Accuracy: người 1.0
Accuracy: Việt Nam 0.0


In [83]:
# Number of microphone test utterances per class.
print(n_test1)

{'bệnh nhân': 5, 'chúng ta': 6, 'có thể': 5, 'người': 5, 'Việt Nam': 6}
