In [42]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

## lấy mfcc của file wav

In [43]:
# read file
def get_mfcc(file_path):
    """Compute mean-normalised MFCC + delta features for one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)``: for each of the ``T`` frames, 12 MFCCs
        followed by their 12 first-order and 12 second-order deltas.
        The time axis comes first because hmmlearn expects ``T x N`` input.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame
    # Pass y and sr by keyword: librosa >= 0.10 made them keyword-only, and the
    # keyword form is also accepted by older releases.
    # mfcc is a 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # Subtract each coefficient's mean over time --> normalised MFCC
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # First- and second-order delta features
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)
    # Return the transpose (T x 36)
    return X.T

## lấy mfcc của tất cả các file trong dir

In [44]:
# Collect the MFCC features of every .wav file in a directory
def get_class_data(data_dir):
    """Return the feature matrices of all ``.wav`` files directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive). Entries whose name does not end
        with ``.wav`` are ignored.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, ordered by file name.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (and any positional train/test split built on it) reproducible.
    wav_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, name)) for name in wav_names]

## Hàm Clustering

In [45]:
def clustering(X, n_clusters=14):
    """Fit K-Means on ``X`` and return the fitted estimator.

    Parameters
    ----------
    X : array-like
        Samples to cluster, one row per sample.
    n_clusters : int, default 14
        Number of centroids to fit.

    Returns
    -------
    sklearn.cluster.KMeans
        The fitted model (50 initialisations, ``random_state=0``).
        The shape of its centroid matrix is printed as a side effect.
    """
    model = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0).fit(X)
    print("centers", model.cluster_centers_.shape)
    return model

## Data

In [46]:
# Phrases to recognise; each one is also the name of the directory holding its recordings.
class_names = ['bệnh nhân', 'chúng ta', 'có thể', 'người','Việt Nam']
# dataset maps class name -> list of per-recording feature matrices.
dataset = {}
for label in class_names:
    print(f"Load {label} dataset")
    # The directory path is the class name itself, relative to the working directory.
    dataset[label] = get_class_data(label)

Load bệnh nhân dataset
Load chúng ta dataset
Load có thể dataset
Load người dataset
Load Việt Nam dataset


## Load the separate hold-out recordings (`test/<class>_test`)

In [67]:
# Extra hold-out recordings: one directory per class at test/<class name>_test.
# NOTE(review): the saved output of this cell shows unaccented class names, so it was
# presumably run with a different class_names list — confirm the directory names on disk.
testset1 = {}
for cname in class_names:
    print(f"Load {cname} testset ")
    holdout_dir = os.path.join('test', f"{cname}_test")
    testset1[cname] = get_class_data(holdout_dir)

Load benh nhan testset
Load chung ta testset
Load co the testset
Load nguoi testset
Load viet nam testset


In [69]:
# Number of hold-out recordings for one class. The key must be spelled exactly as in
# class_names (with diacritics): the loading loop never creates the ASCII key
# 'viet nam', so using it raises KeyError on a fresh top-to-bottom run.
print(len(testset1['Việt Nam']))

6


In [47]:
# Sanity check: how many recordings were loaded for the 'Việt Nam' class.
n_loaded_vn = len(dataset['Việt Nam'])
print(n_loaded_vn)

100


## Split train / test (80% / 20%)

In [48]:
# Positional 80/20 split per class: the first 80% of each class's recordings
# (rounded down) go to training, the remainder to testing.
trainset = {}
testset = {}
# n_test is keyed from class_names instead of a second hard-coded class list,
# so the two can never drift apart.
n_test = {}
for cname in class_names:
    n = len(dataset[cname])
    n_train = math.floor(n * 0.8)
    trainset[cname] = dataset[cname][:n_train]
    testset[cname] = dataset[cname][n_train:]
    n_test[cname] = len(testset[cname])

print(len(trainset['Việt Nam']))

80


## Fit kmeans trên tập train

In [49]:
# Pool every training frame of every class into a single (n_frames, n_features) matrix.
frames_per_class = [np.concatenate(seqs, axis=0) for seqs in trainset.values()]
all_vectors = np.concatenate(frames_per_class, axis=0)
print("vectors", all_vectors.shape)
# Fit the K-Means codebook used to turn frames into discrete symbols.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print(kmeans)

vectors (14589, 36)
centers (14, 36)
centers (14, 36)
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=14, n_init=50, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)


In [50]:
# Replace every feature matrix by its sequence of cluster ids, shaped (T, 1).
# The dicts are overwritten in place, so this cell is not idempotent: a second run
# would hand the already-quantised (T, 1) arrays back to kmeans.predict.
for cname in class_names:
    for split in (trainset, testset):
        split[cname] = [kmeans.predict(seq).reshape(-1, 1) for seq in split[cname]]

    

# Train

In [51]:
# Trained HMMs, keyed by class name; filled in by the training cells.
models = dict()

## Models cho 'bệnh nhân' 6x3

In [52]:
# HMM for 'bệnh nhân' with 6*3 = 18 hidden states.
n_states = 6*3
# hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM (MultinomialHMM
# now models multinomial count vectors); fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only the emission matrix is initialised by fit(); the start and
# transition probabilities assigned below are used as the starting point.
hmm = DiscreteHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may start in any of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transition matrix: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = np.zeros((n_states, n_states))
rows = np.arange(n_states - 2)
transmat[rows, rows] = 0.7
transmat[rows, rows + 1] = 0.2
transmat[rows, rows + 2] = 0.1
transmat[-2, -2:] = [0.7, 0.3]  # second-to-last state: stay, or move to the last state
transmat[-1, -1] = 1.0          # last state only transitions to itself
hmm.transmat_ = transmat

# hmmlearn takes all sequences stacked into one array plus the individual lengths.
Xbn = np.concatenate(trainset['bệnh nhân'])
lengths = [len(x) for x in trainset['bệnh nhân']]
print("training class", 'bệnh nhân')
print(Xbn.shape, lengths, len(lengths))
hmm.fit(Xbn, lengths=lengths)
models['bệnh nhân'] = hmm
print("Training done")

training class bệnh nhân
(3168, 1) [39, 29, 38, 32, 55, 48, 41, 41, 41, 47, 51, 43, 44, 63, 37, 35, 44, 40, 32, 41, 32, 41, 41, 31, 40, 44, 45, 30, 38, 35, 45, 48, 32, 35, 46, 33, 37, 51, 38, 45, 35, 35, 33, 29, 37, 47, 42, 43, 47, 44, 35, 43, 54, 41, 33, 36, 30, 33, 41, 43, 46, 41, 37, 40, 45, 26, 28, 52, 41, 34, 45, 52, 39, 44, 34, 30, 28, 34, 32, 31] 80


         1       -8104.0133             +nan
         2       -6403.5705       +1700.4428
         3       -5741.2909        +662.2795
         4       -5333.9836        +407.3073
         5       -5166.7133        +167.2703
         6       -5090.6523         +76.0610
         7       -5045.1973         +45.4549
         8       -5014.8130         +30.3843
         9       -4987.8320         +26.9810
        10       -4937.4943         +50.3377
        11       -4869.4459         +68.0484
        12       -4825.1835         +44.2624
        13       -4809.5165         +15.6671
        14       -4799.4337         +10.0828
        15       -4791.9571          +7.4765
        16       -4784.8884          +7.0688
        17       -4776.3315          +8.5569
        18       -4763.7450         +12.5865
        19       -4752.4487         +11.2964
        20       -4743.0114          +9.4373
        21       -4731.7480         +11.2634
        22       -4718.0819         +13.6661
        23

Training done


        64       -4584.3542          +0.0120
        65       -4584.3456          +0.0086


## Models cho 'chúng ta' 5x3

In [53]:
# HMM for 'chúng ta' with 5*3 = 15 hidden states.
n_states = 5*3
# hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM (MultinomialHMM
# now models multinomial count vectors); fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only the emission matrix is initialised by fit(); the start and
# transition probabilities assigned below are used as the starting point.
hmm = DiscreteHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may start in any of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transition matrix: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = np.zeros((n_states, n_states))
rows = np.arange(n_states - 2)
transmat[rows, rows] = 0.7
transmat[rows, rows + 1] = 0.2
transmat[rows, rows + 2] = 0.1
transmat[-2, -2:] = [0.7, 0.3]  # second-to-last state: stay, or move to the last state
transmat[-1, -1] = 1.0          # last state only transitions to itself
hmm.transmat_ = transmat

# hmmlearn takes all sequences stacked into one array plus the individual lengths.
Xct = np.concatenate(trainset['chúng ta'])
lengths = [len(x) for x in trainset['chúng ta']]
print("training class", 'chúng ta')
print(Xct.shape, lengths, len(lengths))
hmm.fit(Xct, lengths=lengths)
models['chúng ta'] = hmm
print("Training done")

         1       -8246.4231             +nan


training class chúng ta
(3202, 1) [34, 31, 34, 75, 33, 35, 59, 39, 36, 38, 37, 37, 41, 42, 41, 35, 34, 37, 34, 32, 31, 40, 30, 37, 38, 39, 38, 55, 35, 54, 40, 40, 52, 36, 43, 37, 44, 41, 37, 39, 50, 32, 46, 47, 38, 46, 42, 39, 36, 41, 47, 28, 36, 47, 41, 43, 32, 41, 38, 42, 36, 45, 30, 32, 36, 48, 43, 57, 37, 36, 34, 41, 51, 35, 38, 37, 39, 43, 37, 45] 80


         2       -6325.1707       +1921.2524
         3       -5643.0385        +682.1322
         4       -5342.5628        +300.4758
         5       -5176.3903        +166.1725
         6       -5079.7069         +96.6834
         7       -5028.7228         +50.9840
         8       -4999.2671         +29.4558
         9       -4982.5717         +16.6954
        10       -4964.9996         +17.5721
        11       -4948.8108         +16.1888
        12       -4926.7071         +22.1037
        13       -4901.9525         +24.7546
        14       -4888.8915         +13.0611
        15       -4883.1434          +5.7481
        16       -4879.4709          +3.6725
        17       -4876.9898          +2.4810
        18       -4875.2653          +1.7246
        19       -4873.2343          +2.0309
        20       -4869.6290          +3.6053
        21       -4860.3579          +9.2711
        22       -4828.5447         +31.8132
        23       -4762.2344         +66.3104
        24

Training done


        63       -4565.4606          +0.0052


## Models cho 'có thể' 4x3

In [54]:
# HMM for 'có thể' with 4*3 = 12 hidden states.
n_states = 4*3
# hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM (MultinomialHMM
# now models multinomial count vectors); fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only the emission matrix is initialised by fit(); the start and
# transition probabilities assigned below are used as the starting point.
hmm = DiscreteHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may start in any of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transition matrix: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = np.zeros((n_states, n_states))
rows = np.arange(n_states - 2)
transmat[rows, rows] = 0.7
transmat[rows, rows + 1] = 0.2
transmat[rows, rows + 2] = 0.1
transmat[-2, -2:] = [0.7, 0.3]  # second-to-last state: stay, or move to the last state
transmat[-1, -1] = 1.0          # last state only transitions to itself
hmm.transmat_ = transmat

# hmmlearn takes all sequences stacked into one array plus the individual lengths.
Xcth = np.concatenate(trainset['có thể'])
lengths = [len(x) for x in trainset['có thể']]
print("training class", 'có thể')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['có thể'] = hmm
print("Training done")

         1       -7324.0907             +nan
         2       -5869.8277       +1454.2630


training class có thể
(2942, 1) [32, 40, 57, 23, 41, 43, 38, 102, 36, 37, 62, 25, 35, 37, 53, 36, 34, 44, 35, 51, 35, 39, 39, 39, 28, 31, 39, 37, 40, 44, 52, 46, 41, 52, 26, 47, 36, 52, 59, 43, 43, 30, 28, 36, 38, 35, 45, 31, 30, 56, 39, 53, 34, 34, 26, 56, 30, 33, 44, 37, 36, 35, 40, 25, 20, 27, 45, 25, 44, 26, 36, 47, 16, 27, 49] 75


         3       -5315.0061        +554.8216
         4       -5059.5048        +255.5013
         5       -4925.4599        +134.0449
         6       -4840.9965         +84.4634
         7       -4777.0258         +63.9707
         8       -4735.7122         +41.3136
         9       -4704.5799         +31.1323
        10       -4666.8771         +37.7028
        11       -4634.1208         +32.7563
        12       -4612.3993         +21.7215
        13       -4601.2106         +11.1887
        14       -4587.3406         +13.8700
        15       -4573.3321         +14.0085
        16       -4568.4783          +4.8538
        17       -4566.8723          +1.6061
        18       -4565.3128          +1.5595
        19       -4563.1603          +2.1525
        20       -4562.2009          +0.9594
        21       -4561.8015          +0.3995
        22       -4561.5676          +0.2339
        23       -4561.4097          +0.1579
        24       -4561.2887          +0.1209
        25

Training done


        35       -4560.5769          +0.0112
        36       -4560.5673          +0.0096


## Models cho 'người' 3x3

In [55]:
# HMM for 'người' with 3*3 = 9 hidden states.
n_states = 3*3
# hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM (MultinomialHMM
# now models multinomial count vectors); fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only the emission matrix is initialised by fit(); the start and
# transition probabilities assigned below are used as the starting point.
hmm = DiscreteHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may start in any of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transition matrix: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = np.zeros((n_states, n_states))
rows = np.arange(n_states - 2)
transmat[rows, rows] = 0.7
transmat[rows, rows + 1] = 0.2
transmat[rows, rows + 2] = 0.1
transmat[-2, -2:] = [0.7, 0.3]  # second-to-last state: stay, or move to the last state
transmat[-1, -1] = 1.0          # last state only transitions to itself
hmm.transmat_ = transmat

# hmmlearn takes all sequences stacked into one array plus the individual lengths.
# Stored under its own name (Xng) so the 'có thể' matrix Xcth is not overwritten.
Xng = np.concatenate(trainset['người'])
lengths = [len(x) for x in trainset['người']]
print("training class", 'người')
print(Xng.shape, lengths, len(lengths))
hmm.fit(Xng, lengths=lengths)
models['người'] = hmm
print("Training done")

         1       -5598.4937             +nan
         2       -4051.1433       +1547.3504


training class người
(2202, 1) [30, 58, 16, 36, 26, 33, 27, 18, 18, 22, 29, 22, 22, 31, 23, 32, 25, 15, 20, 21, 23, 29, 28, 37, 18, 24, 27, 62, 21, 23, 27, 16, 26, 35, 13, 20, 20, 20, 28, 16, 19, 43, 23, 18, 42, 50, 28, 20, 38, 23, 38, 22, 22, 36, 37, 18, 22, 21, 24, 20, 25, 22, 23, 25, 23, 30, 22, 19, 42, 25, 19, 26, 21, 21, 30, 26, 35, 19, 35, 19, 21, 26, 25, 22] 84


         3       -3529.1271        +522.0163
         4       -3281.7211        +247.4059
         5       -3196.7766         +84.9445
         6       -3163.2334         +33.5433
         7       -3145.0704         +18.1630
         8       -3131.0700         +14.0004
         9       -3115.1279         +15.9421
        10       -3092.3853         +22.7426
        11       -3057.4815         +34.9038
        12       -3026.9094         +30.5721
        13       -3007.0599         +19.8495
        14       -2991.8776         +15.1824
        15       -2983.7886          +8.0890
        16       -2979.6972          +4.0913
        17       -2978.2243          +1.4729
        18       -2977.4313          +0.7930
        19       -2976.6665          +0.7649
        20       -2975.7823          +0.8842
        21       -2974.6636          +1.1187
        22       -2973.2343          +1.4292
        23       -2971.7522          +1.4821
        24       -2970.6215          +1.1307
        25

Training done


        46       -2963.9970          +0.0100


## Models cho 'Việt Nam' 6x3

In [56]:
# HMM for 'Việt Nam' with 6*3 = 18 hidden states.
n_states = 6*3
# hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM (MultinomialHMM
# now models multinomial count vectors); fall back to MultinomialHMM on older releases.
DiscreteHMM = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# init_params='e': only the emission matrix is initialised by fit(); the start and
# transition probabilities assigned below are used as the starting point.
hmm = DiscreteHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may start in any of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transition matrix: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = np.zeros((n_states, n_states))
rows = np.arange(n_states - 2)
transmat[rows, rows] = 0.7
transmat[rows, rows + 1] = 0.2
transmat[rows, rows + 2] = 0.1
transmat[-2, -2:] = [0.7, 0.3]  # second-to-last state: stay, or move to the last state
transmat[-1, -1] = 1.0          # last state only transitions to itself
hmm.transmat_ = transmat

# hmmlearn takes all sequences stacked into one array plus the individual lengths.
Xvn = np.concatenate(trainset['Việt Nam'])
lengths = [len(x) for x in trainset['Việt Nam']]
print("training class", 'Việt Nam')
print(Xvn.shape, lengths, len(lengths))
hmm.fit(Xvn, lengths=lengths)
models['Việt Nam'] = hmm
print("Training done")

         1       -7910.7786             +nan


training class Việt Nam
(3075, 1) [26, 41, 42, 44, 56, 27, 38, 35, 44, 44, 32, 38, 36, 44, 57, 37, 41, 39, 34, 27, 40, 37, 38, 37, 34, 57, 44, 26, 39, 26, 32, 57, 35, 28, 35, 35, 60, 36, 47, 27, 41, 34, 59, 42, 41, 35, 37, 27, 33, 37, 46, 37, 41, 36, 35, 28, 31, 30, 40, 36, 37, 35, 44, 39, 31, 47, 37, 43, 50, 33, 36, 32, 37, 44, 42, 42, 28, 41, 36, 43] 80


         2       -5608.7777       +2302.0009
         3       -4774.6870        +834.0907
         4       -4465.7879        +308.8991
         5       -4313.1409        +152.6470
         6       -4204.3559        +108.7850
         7       -4147.4568         +56.8991
         8       -4102.7632         +44.6937
         9       -4066.0133         +36.7499
        10       -4034.3943         +31.6190
        11       -4007.9639         +26.4304
        12       -3991.5819         +16.3820
        13       -3980.6645         +10.9174
        14       -3972.3097          +8.3548
        15       -3963.8113          +8.4984
        16       -3952.9744         +10.8369
        17       -3941.1359         +11.8385
        18       -3932.4497          +8.6862
        19       -3925.5159          +6.9338
        20       -3917.1844          +8.3315
        21       -3905.1335         +12.0509
        22       -3897.6207          +7.5128
        23       -3895.9418          +1.6789
        24

Training done


        47       -3876.4771          +0.0087


# Test

In [57]:
print("Testing")
# Per-class count of test sequences whose best-scoring model is the true class.
n_correct = dict.fromkeys(class_names, 0)
for true_cname in class_names:
    for O in testset[true_cname]:
        # Log-likelihood of this sequence under every class model.
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        # Predicted class = model with the highest log-likelihood.
        predicted = max(score, key=score.get)
        if predicted == true_cname:
            n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', predicted)

Testing
bệnh nhân {'bệnh nhân': -61.789695226082095, 'chúng ta': -108.19792440294746, 'có thể': -82.20424654235207, 'người': -350.78403349790017, 'Việt Nam': -169.1781466287802} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -47.61315017127757, 'chúng ta': -86.16086728306502, 'có thể': -80.8968365369586, 'người': -84.36977223762898, 'Việt Nam': -82.97115901486285} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -58.537813859055426, 'chúng ta': -121.46738810916223, 'có thể': -132.59779168926715, 'người': -731.9072231466625, 'Việt Nam': -154.95339201727535} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -52.835745070574724, 'chúng ta': -112.0551127500578, 'có thể': -92.90067807052242, 'người': -209.27652791846026, 'Việt Nam': -93.9086991157729} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -59.15133949792981, 'chúng ta': -85.18541065445764, 'có thể': -102.29448020112787, 'người': -325.7455694900336, 'Việt Nam': -106.63256219235292} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -57.18106812252169, 'chúng ta

người {'bệnh nhân': -28.52094943390113, 'chúng ta': -51.264784591912814, 'có thể': -29.234983389784425, 'người': -inf, 'Việt Nam': -61.15844252831829} predict: bệnh nhân
người {'bệnh nhân': -647.7027102903467, 'chúng ta': -72.15449552925269, 'có thể': -115.39990149461991, 'người': -35.88302424132254, 'Việt Nam': -69.38536499176244} predict: người
người {'bệnh nhân': -86.21641791250043, 'chúng ta': -80.32414548086562, 'có thể': -89.90116786555457, 'người': -80.878205991791, 'Việt Nam': -73.87910273588186} predict: Việt Nam
người {'bệnh nhân': -155.8653514209496, 'chúng ta': -161.3269379370798, 'có thể': -89.30921122161928, 'người': -50.633644398807135, 'Việt Nam': -103.51477818925467} predict: người
người {'bệnh nhân': -29.142057917312084, 'chúng ta': -76.86136292818453, 'có thể': -52.59914526287286, 'người': -28.402902269781446, 'Việt Nam': -48.0435211845957} predict: người
người {'bệnh nhân': -225.3108341656819, 'chúng ta': -66.29490161052955, 'có thể': -47.33766948188826, 'người': -2

In [58]:
# Per-class accuracy: correctly classified test sequences / test sequences of that class.
for cname in class_names:
    class_accuracy = n_correct[cname] / n_test[cname]
    print('Accuracy:', cname, class_accuracy)

Accuracy: bệnh nhân 0.9
Accuracy: chúng ta 0.85
Accuracy: có thể 1.0
Accuracy: người 0.8571428571428571
Accuracy: Việt Nam 0.95


In [59]:
# Overall accuracy pooled over all classes.
total_correct = sum(n_correct.values())
total_tested = sum(n_test.values())
print('All Accuracy:', total_correct / total_tested)

All Accuracy: 0.91


In [60]:
# Show how many test sequences each class contributed (the accuracy denominators).
print(n_test)

{'bệnh nhân': 20, 'chúng ta': 20, 'có thể': 19, 'người': 21, 'Việt Nam': 20}


In [61]:
# Inspect the learned transition matrix of the 'người' model, rounded to 2 decimals.
models['người'].transmat_.round(2)

array([[0.83, 0.14, 0.03, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.79, 0.2 , 0.02, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.83, 0.16, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.8 , 0.06, 0.15, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.85, 0.15, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.75, 0.23, 0.03, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.76, 0.12, 0.13],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.96, 0.04],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])