In [293]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

## Get MFCC function

In [294]:
# read file
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

## lấy mfcc của tất cả các file trong dir

In [295]:
# lấy mfcc của tất cả các file wav trong wav
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

## Hàm Clustering

In [296]:
def clustering(X, n_clusters=14):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

## Data

In [297]:
class_names = ['cothe', 'da', 'duoc', 'nguoi','trong']
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join('data',cname))


Load cothe dataset
Load da dataset
Load duoc dataset
Load nguoi dataset
Load trong dataset


In [298]:
print(len(dataset['cothe']))
print(len(dataset['da']))
print(len(dataset['duoc']))
print(len(dataset['nguoi']))
print(len(dataset['trong']))

124
117
105
100
127


## TestData

In [299]:
testClass_names = ['da', 'duoc', 'trong', 'nguoi', 'cothe']
testDataSet = {}
for cname in testClass_names:
    print(f"Load {cname} testDataSet")
    testDataSet[cname] = get_class_data(os.path.join('test', cname))
print(len(testDataSet['da']))
print(len(testDataSet['duoc']))
print(len(testDataSet['trong']))
print(len(testDataSet['nguoi']))
print(len(testDataSet['cothe']))

Load da testDataSet
Load duoc testDataSet
Load trong testDataSet
Load nguoi testDataSet
Load cothe testDataSet
10
10
10
10
10


## split train test

In [300]:
trainset = {}
testset = {}
n_test = {'cothe': 0, 'da': 0, 'duoc': 0, 'nguoi': 0,'trong': 0}
for cname in class_names:
    n = len(dataset[cname])
    n_train = math.floor(n*0.8)
    trainset[cname] = dataset[cname][:n_train]
    testset[cname] = dataset[cname][n_train:]
    n_test[cname] += len(testset[cname])
    
print(len(trainset['cothe']))
print(len(trainset['da']))
print(len(trainset['duoc']))
print(len(trainset['nguoi']))
print(len(trainset['trong']))

99
93
84
80
101


## Fit kmeans trên tập train

In [301]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in trainset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print(kmeans)

vectors (12520, 36)
centers (14, 36)
centers (14, 36)
KMeans(n_clusters=14, n_init=50, random_state=0)


In [302]:
for cname in class_names:
    trainset[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in trainset[cname]])
    testset[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in testset[cname]])
for cname in testClass_names:
    testDataSet[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in testDataSet[cname]])
    

# Train

In [303]:
models = {}

## Models cho 'có thể' 4x3

In [304]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=4*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])


Xbn = np.concatenate(trainset['cothe'])
lengths = list([len(x) for x in trainset['cothe']])
print("training class", 'cothe')
print(Xbn.shape, lengths, len(lengths))
hmm.fit(Xbn, lengths=lengths)
models['cothe'] = hmm
print("Training done")

training class cothe
(4501, 1) [34, 31, 56, 39, 44, 40, 46, 35, 45, 47, 37, 56, 36, 31, 39, 35, 42, 30, 30, 37, 36, 39, 26, 34, 47, 36, 41, 34, 26, 30, 25, 27, 16, 39, 27, 52, 25, 27, 20, 22, 36, 53, 44, 44, 28, 57, 53, 36, 37, 30, 34, 38, 51, 43, 40, 47, 38, 44, 102, 44, 52, 56, 35, 37, 39, 25, 41, 52, 43, 33, 49, 59, 35, 42, 40, 37, 40, 35, 36, 28, 45, 44, 23, 30, 48, 35, 32, 35, 35, 26, 36, 43, 45, 62, 165, 163, 179, 165, 158] 99


         1      -11662.7896             +nan
         2       -8178.6870       +3484.1026
         3       -7275.4580        +903.2290
         4       -6906.4269        +369.0311
         5       -6673.5786        +232.8482
         6       -6523.0856        +150.4930
         7       -6406.9561        +116.1296
         8       -6337.8441         +69.1119
         9       -6302.1388         +35.7054
        10       -6282.4439         +19.6949
        11       -6270.6083         +11.8356
        12       -6262.7322          +7.8761
        13       -6257.7123          +5.0198
        14       -6254.5660          +3.1464
        15       -6252.4467          +2.1193
        16       -6250.9339          +1.5127
        17       -6249.8346          +1.0994
        18       -6249.0382          +0.7964
        19       -6248.4560          +0.5821
        20       -6248.0120          +0.4440
        21       -6247.6469          +0.3652
        22       -6247.3189          +0.3280
        23

Training done


        57       -6210.7652          +0.0127
        58       -6210.7602          +0.0050


## Models cho 'đã' 2x3

In [305]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=2*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0 ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.3],
    [0.0, 0.0, 0.0, 0.0, 0.0, 1.0],])

Xct = np.concatenate(trainset['da'])
lengths = list([len(x) for x in trainset['da']])
print("training class", 'da')
print(Xct.shape, lengths, len(lengths))
hmm.fit(Xct, lengths=lengths)
models['da'] = hmm
print("Training done")

         1       -4764.8870             +nan
         2       -3715.7744       +1049.1126

training class da
(1804, 1) [42, 25, 31, 25, 18, 15, 15, 17, 21, 14, 23, 23, 18, 12, 21, 17, 16, 22, 15, 14, 16, 14, 16, 26, 12, 13, 17, 13, 23, 13, 16, 14, 12, 19, 20, 24, 20, 20, 24, 20, 18, 26, 26, 22, 21, 21, 15, 20, 35, 25, 16, 18, 16, 26, 20, 22, 22, 30, 20, 21, 25, 24, 20, 21, 22, 22, 19, 21, 15, 20, 14, 16, 15, 21, 15, 16, 19, 12, 18, 18, 15, 24, 24, 22, 17, 15, 18, 19, 18, 15, 15, 18, 15] 93



         3       -3496.9916        +218.7828
         4       -3311.2098        +185.7818
         5       -3193.7871        +117.4227
         6       -3146.7414         +47.0456
         7       -3116.2045         +30.5369
         8       -3095.6511         +20.5534
         9       -3076.5455         +19.1056
        10       -3054.8308         +21.7147
        11       -3038.2372         +16.5935
        12       -3028.6458          +9.5914
        13       -3023.5757          +5.0701
        14       -3020.7335          +2.8422
        15       -3017.6985          +3.0350
        16       -3013.8625          +3.8360
        17       -3010.6634          +3.1991
        18       -3008.4912          +2.1722
        19       -3007.0562          +1.4350
        20       -3006.1412          +0.9150
        21       -3005.5624          +0.5788
        22       -3005.1834          +0.3790
        23       -3004.9203          +0.2631
        24       -3004.7264          +0.1939
        2

Training done


       101       -2960.6712          +0.0136
       102       -2960.6605          +0.0106
       103       -2960.6522          +0.0083


## Models cho 'được' 4x3

In [306]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=4*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])

Xcth = np.concatenate(trainset['duoc'])
lengths = list([len(x) for x in trainset['duoc']])
print("training class", 'duoc')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['duoc'] = hmm
print("Training done")

         1       -4810.0183             +nan
         2       -3765.2880       +1044.7304
         3       -3450.7596        +314.5284


training class duoc
(1860, 1) [25, 20, 19, 17, 14, 22, 21, 27, 23, 30, 15, 18, 21, 30, 19, 20, 21, 30, 22, 14, 17, 23, 23, 19, 21, 18, 18, 20, 38, 21, 22, 35, 29, 18, 19, 22, 42, 22, 20, 24, 22, 21, 22, 22, 44, 29, 18, 14, 20, 24, 22, 19, 41, 24, 20, 18, 16, 20, 18, 21, 22, 30, 17, 21, 19, 16, 19, 18, 19, 16, 20, 14, 17, 16, 32, 26, 28, 24, 25, 25, 29, 21, 15, 17] 84


         4       -3239.4531        +211.3064
         5       -3062.6042        +176.8490
         6       -2885.1663        +177.4379
         7       -2764.4671        +120.6992
         8       -2707.1059         +57.3612
         9       -2658.9729         +48.1330
        10       -2633.2841         +25.6888
        11       -2621.6741         +11.6100
        12       -2614.3702          +7.3039
        13       -2611.4099          +2.9603
        14       -2609.6560          +1.7539
        15       -2608.1431          +1.5128
        16       -2607.7529          +0.3902
        17       -2607.5970          +0.1560
        18       -2607.5134          +0.0836
        19       -2607.4637          +0.0497
        20       -2607.4314          +0.0323
        21       -2607.4085          +0.0229
        22       -2607.3908          +0.0177
        23       -2607.3761          +0.0147
        24       -2607.3630          +0.0131
        25       -2607.3509          +0.0121
        26

Training done


        28       -2607.3184          +0.0102
        29       -2607.3089          +0.0095


## Models cho 'người' 4x3

In [307]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=5*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ], ])

Xcth = np.concatenate(trainset['nguoi'])
lengths = list([len(x) for x in trainset['nguoi']])
print("training class", 'nguoi')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['nguoi'] = hmm
print("Training done")

         1       -4768.9256             +nan
         2       -3055.3535       +1713.5721
         3       -2551.0050        +504.3486

training class nguoi
(1892, 1) [20, 18, 23, 25, 25, 38, 20, 25, 27, 22, 23, 23, 20, 26, 23, 19, 28, 24, 25, 19, 22, 28, 19, 24, 28, 20, 20, 32, 20, 18, 20, 31, 22, 19, 24, 14, 22, 15, 13, 22, 19, 25, 18, 27, 20, 20, 24, 17, 22, 19, 22, 26, 20, 23, 35, 28, 22, 30, 25, 27, 26, 23, 31, 30, 21, 19, 18, 20, 22, 15, 26, 19, 22, 51, 37, 42, 29, 29, 19, 18] 80



         4       -2435.6670        +115.3379
         5       -2395.5295         +40.1375
         6       -2377.7974         +17.7321
         7       -2365.9235         +11.8739
         8       -2356.7484          +9.1750
         9       -2348.4798          +8.2686
        10       -2340.0927          +8.3872
        11       -2332.1805          +7.9121
        12       -2325.8355          +6.3450
        13       -2320.5171          +5.3184
        14       -2315.4404          +5.0767
        15       -2310.4111          +5.0294
        16       -2305.4117          +4.9994
        17       -2300.2429          +5.1688
        18       -2294.6148          +5.6281
        19       -2288.3010          +6.3138
        20       -2281.3505          +6.9504
        21       -2274.1630          +7.1875
        22       -2266.8930          +7.2700
        23       -2259.9450          +6.9480
        24       -2254.5292          +5.4158
        25       -2250.4949          +4.0343
        2

Training done


        65       -2220.6888          +0.0121
        66       -2220.6782          +0.0106
        67       -2220.6688          +0.0094


## Models cho 'trong' 3x3

In [308]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=3*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])

Xvn = np.concatenate(trainset['trong'])
lengths = list([len(x) for x in trainset['trong']])
print("training class", 'trong')
print(Xvn.shape, lengths, len(lengths))
hmm.fit(Xvn, lengths=lengths)
models['trong'] = hmm
print("Training done")

         1       -6393.4051             +nan
         2       -4709.4268       +1683.9784


training class trong
(2463, 1) [50, 50, 35, 33, 30, 30, 26, 23, 17, 20, 22, 21, 19, 32, 32, 22, 23, 29, 21, 22, 24, 38, 22, 18, 25, 26, 24, 21, 28, 21, 20, 19, 31, 22, 16, 21, 22, 19, 20, 33, 28, 25, 24, 25, 23, 26, 19, 21, 38, 29, 34, 18, 22, 19, 23, 17, 19, 20, 23, 24, 21, 33, 20, 20, 25, 21, 21, 25, 25, 24, 19, 28, 30, 20, 22, 21, 19, 16, 22, 26, 17, 25, 18, 31, 20, 21, 28, 20, 20, 28, 21, 24, 21, 23, 29, 22, 22, 23, 39, 21, 28] 101


         3       -4179.3284        +530.0984
         4       -3763.3374        +415.9910
         5       -3605.6602        +157.6772
         6       -3556.1207         +49.5395
         7       -3503.7788         +52.3419
         8       -3393.1904        +110.5884
         9       -3269.6907        +123.4997
        10       -3193.9599         +75.7308
        11       -3158.4294         +35.5305
        12       -3143.5683         +14.8611
        13       -3137.7824          +5.7860
        14       -3135.6999          +2.0824
        15       -3134.9590          +0.7409
        16       -3134.4370          +0.5220
        17       -3132.9669          +1.4702
        18       -3131.1087          +1.8582
        19       -3130.8453          +0.2634
        20       -3130.8257          +0.0195
        21       -3130.8111          +0.0147
        22       -3130.7939          +0.0172
        23       -3130.7667          +0.0272
        24       -3130.7163          +0.0504
        25

Training done





# Test

In [309]:
print("Testing")
n_correct = {'cothe': 0, 'da': 0, 'duoc': 0, 'nguoi': 0,'trong': 0}
for true_cname in class_names:
    for O in testset[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        if (true_cname == max(score, key=score.get)): n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', max(score, key=score.get))

Testing
cothe {'cothe': -549.9951776639559, 'da': -1708.0490186765305, 'duoc': -1270.4211653050581, 'nguoi': -inf, 'trong': -11333.565646735415} predict: cothe
cothe {'cothe': -170.7046211904538, 'da': -339.0161507222581, 'duoc': -620.8592403509093, 'nguoi': -inf, 'trong': -290.57445139581347} predict: cothe
cothe {'cothe': -485.6332596083742, 'da': -306.2196007424038, 'duoc': -378.11374511425544, 'nguoi': -inf, 'trong': -1132.969068571496} predict: da
cothe {'cothe': -535.0826292622528, 'da': -387.4879043554186, 'duoc': -963.7176798919318, 'nguoi': -inf, 'trong': -608.9915597448346} predict: da
cothe {'cothe': -380.32723375083367, 'da': -435.22061588817405, 'duoc': -1448.3031238655437, 'nguoi': -inf, 'trong': -6012.094179629728} predict: cothe
cothe {'cothe': -444.2993776662988, 'da': -917.4623820284074, 'duoc': -1204.500162721123, 'nguoi': -inf, 'trong': -8465.948025255824} predict: cothe
cothe {'cothe': -333.8418253165746, 'da': -552.0556015679952, 'duoc': -1373.0627314339413, 'nguo

In [310]:
for cname in class_names:
    print('Accuracy:', cname, n_correct[cname]/n_test[cname])

Accuracy: cothe 0.68
Accuracy: da 0.75
Accuracy: duoc 0.7619047619047619
Accuracy: nguoi 0.8
Accuracy: trong 0.8461538461538461


In [311]:
print('All Accuracy:', sum(n_correct.values())/sum(n_test.values()))

All Accuracy: 0.7672413793103449


In [312]:
print("Testing testDataSet")
n_correct = {'cothe': 0, 'da': 0, 'duoc': 0, 'nguoi': 0,'trong': 0}
for true_cname in testClass_names:
    for O in testDataSet[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        if (true_cname == max(score, key=score.get)): n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', max(score, key=score.get))

Testing testDataSet
da {'cothe': -125.39408537542131, 'da': -76.89664328852531, 'duoc': -57.755110209924574, 'nguoi': -91.81986249205448, 'trong': -125.37273700267481} predict: duoc
da {'cothe': -83.03533608782242, 'da': -28.194905836668756, 'duoc': -41.72894709808951, 'nguoi': -31.291251955961894, 'trong': -51.127452089771346} predict: da
da {'cothe': -242.00705465805584, 'da': -56.274530269660914, 'duoc': -53.810564508358915, 'nguoi': -3395.4022616295338, 'trong': -686.9043890210642} predict: duoc
da {'cothe': -165.43052267837012, 'da': -30.914299651460038, 'duoc': -46.0146842355355, 'nguoi': -35.65947309335265, 'trong': -97.56380764963929} predict: da
da {'cothe': -66.86608138331907, 'da': -34.45970545027512, 'duoc': -40.19358497247924, 'nguoi': -51.364699724874626, 'trong': -54.321058837614075} predict: da
da {'cothe': -52.932105759045754, 'da': -35.38491834027571, 'duoc': -29.918581770237953, 'nguoi': -41.70721292477989, 'trong': -45.34407478954438} predict: duoc
da {'cothe': -50.

In [313]:
for cname in testClass_names:
    print('Accuracy:', cname, n_correct[cname]/n_test[cname])

Accuracy: da 0.20833333333333334
Accuracy: duoc 0.42857142857142855
Accuracy: trong 0.3076923076923077
Accuracy: nguoi 0.25
Accuracy: cothe 0.36


In [314]:
print(n_test)

{'cothe': 25, 'da': 24, 'duoc': 21, 'nguoi': 20, 'trong': 26}


In [315]:
#np.around(models['trong'].transmat_, 2)