In [27]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

## lấy mfcc của file wav

In [28]:
# read file
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

## lấy mfcc của tất cả các file trong dir

In [29]:
# lấy mfcc của tất cả các file wav trong wav
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

## Hàm Clustering

In [30]:
def clustering(X, n_clusters=14):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

## Data

In [31]:
class_names = ["khong","tôi","cachly", "cothe" , 'nguoi'  ]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))


Load khong dataset
Load tôi dataset
Load cachly dataset
Load cothe dataset
Load nguoi dataset


In [32]:
print(len(dataset['khong']))

68


In [33]:
trainset = {}
testset = {}
n_test = {'khong': 0, 'tôi': 0, 'cachly': 0, 'cothe': 0,'nguoi': 0}
for cname in class_names:
    n = len(dataset[cname])
    n_train = math.floor(n*1)
    trainset[cname] = dataset[cname][:n_train]
    testset[cname] = dataset[cname][n_train:]
    n_test[cname] += len(testset[cname])
    
print(len(trainset['khong']))

68


## Fit kmeans trên tập train

In [None]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in trainset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print(kmeans)

vectors (15599, 36)


In [9]:
for cname in class_names:
    trainset[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in trainset[cname]])
    testset[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in testset[cname]])

    

# Train

In [10]:
models = {}

## Model cho 'tôi' 3x3


In [11]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=3*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.6,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0])
hmm.transmat_ = np.array([
        [0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.4],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
    ])
Xbn = np.concatenate(trainset['tôi'])
lengths = list([len(x) for x in trainset['tôi']])
print("training class", 'tôi')
print(Xbn.shape, lengths, len(lengths))
hmm.fit(Xbn, lengths=lengths)
models['tôi'] = hmm
print("Training done")

training class tôi
(925, 1) [15, 17, 11, 13, 17, 21, 21, 14, 15, 12, 11, 18, 15, 16, 10, 27, 42, 17, 17, 30, 30, 12, 14, 14, 14, 18, 25, 17, 15, 9, 17, 22, 12, 12, 30, 36, 17, 15, 13, 15, 16, 20, 14, 28, 27, 19, 18, 12, 17, 14, 24] 51


         1       -2483.4214             +nan
         2       -1721.4243        +761.9972
         3       -1514.8353        +206.5889
         4       -1397.2514        +117.5839
         5       -1336.4738         +60.7777
         6       -1299.1258         +37.3479
         7       -1271.1331         +27.9928
         8       -1219.2610         +51.8720
         9       -1183.8324         +35.4286
        10       -1168.5603         +15.2721
        11       -1160.7677          +7.7926
        12       -1157.5004          +3.2674
        13       -1155.5233          +1.9770
        14       -1153.8307          +1.6926
        15       -1152.2569          +1.5739
        16       -1150.7734          +1.4835
        17       -1149.3144          +1.4590
        18       -1147.7320          +1.5824
        19       -1145.5155          +2.2165
        20       -1141.8871          +3.6284
        21       -1138.7636          +3.1235
        22       -1137.3812          +1.3825
        23

Training done


        48       -1115.0102          +0.0116
        49       -1114.9982          +0.0120
        50       -1114.9861          +0.0121
        51       -1114.9743          +0.0118
        52       -1114.9634          +0.0109
        53       -1114.9538          +0.0096


## Model cho 'không' 3x3

In [12]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=3*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])

Xct = np.concatenate(trainset['khong'])
lengths = list([len(x) for x in trainset['khong']])
print("training class", 'khong')
print(Xct.shape, lengths, len(lengths))
hmm.fit(Xct, lengths=lengths)
models['khong'] = hmm
print("Training done")

training class khong
(3991, 1) [26, 101, 29, 31, 34, 25, 15, 22, 46, 30, 33, 101, 25, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 38, 19, 27, 22, 31, 25, 31, 26, 27, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 101, 23, 25, 23, 45, 26, 19, 23, 30, 24, 101, 28, 28, 27, 25, 31, 32, 23, 22, 18, 28, 101, 101, 101] 68


         1       -9556.1065             +nan
         2       -4158.3648       +5397.7417
         3       -2864.7324       +1293.6324
         4       -2659.5757        +205.1567
         5       -2608.3876         +51.1881
         6       -2588.4308         +19.9568
         7       -2575.7705         +12.6603
         8       -2563.5128         +12.2577
         9       -2515.3095         +48.2034
        10       -2504.7654         +10.5440
        11       -2501.9131          +2.8523
        12       -2499.9294          +1.9837
        13       -2497.9153          +2.0141
        14       -2496.9987          +0.9166
        15       -2496.7474          +0.2513
        16       -2496.6472          +0.1002
        17       -2496.5906          +0.0566
        18       -2496.5476          +0.0429
        19       -2496.5081          +0.0396
        20       -2496.4672          +0.0408
        21       -2496.4229          +0.0443
        22       -2496.3745          +0.0484
        23

Training done


        31       -2496.0480          +0.0182
        32       -2496.0330          +0.0150
        33       -2496.0209          +0.0122
        34       -2496.0111          +0.0098


## Model cho 'có thể' 4x3

In [13]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=4*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])

Xcth = np.concatenate(trainset['cothe'])
lengths = list([len(x) for x in trainset['cothe']])
print("training class", 'có thể')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['có thể'] = hmm
print("Training done")

training class có thể
(2235, 1) [34, 56, 39, 40, 46, 35, 45, 47, 37, 56, 36, 35, 30, 36, 39, 26, 34, 36, 30, 25, 27, 16, 25, 27, 20, 22, 44, 57, 53, 36, 37, 30, 34, 43, 38, 44, 52, 35, 39, 41, 52, 43, 33, 49, 59, 35, 42, 40, 37, 40, 35, 28, 44, 23, 30, 35, 32, 35, 35, 26] 60


         1       -5675.9557             +nan
         2       -4345.6336       +1330.3221
         3       -3734.8658        +610.7678
         4       -3317.9259        +416.9400
         5       -3201.1353        +116.7906
         6       -3155.3298         +45.8055
         7       -3127.8974         +27.4324
         8       -3111.8514         +16.0459
         9       -3105.2158          +6.6357
        10       -3100.8486          +4.3672
        11       -3096.5091          +4.3395
        12       -3095.1138          +1.3953
        13       -3094.4976          +0.6162
        14       -3089.5504          +4.9472
        15       -3085.6398          +3.9106
        16       -3085.0190          +0.6208
        17       -3084.6416          +0.3774
        18       -3084.3780          +0.2636
        19       -3084.1682          +0.2098
        20       -3083.9864          +0.1818
        21       -3083.8210          +0.1655
        22       -3083.6661          +0.1548
        23

Training done


        59       -3078.9313          +0.0153
        60       -3078.9195          +0.0118
        61       -3078.9104          +0.0090


## Model cho 'người' 3x3

In [14]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=3*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ])
hmm.transmat_ =np.array([
    [0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, 0.0, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.2, 0.1, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.7, 0.3, ],
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ],])

Xcth = np.concatenate(trainset['nguoi'])
lengths = list([len(x) for x in trainset['nguoi']])
print("training class", 'nguoi')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['nguoi'] = hmm
print("Training done")

training class nguoi
(1887, 1) [53, 36, 38, 47, 20, 44, 22, 48, 27, 37, 34, 33, 36, 25, 33, 18, 18, 32, 24, 25, 22, 31, 55, 26, 39, 18, 25, 22, 22, 15, 16, 24, 24, 51, 35, 24, 22, 29, 15, 14, 21, 35, 18, 20, 30, 16, 21, 34, 14, 32, 22, 21, 23, 26, 17, 22, 27, 17, 23, 24, 18, 10, 13, 27, 15, 19, 44, 29, 35, 35] 70


         1       -4862.7234             +nan
         2       -2902.0811       +1960.6423
         3       -2621.6172        +280.4640
         4       -2482.8007        +138.8165
         5       -2426.2527         +56.5480
         6       -2402.2125         +24.0402
         7       -2390.3282         +11.8843
         8       -2381.6740          +8.6542
         9       -2375.6052          +6.0689
        10       -2372.1835          +3.4217
        11       -2370.3742          +1.8093
        12       -2369.2907          +1.0834
        13       -2368.4856          +0.8051
        14       -2367.6852          +0.8004
        15       -2366.5031          +1.1821
        16       -2364.3778          +2.1253
        17       -2362.3803          +1.9975
        18       -2361.6657          +0.7146
        19       -2361.3853          +0.2803
        20       -2361.2414          +0.1440


Training done


        21       -2361.1619          +0.0795
        22       -2361.1168          +0.0451
        23       -2361.0906          +0.0262
        24       -2361.0750          +0.0157
        25       -2361.0652          +0.0097


## Model cho 'Cách Ly' 5x3

In [18]:
hmm = hmmlearn.hmm.MultinomialHMM(n_components=5*3, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
hmm.startprob_ = np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0,0.0,0.0,0.0,0.0,0.0, 0.0,0.0])
hmm.transmat_ =np.array([
    [0.5,0.3,0.2,0.0,0.0,0.0, 0.0, 0.0, 0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.5,0.3,0.2,0.0,0.0, 0.0, 0.0, 0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.5,0.3,0.2,0.0, 0.0, 0.0, 0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.5,0.3,0.2,0.0, 0.0, 0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0, 0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0, 0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0, 0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],])
Xvn = np.concatenate(trainset['cachly'])
lengths = list([len(x) for x in trainset['cachly']])
print("training class", 'cachly')
print(Xvn.shape, lengths, len(lengths))
hmm.fit(Xvn, lengths=lengths)
models['cachly'] = hmm
print("Training done")

training class cachly
(6561, 1) [50, 37, 55, 51, 28, 49, 49, 49, 49, 61, 61, 65, 71, 37, 34, 89, 54, 54, 54, 49, 56, 39, 31, 51, 50, 63, 32, 82, 29, 36, 54, 51, 57, 62, 56, 38, 60, 60, 60, 60, 2957, 51, 59, 61, 89, 64, 64, 64, 64, 39, 58, 58, 58, 57, 65, 45, 49, 56, 56, 56, 89, 89, 89, 29, 33, 89] 66


         1      -18530.3515             +nan
         2      -12142.7292       +6387.6223
         3      -11166.2660        +976.4632
         4      -10892.9698        +273.2961
         5      -10767.3400        +125.6298
         6      -10690.2800         +77.0599
         7      -10655.5058         +34.7742
         8      -10633.3188         +22.1870
         9      -10620.2342         +13.0846
        10      -10604.8172         +15.4170
        11      -10578.5416         +26.2757
        12      -10546.9854         +31.5561
        13      -10533.4134         +13.5721
        14      -10527.8333          +5.5801
        15      -10523.0521          +4.7812
        16      -10517.6740          +5.3781
        17      -10513.6992          +3.9748
        18      -10511.7468          +1.9524
        19      -10510.5405          +1.2062
        20      -10509.1743          +1.3662
        21      -10507.1823          +1.9920
        22      -10505.5988          +1.5835
        23

Training done


        74      -10477.2915          +0.0157
        75      -10477.2812          +0.0103
        76      -10477.2744          +0.0069


# Test

In [26]:
print("Testing")
n_correct = {'khong': 0, 'tôi': 0, 'cothe': 0, 'nguoi': 0,'cachly': 0}
for true_cname in class_names:
    for O in testset[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        if (true_cname == max(score, key=score.get)): n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', max(score, key=score.get))

Testing


In [20]:
for cname in class_names:
    print('Accuracy:', cname, n_correct[cname]/n_test[cname])

ZeroDivisionError: division by zero

In [22]:
print('All Accuracy:', sum(n_correct.values())/sum(n_test.values()))

All Accuracy: 0.9205298013245033


In [23]:
print(n_test)

{'bệnh nhân': 30, 'chúng ta': 30, 'có thể': 29, 'người': 32, 'Việt Nam': 30}


In [24]:
np.around(models['nguoi'].transmat_, 2)

array([[0.83, 0.06, 0.11, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.82, 0.18, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.81, 0.06, 0.12, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.86, 0.14, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.91, 0.05, 0.04, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.98, 0.  , 0.02, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.96, 0.04, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.83, 0.17],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

In [25]:
np.around(models['cothe'].transmat_, 2)

array([[0.79, 0.21, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.87, 0.13, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.43, 0.02, 0.55, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.91, 0.09, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.67, 0.06, 0.27, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.44, 0.56, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.63, 0.01, 0.36, 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.8 , 0.11, 0.1 , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.51, 0.41, 0.09,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.82, 0.18,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.73,
        0.27],
       [0.  , 0.  , 0

In [26]:
np.around(models['cachly'].transmat_, 2)

array([[0.68, 0.11, 0.21, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.67, 0.33, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.59, 0.28, 0.13, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.7 , 0.3 , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.63, 0.12, 0.25, 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.84, 0.08, 0.07, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.81, 0.13, 0.05, 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.79, 0.18, 0.04, 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],


In [27]:
np.around(models['khong'].transmat_, 2)

array([[0.74, 0.26, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.85, 0.06, 0.08, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.8 , 0.15, 0.04, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.85, 0.15, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.38, 0.1 , 0.52, 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.87, 0.  , 0.13, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.46, 0.38, 0.16, 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.77, 0.17, 0.05, 0.  ,
        0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],


In [28]:
np.around(models['toi'].transmat_, 2)

array([[0.86, 0.12, 0.02, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.82, 0.18, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.85, 0.15, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.77, 0.22, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.35, 0.24, 0.41, 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.88, 0.  , 0.12, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.12, 0.88, 0.  , 0.  , 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.58, 0.39, 0.04, 0.  ,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.83, 0.  , 0.17,
        0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  ,

## Test dữ liệu nói từ micro

In [25]:
testset1 = {}
n_test1 = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    testset1[cname] = get_class_data(os.path.join('data','test_'+cname))
    n_test1[cname] = len(testset1[cname])

Load khong dataset
Load tôi dataset
Load cachly dataset
Load cothe dataset
Load nguoi dataset


In [30]:
for cname in class_names:
    testset1[cname] = list([kmeans.predict(v).reshape(-1, 1) for v in testset1[cname]])

In [31]:
print("Testing")
n_correct = {'khong': 0, 'toi': 0, 'cothe': 0, 'nguoi': 0,'cachly': 0}
for true_cname in class_names:
    for O in testset1[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        if (true_cname == max(score, key=score.get)): n_correct1[true_cname] += 1
        print(true_cname, score, 'predict:', max(score, key=score.get))

Testing
bệnh nhân {'bệnh nhân': -85.01895072882863, 'chúng ta': -116.89227689303209, 'có thể': -145.5429951589105, 'người': -79.12924074435324, 'Việt Nam': -105.07484469037391} predict: người
bệnh nhân {'bệnh nhân': -68.56648775916136, 'chúng ta': -135.8658249915291, 'có thể': -145.92926637729198, 'người': -109.12223911501658, 'Việt Nam': -130.38131951425976} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -57.163946179657664, 'chúng ta': -184.1771335088101, 'có thể': -126.65149206275542, 'người': -80.46311942385488, 'Việt Nam': -76.78814830780043} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -80.41844963773977, 'chúng ta': -185.13929588212147, 'có thể': -146.46293056268, 'người': -112.6877792505747, 'Việt Nam': -190.7548103572772} predict: bệnh nhân
bệnh nhân {'bệnh nhân': -64.49725977557267, 'chúng ta': -132.75405520685666, 'có thể': -103.81233439400432, 'người': -167.05841916754355, 'Việt Nam': -85.70916966073456} predict: bệnh nhân
chúng ta {'bệnh nhân': -1613.4628264667938, 'chúng ta': -

In [32]:
for cname in class_names:
    print('Accuracy:', cname, n_correct1[cname]/n_test1[cname])

Accuracy: bệnh nhân 0.8
Accuracy: chúng ta 0.8333333333333334
Accuracy: có thể 0.8
Accuracy: người 1.0
Accuracy: Việt Nam 1.0


In [33]:
print(n_test1)

{'bệnh nhân': 5, 'chúng ta': 6, 'có thể': 5, 'người': 5, 'Việt Nam': 6}


In [34]:
np.around(models['nguoi'].transmat_, 2)

array([[0.83, 0.06, 0.11, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.82, 0.18, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.81, 0.06, 0.12, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.86, 0.14, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.91, 0.05, 0.04, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.98, 0.  , 0.02, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.96, 0.04, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.83, 0.17],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

In [35]:
import pickle

In [36]:
filename = 'hmm_models.sav'
pickle.dump(models, open(filename, 'wb'))

In [38]:
pickle.dump(kmeans, open('kmeans.sav', 'wb'))