In [1]:
import librosa
import numpy as np
import os
import math
import operator
from sklearn.cluster import KMeans
import hmmlearn.hmm

Import requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit
Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.[0m
  from numba.decorators import jit as optional_jit


In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans 

In [7]:
class_names = ["nguoi", "viet_nam", "lam_viec", "mot", "khong", "test_nguoi", "test_viet_nam", "test_lam_viec", "test_mot", "test_khong"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data_hmm", cname))
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

Load nguoi dataset
Load viet_nam dataset
Load lam_viec dataset
Load mot dataset
Load khong dataset
Load test_nguoi dataset
Load test_viet_nam dataset
Load test_lam_viec dataset
Load test_mot dataset
Load test_khong dataset
vectors (17729, 36)
centers (10, 36)
centers (10, 36)


In [17]:
def train_GMMHMM(train_set, n_component, startprob_prior, transmat_prior):
    model = hmmlearn.hmm.MultinomialHMM(n_components=n_component, random_state=0, n_iter=1000, verbose=True,
                                       startprob_prior=startprob_prior, transmat_prior=transmat_prior)
    X = np.concatenate(train_set)
    lengths = list([len(x) for x in dataset[cname]])
    # fit dat 
    print(X.shape, lengths, len(lengths))
    model.fit(X, lengths=lengths)
    return model

In [15]:
startprob9 = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
startprob18 = np.array([0.7, 0.2, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
transmat9 = np.array([[0.1 for x in range(9)] for y in range(9)])
transmat18 = np.array([[0.1 for x in range(18)] for y in range(18)])
models = {}
models["khong"] = train_GMMHMM(dataset["khong"], 9, )
models["lam_viec"] = train_GMMHMM(dataset["lam_viec"], 18, 2)
models["mot"] = train_GMMHMM(dataset["mot"], 9, 2)
models["nguoi"] = train_GMMHMM(dataset["nguoi"], 9, 2)
models["viet_nam"] = train_GMMHMM(dataset["viet_nam"], 18, 2)
print(models)

(1887, 36) [26, 28, 26, 24, 20, 19, 16, 29, 37, 24, 31, 32, 26, 31, 28, 29, 18, 24, 24, 16] 20
(5932, 36) [26, 28, 26, 24, 20, 19, 16, 29, 37, 24, 31, 32, 26, 31, 28, 29, 18, 24, 24, 16] 20
(1365, 36) [26, 28, 26, 24, 20, 19, 16, 29, 37, 24, 31, 32, 26, 31, 28, 29, 18, 24, 24, 16] 20
(2020, 36) [26, 28, 26, 24, 20, 19, 16, 29, 37, 24, 31, 32, 26, 31, 28, 29, 18, 24, 24, 16] 20
(3536, 36) [26, 28, 26, 24, 20, 19, 16, 29, 37, 24, 31, 32, 26, 31, 28, 29, 18, 24, 24, 16] 20


  new_cov = new_cov_numer / new_cov_denom


{'khong': GMMHMM(algorithm='viterbi', covariance_type='diag',
       covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5, -1.5, -1.5, -1.5],
        [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -...
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0.]]]),
       means_weight=array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]]),
       min_covar=0.001, n_components=9, n_iter=10, n_mix=2, params='stmcw',
       random_state=None, startprob_prior=1.0, tol=0.01, transmat_prior=1.0,
       verbose=False,
       weights_prior=array([[1., 1.],
       [1., 1.],
       [1., 1.],
    

In [16]:
print("Testing")
result = 0
length = 0
for true_cname in class_names:
    if true_cname[:4] == "test":
        for O in dataset[true_cname]:
            length+=1
            score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
            print(true_cname, score, max(score.items(), key=operator.itemgetter(1))[0])
            if true_cname == max(score.items(), key=operator.itemgetter(1))[0]:
                result = result + 1
print('ACc', result/length, result, length)

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing
test_nguoi {'khong': -25898.125740999723, 'lam_viec': -14773.569690435603, 'mot': -62251.93555014383, 'nguoi': -25589.71526747086, 'viet_nam': -18143.21788782855} lam_viec
test_nguoi {'khong': -24528.648586568677, 'lam_viec': -15629.866123234684, 'mot': -45921.44541813546, 'nguoi': -21437.193049124468, 'viet_nam': -18634.02769258047} lam_viec
test_nguoi {'khong': -22894.445452461106, 'lam_viec': -9545.822138149313, 'mot': -32010.44691234258, 'nguoi': -20657.375875885446, 'viet_nam': -13870.810988559906} lam_viec
test_nguoi {'khong': -23482.244147468664, 'lam_viec': -16953.826403689505, 'mot': -43036.94966643635, 'nguoi': -24984.862529516446, 'viet_nam': -19571.294363897097} lam_viec
test_nguoi {'khong': -17498.03870046465, 'lam_viec': -11792.637026379492, 'mot': -49343.619321186605, 'nguoi': -19888.66933078273, 'viet_nam': -14937.128701958887} lam_viec
test_nguoi {'khong': -25587.783272346733, 'lam_viec': -15573.076096501063, 'mot': -61284.98959624157, 'nguoi': -31907.522518729

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


test_nguoi {'khong': -22795.046481018366, 'lam_viec': -15576.437455298623, 'mot': -73623.15674297563, 'nguoi': -30073.81612504115, 'viet_nam': -22632.650025989962} lam_viec
test_nguoi {'khong': -12891.206070599783, 'lam_viec': -6782.043251571037, 'mot': -41743.437068210915, 'nguoi': -13969.686598198188, 'viet_nam': -12518.615641215276} lam_viec
test_nguoi {'khong': -40820.30645687034, 'lam_viec': -18647.723973847736, 'mot': -86291.51274196229, 'nguoi': -34619.914202254295, 'viet_nam': -25220.728632934246} lam_viec
test_nguoi {'khong': -50421.61158871924, 'lam_viec': -13544.862568584665, 'mot': -51964.80913577527, 'nguoi': -36960.403051845205, 'viet_nam': -26881.9785814638} lam_viec
test_nguoi {'khong': -21601.833473231494, 'lam_viec': -10126.392661701278, 'mot': -48233.57421226088, 'nguoi': -22667.853232675465, 'viet_nam': -12829.566080604001} lam_viec
test_nguoi {'khong': -18957.418993521376, 'lam_viec': -11239.271551715516, 'mot': -57069.29233930203, 'nguoi': -23120.291211825406, 'vi

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -23807.861779429215, 'lam_viec': -16450.90524539543, 'mot': -69290.41902506782, 'nguoi': -36202.74831155963, 'viet_nam': -24018.2641324291} lam_viec
test_nguoi {'khong': -45806.786654639356, 'lam_viec': -21940.254997945878, 'mot': -81286.57386747806, 'nguoi': -42823.80122264991, 'viet_nam': -31711.928915655695} lam_viec
test_viet_nam {'khong': -61386.47900123789, 'lam_viec': -24911.717458186824, 'mot': -75838.87871484568, 'nguoi': -61015.13150015886, 'viet_nam': -38564.407141016076} lam_viec
test_viet_nam {'khong': -62272.350361636476, 'lam_viec': -21399.386885283344, 'mot': -74043.48079767538, 'nguoi': -57287.930505985, 'viet_nam': -33333.055335167126} lam_viec
test_viet_nam {'khong': -53259.52344411059, 'lam_viec': -17710.186910971897, 'mot': -71342.42087009655, 'nguoi': -41302.77902445084, 'viet_nam': -27780.285560991324} lam_viec
test_viet_nam {'khong': -48007.39327844422, 'lam_viec': -14658.0442187921, 'mot': -67609.20004977265, 'nguoi': -39772.51883932944, 'viet_nam': 

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


test_viet_nam {'khong': -60136.019534764375, 'lam_viec': -22602.284750379622, 'mot': -108869.24764320772, 'nguoi': -44933.12136260425, 'viet_nam': -36463.591756686496} lam_viec
test_viet_nam {'khong': -71755.55633404454, 'lam_viec': -23522.7343767322, 'mot': -110817.28553103865, 'nguoi': -48551.59453455548, 'viet_nam': -42604.363954763896} lam_viec
test_viet_nam {'khong': -53665.65439532088, 'lam_viec': -19572.424809789914, 'mot': -93667.09892211122, 'nguoi': -55904.05553853198, 'viet_nam': -29782.19936278503} lam_viec
test_viet_nam {'khong': -63538.43315346148, 'lam_viec': -19631.75082690104, 'mot': -87836.4036531817, 'nguoi': -58607.14387681369, 'viet_nam': -35759.20183729771} lam_viec
test_viet_nam {'khong': -78635.57075593571, 'lam_viec': -30284.526484875863, 'mot': -113401.10189087738, 'nguoi': -84448.43914600965, 'viet_nam': -62741.85706190831} lam_viec
test_viet_nam {'khong': -74445.08499678837, 'lam_viec': -28355.768839660275, 'mot': -120675.71292527138, 'nguoi': -75779.4125211

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 lam_viec
test_viet_nam {'khong': -44766.888682649034, 'lam_viec': -21853.76870186616, 'mot': -82511.45930257089, 'nguoi': -42953.792312905935, 'viet_nam': -25171.24935353561} lam_viec
test_viet_nam {'khong': -46666.158241734076, 'lam_viec': -28977.060755788512, 'mot': -83363.6662034119, 'nguoi': -55034.90969839644, 'viet_nam': -38086.924654455215} lam_viec
test_viet_nam {'khong': -41174.75106461745, 'lam_viec': -23945.866324987743, 'mot': -80524.67432845737, 'nguoi': -39160.352619090714, 'viet_nam': -29787.660010697742} lam_viec
test_lam_viec {'khong': -54440.85123804616, 'lam_viec': -16895.730728206992, 'mot': -76374.06382169678, 'nguoi': -38885.398428598004, 'viet_nam': -22319.808328394887} lam_viec
test_lam_viec {'khong': -31962.135962187294, 'lam_viec': -15028.460154757804, 'mot': -63100.43693807347, 'nguoi': -38309.34469937611, 'viet_nam': -22252.59685132758} lam_viec
test_lam_viec {'khong': -42035.54191128175, 'lam_viec': -13141.531399443435, 'mot': -56063.27114552524, 'nguoi': 

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -30425.691905577296, 'lam_viec': -11989.757343168727, 'mot': -55429.77154621848, 'nguoi': -39230.09132478395, 'viet_nam': -15718.748418600659} lam_viec
test_lam_viec {'khong': -25609.06616364966, 'lam_viec': -11351.713902388141, 'mot': -44576.02562489597, 'nguoi': -31237.01943646422, 'viet_nam': -17315.256129177207} lam_viec
test_lam_viec {'khong': -41110.35277068951, 'lam_viec': -17200.802301297783, 'mot': -54202.68582944265, 'nguoi': -32507.426562944613, 'viet_nam': -25734.985999753062} lam_viec
test_lam_viec {'khong': -27236.130271821097, 'lam_viec': -12922.641280672311, 'mot': -50150.531905107106, 'nguoi': -29125.231920847542, 'viet_nam': -22560.526432360042} lam_viec
test_lam_viec {'khong': -32722.646262758128, 'lam_viec': -13615.732725285361, 'mot': -60076.92502067388, 'nguoi': -34929.8663138809, 'viet_nam': -21988.176664529314} lam_viec
test_lam_viec {'khong': -42927.4857366769, 'lam_viec': -14900.517019941632, 'mot': -45103.45572325522, 'nguoi': -36843.544464062914, 

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -42131.7911345638, 'lam_viec': -13526.203734381059, 'mot': -52710.0546587969, 'nguoi': -25690.30316599713, 'viet_nam': -19206.281806106468} lam_viec
test_lam_viec {'khong': -24544.765501017246, 'lam_viec': -10878.804810082032, 'mot': -42487.350102171884, 'nguoi': -23922.28262212622, 'viet_nam': -13751.33945269304} lam_viec
test_lam_viec {'khong': -47104.294478312164, 'lam_viec': -18303.81007696852, 'mot': -77433.58887438384, 'nguoi': -39218.16975461006, 'viet_nam': -32391.134653428027} lam_viec
test_lam_viec {'khong': -34276.49024396729, 'lam_viec': -13113.080329160297, 'mot': -51486.39301129707, 'nguoi': -37398.717493790806, 'viet_nam': -19077.24682343805} lam_viec
test_lam_viec {'khong': -33294.57095278562, 'lam_viec': -13470.986553221848, 'mot': -55072.560543133106, 'nguoi': -30644.90587940569, 'viet_nam': -23332.278013741277} lam_viec
test_lam_viec {'khong': -26825.744707306098, 'lam_viec': -12899.539512066698, 'mot': -42404.264049784135, 'nguoi': -26470.64671116251, 'vi

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -21981.106746045545, 'lam_viec': -6366.917148347911, 'mot': -21587.06028696007, 'nguoi': -22136.61162391695, 'viet_nam': -13013.451525597817} lam_viec
test_mot {'khong': -10763.24013209137, 'lam_viec': -6141.975820248634, 'mot': -16381.794444303503, 'nguoi': -16176.26097217597, 'viet_nam': -9603.46353946242} lam_viec
test_mot {'khong': -27237.556754152047, 'lam_viec': -7545.793522044256, 'mot': -29022.02252238067, 'nguoi': -25567.447190959432, 'viet_nam': -13151.696934517378} lam_viec
test_mot {'khong': -15975.302348335492, 'lam_viec': -6504.077665653765, 'mot': -24486.402247515536, 'nguoi': -18886.103360736146, 'viet_nam': -6480.636493844315} viet_nam
test_mot {'khong': -24471.092016343173, 'lam_viec': -11010.146490700143, 'mot': -30760.7859787405, 'nguoi': -35832.76490817684, 'viet_nam': -16019.93877568789} lam_viec
test_mot {'khong': -14480.501447545335, 'lam_viec': -6526.14792717933, 'mot': -18244.85931285056, 'nguoi': -14204.723362179668, 'viet_nam': -8694.997457908035}

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


test_mot {'khong': -19031.6416828633, 'lam_viec': -7272.9528668972425, 'mot': -23087.416076913534, 'nguoi': -18871.121482899685, 'viet_nam': -7178.58406188969} viet_nam
test_mot {'khong': -15565.554074651884, 'lam_viec': -5562.996928293171, 'mot': -23483.801350583715, 'nguoi': -14124.683016733548, 'viet_nam': -6593.806343101098} lam_viec
test_mot {'khong': -25037.103892481446, 'lam_viec': -8395.62377241907, 'mot': -27819.949366934587, 'nguoi': -27296.751346755176, 'viet_nam': -7951.285762262246} viet_nam
test_mot {'khong': -55772.771585053124, 'lam_viec': -12902.182512227382, 'mot': -72159.19513652506, 'nguoi': -49787.32441657359, 'viet_nam': -34302.056054229826} lam_viec
test_mot {'khong': -43245.94320384734, 'lam_viec': -13966.039858534645, 'mot': -50497.213131990924, 'nguoi': -51001.73242604847, 'viet_nam': -24026.41370318301} lam_viec
test_mot {'khong': -20691.923907885615, 'lam_viec': -8371.175817755793, 'mot': -26552.04207739553, 'nguoi': -27653.2106651333, 'viet_nam': -12529.040

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -23152.34429875001, 'lam_viec': -8976.66408616356, 'mot': -24312.706470277393, 'nguoi': -32508.931937813813, 'viet_nam': -10816.108638216912} lam_viec
test_khong {'khong': -21837.428528496795, 'lam_viec': -13277.868163488854, 'mot': -43628.217298811105, 'nguoi': -41569.911118355456, 'viet_nam': -21420.318207415232} lam_viec
test_khong {'khong': -18976.152311604437, 'lam_viec': -12553.110903490517, 'mot': -51816.57112471713, 'nguoi': -30398.950301162207, 'viet_nam': -20287.108075388882} lam_viec
test_khong {'khong': -17320.339757192178, 'lam_viec': -11429.345237313688, 'mot': -50597.07278255722, 'nguoi': -27677.02231486205, 'viet_nam': -18110.413529237532} lam_viec
test_khong {'khong': -21617.359833355735, 'lam_viec': -10851.260645250086, 'mot': -35574.835652746246, 'nguoi': -29638.62640439194, 'viet_nam': -19177.375280656204} lam_viec
test_khong {'khong': -13754.22726326807, 'lam_viec': -9043.483071306182, 'mot': -33635.93375916714, 'nguoi': -30318.946239703135, 'viet_nam': 

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -38456.961459899336, 'lam_viec': -17228.568974402846, 'mot': -61904.46979585182, 'nguoi': -35810.69258357025, 'viet_nam': -24906.07472326437} lam_viec
test_khong {'khong': -16501.417319408152, 'lam_viec': -11438.051809762537, 'mot': -43744.687004695355, 'nguoi': -32637.34651632744, 'viet_nam': -14557.15708554367} lam_viec
test_khong {'khong': -17269.807847773143, 'lam_viec': -13767.550833810432, 'mot': -57596.02801943875, 'nguoi': -32872.186998862744, 'viet_nam': -14575.351651525132} lam_viec
test_khong {'khong': -32476.075707031054, 'lam_viec': -15454.563635233777, 'mot': -73761.93088927641, 'nguoi': -53238.21149264745, 'viet_nam': -25096.63524648494} lam_viec
test_khong {'khong': -21117.268568375588, 'lam_viec': -11821.195097565773, 'mot': -40025.780552840726, 'nguoi': -31565.160666679614, 'viet_nam': -22270.83096903635} lam_viec
test_khong {'khong': -24469.974999065693, 'lam_viec': -15217.057796622857, 'mot': -72094.19488191146, 'nguoi': -47413.09749351205, 'viet_nam': -2

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


 {'khong': -18186.89880096075, 'lam_viec': -8780.79816382857, 'mot': -36002.12749355636, 'nguoi': -35654.592815122975, 'viet_nam': -12236.68110366679} lam_viec
test_khong {'khong': -27458.646907684102, 'lam_viec': -11538.073092391332, 'mot': -41874.718300669585, 'nguoi': -39332.448071352264, 'viet_nam': -21021.49845877801} lam_viec
test_khong {'khong': -14195.878595602804, 'lam_viec': -10396.449937434212, 'mot': -40978.40239293046, 'nguoi': -29737.352445501903, 'viet_nam': -15066.248956391597} lam_viec
test_khong {'khong': -9744.871280800531, 'lam_viec': -7602.9989186489875, 'mot': -36938.46400016322, 'nguoi': -25909.393214973185, 'viet_nam': -10607.656462639618} lam_viec
ACc 0.0 0 100


In [None]:

    models = {}
    for cname in class_names:
        class_vectors = dataset[cname]
        # convert all vectors to the cluster index
        # dataset['one'] = [O^1, ... O^R]
        # O^r = (c1, c2, ... ct, ... cT)
        # O^r size T x 1
        dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
        hmm = hmmlearn.hmm.MultinomialHMM(
            n_components=8, random_state=0, n_iter=1000, verbose=True,
            startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0]),
            transmat_prior=np.array([
                [0.1,0.5,0.1,0.1,0.1,0.1,0.1, 0.1],
                [0.1,0.1,0.5,0.1,0.1,0.1,0.1, 0.1],
                [0.1,0.1,0.1,0.5,0.1,0.1,0.1, 0.1],
                [0.1,0.1,0.1,0.1,0.5,0.1,0.1, 0.1],
                [0.1,0.1,0.1,0.1,0.1,0.5,0.1, 0.1],
                [0.1,0.1,0.1,0.1,0.1,0.1,0.5, 0.1],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.5],
                [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.5],
            ]),
        )
        if cname[:4] != 'test':
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
            print("training class", cname)
            print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
    print("Training done")

    print("Testing")
    result = 0
    length = 0
    for true_cname in class_names:
        for O in dataset[true_cname]:
            length+=1
            score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
            print(true_cname, score, max(score.items(), key=operator.itemgetter(1))[0])
            if true_cname == max(score.items(), key=operator.itemgetter(1))[0]:
                result = result + 1
    print('ACc', result/length, result, length)