In [2]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.2.3-cp37-cp37m-win_amd64.whl (111 kB)
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.2.3


In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import pickle as pk

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix


In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
#     print(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    #print("centers", kmeans.cluster_centers_.shape)
    return kmeans 

In [11]:
if __name__ == "__main__":
    class_names = ["la", "cua", "nguoi", "co", "giadinh", "test_giadinh", "test_la", "test_nguoi", "test_cua", "test_co"]
    dataset = {}
    train_dataset = {}
    for cname in class_names:
        dataset[cname] = get_class_data(os.path.join("train", cname))
        if cname[:4] != "test":
#         print(f"Load {cname} dataset to train")
            train_dataset[cname] = get_class_data(os.path.join("train", cname))

#   # Get all vectors in the datasets
#   all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
#   print("vectors", all_vectors.shape)
#   # Run K-Means algorithm to get clusters
#   kmeans = clustering(all_vectors)
#   print("centers", kmeans.cluster_centers_.shape)

# Get all vectors in the datasets
    all_train_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in train_dataset.items()], axis=0)
    print("vectors", all_train_vectors.shape)
# Run K-Means algorithm to get clusters
    kmeans = clustering(all_train_vectors)
    
    print("centers", kmeans.cluster_centers_.shape)

    models = {}
    for cname in class_names:
        class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['cname'] = [O^1, ... O^R]
# O^r = (c1, c2, ... ct, ... cT)
# O^r size T x 1
        dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])

# =================================================================
# cua |c|~|u|~|a|
        if cname == "cua":
            print(f"training {cname}")
            hmm = hmmlearn.hmm.MultinomialHMM(
              n_components=9, init_params='e', params='ste', verbose=True, n_iter=1000
            ) 
            startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0]),
            transmat_prior=np.array([                      
                [0.2,0.5,0.2,0.1,0.0,0.0,0.0,0.0],
                [0.0,0.2,0.5,0.2,0.1,0.0,0.0,0.0],
                [0.0,0.0,0.2,0.5,0.2,0.1,0.0,0.0],
                [0.0,0.0,0.0,0.2,0.5,0.2,0.1,0.0],
                [0.0,0.0,0.0,0.0,0.2,0.5,0.2,0.1],
                [0.0,0.0,0.0,0.0,0.0,0.2,0.6,0.2],
                [0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.7],
                [0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
            ]),
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
        #       print("training class", cname)
        #       print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
            print("==================================")
            print(cname)
            np.set_printoptions(precision=2, suppress=True)
            print(models[cname].transmat_)
        # =================================================================
#         nguoi |ng|~|uo|~|i|
        if cname == "nguoi":
            print(f"training {cname}")
            hmm = hmmlearn.hmm.MultinomialHMM(
              n_components=8, init_params='e', params='ste', verbose=True, n_iter=1000
            )
            startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0, 0.0,0.0]),
            transmat_prior=np.array([                      
                [0.2,0.5,0.2,0.1,0.0,0.0,0.0,0.0],
                [0.0,0.2,0.5,0.2,0.1,0.0,0.0,0.0],
                [0.0,0.0,0.2,0.5,0.2,0.1,0.0,0.0],
                [0.0,0.0,0.0,0.2,0.5,0.2,0.1,0.0],
                [0.0,0.0,0.0,0.0,0.2,0.5,0.2,0.1],
                [0.0,0.0,0.0,0.0,0.0,0.2,0.6,0.2],
                [0.0,0.0,0.0,0.0,0.0,0.0,0.3,0.7],
                [0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
            ]),
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
        #       print("training class", cname)
        #       print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
            print("==================================")
            print(cname)
            np.set_printoptions(precision=2, suppress=True)
            print(models[cname].transmat_)

        # =================================================================
        # co |c|~|o|
        if cname == "co":
            print(f"training {cname}")
            hmm = hmmlearn.hmm.MultinomialHMM(
              n_components=6, init_params='e', params='ste', verbose=True, n_iter=1000
            )
            startprob_prior=np.array([0.3,0.6,0.1,0.0,0.0,0.0])
            transmat_prior=np.array([
                [0.5,0.4,0.1,0.0,0.0,0.0,],
                [0.0,0.5,0.4,0.1,0.0,0.0,],
                [0.0,0.0,0.5,0.4,0.1,0.0,],
                [0.0,0.0,0.0,0.5,0.4,0.1,],
                [0.0,0.0,0.0,0.0,0.5,0.5],
                [0.0,0.0,0.0,0.0,0.0,1.0]
            ])
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
        #       print("training class", cname)
        #       print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
            print("==================================")
            print(cname)
            np.set_printoptions(precision=2, suppress=True)
            print(models[cname].transmat_)

        # =================================================================
        # la |l|~|a|
        if cname == "la":
            print(f"training {cname}")
            hmm = hmmlearn.hmm.MultinomialHMM(
              n_components=6, init_params='e', params='ste', verbose=True, n_iter=1000
            )
            startprob_prior_=np.array([0.2,0.7,0.1,0.0,0.0,0.0])
            transmat_prior=np.array([
                [0.5,0.4,0.1,0.0,0.0,0.0,],
                [0.0,0.5,0.4,0.1,0.0,0.0,],
                [0.0,0.0,0.5,0.4,0.1,0.0,],
                [0.0,0.0,0.0,0.5,0.4,0.1,],
                [0.0,0.0,0.0,0.0,0.5,0.5],
                [0.0,0.0,0.0,0.0,0.0,1.0]
            ])
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
        #       print("training class", cname)
        #       print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
            print("==================================")
            print(cname)
            np.set_printoptions(precision=2, suppress=True)
            print(models[cname].transmat_)

        # =================================================================
        # giadinh |g|~|i|~|a|~|silent|~|d|~|i|~|nh| 
        if cname == "giadinh":
            print(f"training {cname}")
            hmm = hmmlearn.hmm.MultinomialHMM(
                n_components=18, init_params='e', params='ste', verbose=True, n_iter=1000
            )
            hmm.startprob_ = np.array([0.5,0.2,0.1,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0])
            hmm.transmat_ = np.array([
            [0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2,0.0],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.3,0.2],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5],
            [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
            ])
            X = np.concatenate(dataset[cname])
            lengths = list([len(x) for x in dataset[cname]])
        #       print("training class", cname)
        #       print(X.shape, lengths, len(lengths))
            hmm.fit(X, lengths=lengths)
            models[cname] = hmm
            print("==================================")
            print(cname)
            np.set_printoptions(precision=2, suppress=True)
            print(models[cname].transmat_)

          
    #       print("Training done")

    print("Testing and Labeling")
    for true_cname in class_names:
        if true_cname[:4] == "test":
            print("==================================")
            print(true_cname)

            lname = true_cname[5:]
            totalWord = 0
            true = 0
            accuracy = 0

            for O in dataset[true_cname]:
                totalWord += 1
                scores = {}
                for cname, model in models.items():
                    if cname[:4] != "test":
                        score = model.score(O, [len(O)])
                        scores[cname] = score
#                 print(scores)
                srt = sorted(scores.items(), key=lambda x: x[1], reverse=True)
            #         print(srt[0])
                if srt[0][0] == lname:
                    true += 1
            accuracy = true/totalWord
            print("--------------------------------------------")
            print("!note: test_folder must contain wavs that it records exactly the word which be trained")
            print("accuracy: ", accuracy,", true: ", true,", total_word: ",totalWord)
            
    print("Exporting models")
    for label in class_names:
        if true_cname[:4] != "test":
            with open(os.path.join("Models", label + ".pkl"), "wb") as file: pk.dump(models[label], file)

  b = a[a_slice]


vectors (10116, 36)
centers (10, 36)
training la


         1       -5038.7426             +nan
         2       -4278.9012        +759.8414
         3       -4260.7182         +18.1830
         4       -4225.5719         +35.1463
         5       -4140.6516         +84.9203
         6       -3965.2941        +175.3575
         7       -3743.9959        +221.2981
         8       -3579.5375        +164.4585
         9       -3504.6529         +74.8846
        10       -3463.3442         +41.3086
        11       -3420.1750         +43.1692
        12       -3361.1926         +58.9824
        13       -3272.3269         +88.8657
        14       -3168.0716        +104.2553
        15       -3093.0531         +75.0185
        16       -3022.7719         +70.2811
        17       -2909.8869        +112.8851
        18       -2753.4780        +156.4089
        19       -2695.6743         +57.8037
        20       -2672.4253         +23.2490
        21       -2655.4133         +17.0119
        22       -2644.9840         +10.4294
        23

la
[[0.8  0.15 0.02 0.   0.02 0.  ]
 [0.19 0.56 0.   0.   0.24 0.01]
 [0.13 0.01 0.86 0.   0.   0.  ]
 [0.   0.   0.06 0.87 0.02 0.06]
 [0.01 0.15 0.   0.   0.71 0.13]
 [0.   0.03 0.   0.   0.1  0.86]]
training cua


         1       -4122.8456             +nan
         2       -2826.1033       +1296.7423
         3       -2798.1131         +27.9902
         4       -2734.3745         +63.7386
         5       -2585.4608        +148.9138
         6       -2378.2544        +207.2063
         7       -2220.2307        +158.0238
         8       -2113.6716        +106.5591
         9       -2037.6976         +75.9739
        10       -1990.3369         +47.3607
        11       -1956.6610         +33.6760
        12       -1927.7119         +28.9490
        13       -1900.2767         +27.4352
        14       -1875.5789         +24.6979
        15       -1856.2109         +19.3679
        16       -1843.2063         +13.0046
        17       -1834.3105          +8.8958
        18       -1827.7943          +6.5162
        19       -1822.8068          +4.9875
        20       -1818.7324          +4.0744
        21       -1815.1412          +3.5912
        22       -1811.7808          +3.3604
        23

cua
[[0.63 0.   0.   0.03 0.02 0.   0.   0.21 0.11]
 [0.   0.7  0.   0.09 0.2  0.01 0.   0.   0.  ]
 [0.   0.   0.65 0.   0.   0.35 0.   0.   0.  ]
 [0.   0.   0.   0.64 0.   0.   0.36 0.   0.  ]
 [0.   0.09 0.   0.03 0.75 0.03 0.1  0.   0.  ]
 [0.   0.   0.   0.   0.49 0.51 0.   0.   0.  ]
 [0.09 0.   0.   0.01 0.   0.   0.65 0.   0.25]
 [0.   0.   0.02 0.   0.   0.14 0.   0.72 0.11]
 [0.03 0.   0.09 0.   0.   0.22 0.   0.25 0.41]]
training nguoi


         1       -6387.8672             +nan
         2       -5950.7771        +437.0900
         3       -5904.5871         +46.1900
         4       -5808.9559         +95.6312
         5       -5611.2080        +197.7479
         6       -5300.6550        +310.5531
         7       -4900.0443        +400.6106
         8       -4372.7501        +527.2943
         9       -3932.8737        +439.8764
        10       -3698.1304        +234.7433
        11       -3585.6943        +112.4361
        12       -3489.6279         +96.0664
        13       -3413.5246         +76.1034
        14       -3366.6473         +46.8773
        15       -3330.4057         +36.2416
        16       -3284.5816         +45.8241
        17       -3232.0282         +52.5534
        18       -3200.0722         +31.9560
        19       -3178.0130         +22.0592
        20       -3154.3021         +23.7110
        21       -3120.7387         +33.5634
        22       -3088.7597         +31.9790
        23

nguoi
[[0.81 0.02 0.   0.03 0.02 0.01 0.   0.1 ]
 [0.08 0.79 0.03 0.01 0.01 0.   0.   0.08]
 [0.   0.15 0.58 0.14 0.   0.03 0.08 0.01]
 [0.   0.09 0.12 0.77 0.   0.   0.02 0.  ]
 [0.06 0.   0.   0.   0.92 0.01 0.   0.01]
 [0.   0.01 0.   0.   0.03 0.95 0.01 0.  ]
 [0.03 0.   0.04 0.03 0.   0.05 0.85 0.  ]
 [0.   0.   0.16 0.   0.   0.03 0.   0.81]]
training co


         2       -2152.9080        +655.1671
         3       -2144.9602          +7.9478
         4       -2132.9122         +12.0479
         5       -2110.2039         +22.7084
         6       -2059.9173         +50.2866
         7       -1962.8720         +97.0453
         8       -1854.4467        +108.4253
         9       -1766.4346         +88.0120
        10       -1684.5169         +81.9177
        11       -1605.4311         +79.0858
        12       -1532.3301         +73.1010
        13       -1495.7027         +36.6274
        14       -1480.2969         +15.4058
        15       -1470.5599          +9.7370
        16       -1462.9223          +7.6375
        17       -1456.4436          +6.4788
        18       -1451.0171          +5.4264
        19       -1446.7306          +4.2865
        20       -1443.4819          +3.2487
        21       -1440.6288          +2.8531
        22       -1436.9022          +3.7266
        23       -1429.1192          +7.7830
        24

co
[[0.72 0.   0.25 0.   0.   0.02]
 [0.   0.78 0.   0.   0.   0.22]
 [0.   0.   0.7  0.01 0.29 0.  ]
 [0.   0.16 0.   0.68 0.17 0.  ]
 [0.   0.04 0.   0.   0.54 0.41]
 [0.13 0.06 0.   0.   0.   0.81]]
training giadinh


         1       -5025.8048             +nan
         2       -3108.0974       +1917.7074
         3       -2537.9627        +570.1346
         4       -2375.1237        +162.8390
         5       -2312.6610         +62.4627
         6       -2278.6032         +34.0578
         7       -2249.5006         +29.1026
         8       -2211.9150         +37.5856
         9       -2175.5055         +36.4095
        10       -2152.4542         +23.0513
        11       -2136.4462         +16.0080
        12       -2121.4395         +15.0067
        13       -2110.1383         +11.3012
        14       -2104.7003          +5.4380
        15       -2101.3476          +3.3527
        16       -2098.1567          +3.1909
        17       -2095.5542          +2.6025
        18       -2093.6843          +1.8699
        19       -2092.0336          +1.6507
        20       -2090.0161          +2.0175
        21       -2088.1956          +1.8204
        22       -2087.0644          +1.1312
        23

giadinh
[[0.8  0.17 0.03 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.77 0.21 0.02 0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.44 0.22 0.34 0.   0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.87 0.03 0.1  0.   0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.79 0.   0.21 0.   0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.56 0.22 0.22 0.   0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.62 0.3  0.08 0.   0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.35 0.02 0.63 0.   0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.64 0.   0.36 0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.71 0.29 0.   0.   0.
  0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.3  0.7  