In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict
import pickle as pk

result = defaultdict(list)


In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    ls = os.listdir(data_dir)
    files = [f for f in ls if f.endswith(".wav")]
    random.shuffle(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=100, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [3]:
class_names = [ "toi", "dich",  "benh_nhan", "nguoi", "theo"]
datas = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    datas[cname] = get_class_data(os.path.join("train", cname))
#     dataset[cname] = get_class_data(os.path.join("train", cname))
    print(len(datas[cname]))
    datas[f"test_{cname}"] = datas[cname][-50:]
    datas[cname] = datas[cname][:-50]
    #datas[f"test_{cname}"] = get_class_data(os.path.join(path_to_data, f"test_{cname}"))
    
print("Done!!!")

Load toi dataset - 

  b = a[a_slice]


86
Load dich dataset - 120
Load benh_nhan dataset - 182
Load nguoi dataset - 181
Load theo dataset - 132
Done!!!


In [4]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items() if k[:4]!='test'], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (14550, 36)
centers (10, 36)
centers (10, 36)
Done


In [5]:
dict_components = {
    # toi |t|~|o|~|i|
    "toi": 9,
    # dich |d|~|i|~|ch|
    "dich": 9,
    # nguoi |ng|~|uo|~|i|
    "nguoi": 9,
    # theo |th|~|e|~|o|
    "theo": 9,
    # benh_nhan |b|~|e|~|nh|~|silent|~|nh|~|a|~|n| 
    "benh_nhan": 18,
}

In [6]:
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    km = kmeans
    dataset[cname] = list([km.predict(v).reshape((-1,1)) for v in datas[cname]])
    dataset[f"test_{cname}"] = list([km.predict(v).reshape((-1,1)) for v in datas[f"test_{cname}"]])

    if cname[:4] != 'test':
        n = dict_components[cname]
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat=np.diag(np.full(n,1))
        #transmat = np.array(dict_transmat[cname])
        
        hmm = hmmlearn.hmm.GMMHMM(
            n_components=n, 
            n_mix = 4, random_state=10, n_iter=500, verbose=True,
            params='mctw', init_params='mct',
            startprob_prior=startprob,
            transmat_prior=transmat,
        )
    
        X = np.concatenate(datas[cname])
        lengths = list([len(x) for x in datas[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        # FOR GMMHMM: NO NEED lengths parameter
        hmm.fit(X)
        models[cname] = hmm
        with open(os.path.join("Models", cname + ".pkl"), "wb") as file: pk.dump(models[cname], file)
print("Training done")

training class toi
(1157, 36) [40, 32, 40, 67, 26, 28, 38, 19, 45, 33, 48, 19, 20, 38, 31, 28, 23, 18, 17, 32, 29, 24, 24, 32, 47, 69, 24, 20, 19, 53, 39, 42, 20, 24, 32, 17] 36


         1     -123780.6414             +nan
         2     -113952.7384       +9827.9030
         3     -111110.4038       +2842.3346
         4     -109726.9820       +1383.4218
         5     -108268.2081       +1458.7738
         6     -105131.1005       +3137.1076
         7       95551.9946     +200683.0952
         8       95708.1271        +156.1325
         9       95775.5335         +67.4064
        10       95790.4284         +14.8949
        11       95771.7861         -18.6423


training class dich
(1925, 36) [28, 22, 27, 47, 13, 16, 19, 37, 31, 12, 20, 40, 25, 21, 44, 29, 24, 21, 39, 33, 28, 19, 24, 32, 43, 23, 33, 27, 39, 28, 26, 29, 33, 19, 29, 34, 25, 21, 23, 21, 32, 44, 24, 28, 21, 40, 26, 24, 19, 28, 27, 31, 48, 26, 24, 23, 27, 19, 23, 46, 15, 24, 22, 29, 27, 19, 22, 28, 33, 22] 70


         1     -200527.8476             +nan
         2     -183578.4208      +16949.4268
         3     -179544.5935       +4033.8273
         4     -138651.5999      +40892.9936
         5     -138457.8608        +193.7391
         6     -138369.7645         +88.0963
         7     -138287.7470         +82.0175
         8     -138215.5673         +72.1797
         9     -138158.4770         +57.0902
        10     -138125.0759         +33.4011
        11     -138116.9304          +8.1455
        12     -138105.7057         +11.2247
        13     -138094.6517         +11.0539
        14     -138085.4835          +9.1682
        15     -138073.0617         +12.4218
        16     -138060.7222         +12.3395
        17     -138053.4304          +7.2917
        18     -138051.0734          +2.3571
        19     -138049.1814          +1.8920
        20     -138046.4007          +2.7807
        21     -138043.4607          +2.9400
        22     -138040.9451          +2.5156
        23

training class benh_nhan
(5643, 36) [37, 25, 36, 41, 44, 35, 39, 36, 51, 36, 34, 54, 27, 42, 63, 43, 45, 35, 45, 48, 44, 34, 48, 62, 63, 35, 31, 45, 45, 54, 75, 41, 42, 34, 50, 37, 67, 29, 47, 29, 35, 41, 52, 57, 30, 30, 44, 53, 41, 58, 39, 30, 52, 42, 39, 52, 31, 34, 37, 36, 43, 71, 37, 46, 34, 40, 32, 45, 26, 44, 42, 34, 41, 37, 41, 42, 72, 39, 37, 44, 38, 41, 51, 39, 70, 43, 37, 43, 32, 41, 60, 39, 33, 35, 35, 38, 57, 43, 33, 48, 35, 43, 33, 49, 41, 47, 48, 51, 28, 44, 44, 33, 46, 49, 42, 34, 46, 45, 39, 45, 41, 52, 32, 59, 34, 32, 74, 47, 42, 35, 43, 46] 132


         1     -581843.9712             +nan
         2     -541880.3768      +39963.5944
         3     -532847.6620       +9032.7148
         4     -530523.8085       +2323.8536
         5     -529519.9843       +1003.8242
         6     -528969.9149        +550.0693
         7     -528630.5536        +339.3614
         8     -528282.5834        +347.9702
         9     -485731.5467      +42551.0367
        10     -485570.3272        +161.2195
        11     -485429.6797        +140.6475
        12     -485329.7633         +99.9163
        13     -485226.5779        +103.1854
        14     -485138.2600         +88.3179
        15     -485056.5458         +81.7142
        16     -484990.6939         +65.8519
        17     -484925.4502         +65.2437
        18     -484867.8759         +57.5743
        19     -484819.4862         +48.3896
        20     -484772.5165         +46.9697
        21     -484730.5593         +41.9572
        22     -484685.8136         +44.7457
        23

training class nguoi
(3202, 36) [34, 20, 37, 13, 19, 23, 17, 28, 17, 17, 18, 18, 27, 29, 23, 19, 26, 36, 35, 17, 18, 15, 43, 20, 20, 19, 23, 33, 19, 20, 32, 20, 30, 15, 34, 24, 25, 17, 20, 19, 26, 22, 25, 27, 41, 36, 23, 23, 23, 15, 25, 30, 22, 29, 28, 22, 22, 18, 14, 21, 27, 42, 28, 21, 20, 22, 19, 30, 20, 16, 18, 23, 20, 30, 19, 20, 50, 21, 40, 51, 20, 26, 36, 23, 22, 38, 33, 21, 25, 24, 22, 31, 26, 19, 20, 48, 20, 19, 18, 26, 23, 26, 44, 14, 53, 20, 19, 23, 28, 23, 19, 35, 20, 15, 16, 21, 14, 22, 17, 23, 16, 38, 23, 20, 22, 20, 18, 23, 23, 13, 26] 131


         1     -324094.5361             +nan
         2     -304714.6888      +19379.8473
         3     -301046.0071       +3668.6817
         4     -275410.4539      +25635.5532
         5     -258194.4547      +17215.9992
         6     -257849.6079        +344.8469
         7     -257559.9045        +289.7034
         8     -257365.7936        +194.1109
         9     -257224.4379        +141.3556
        10     -257118.1537        +106.2842
        11     -257016.7434        +101.4103
        12     -256927.7376         +89.0059
        13     -256859.4547         +68.2829
        14     -256792.7565         +66.6982
        15     -256730.1905         +62.5660
        16     -256682.6658         +47.5247
        17     -256652.3011         +30.3646
        18     -256619.4076         +32.8935
        19     -256589.0066         +30.4010
        20     -256573.3832         +15.6234
        21     -256562.0665         +11.3167
        22     -256549.3323         +12.7343
        23

training class theo
(2623, 36) [27, 32, 44, 33, 27, 39, 39, 63, 25, 45, 46, 21, 30, 23, 29, 23, 14, 48, 31, 11, 46, 21, 46, 30, 32, 32, 42, 22, 25, 17, 19, 13, 27, 39, 29, 30, 25, 33, 21, 37, 43, 34, 23, 64, 26, 39, 42, 26, 42, 14, 22, 15, 25, 41, 30, 45, 39, 32, 18, 28, 25, 57, 31, 21, 51, 59, 27, 28, 28, 27, 56, 27, 27, 33, 30, 15, 30, 42, 25, 30, 35, 35] 82


         1     -281262.6880             +nan
         2     -263339.3842      +17923.3037
         3     -258921.4012       +4417.9830
         4     -257691.1089       +1230.2923
         5     -257284.8573        +406.2516
         6     -257032.8866        +251.9707
         7     -256737.5980        +295.2885
         8     -256561.3272        +176.2709
         9     -256449.9858        +111.3414
        10     -256338.2052        +111.7806
        11     -256298.3981         +39.8071
        12     -256262.7215         +35.6767
        13     -256217.5019         +45.2195
        14     -256177.5743         +39.9276
        15     -256159.5967         +17.9776
        16     -256148.8309         +10.7657
        17     -256121.1313         +27.6996
        18     -256089.4251         +31.7062
        19     -256067.7538         +21.6713
        20     -256055.8834         +11.8704
        21     -256037.2705         +18.6129
        22     -256023.3451         +13.9254
        23

Training done


        71     -255968.9505          +0.0633
        72     -255968.9325          +0.0180
        73     -255968.9270          +0.0055


In [7]:
print("Testing")
result = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in datas[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == cname:
            true_predict += 1
#         print(true_cname, score, predict)
    result[true_cname] = f"QUANTITY: {true_predict}/{len(datas[true_cname])}\nACCURACY: {100*true_predict/len(datas[true_cname])}"

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

In [8]:
for k, v in result.items():
    print(k,'\n',v,'\n')

test_toi 
 QUANTITY: 45/50
ACCURACY: 90.0 

test_dich 
 QUANTITY: 48/50
ACCURACY: 96.0 

test_benh_nhan 
 QUANTITY: 50/50
ACCURACY: 100.0 

test_nguoi 
 QUANTITY: 48/50
ACCURACY: 96.0 

test_theo 
 QUANTITY: 49/50
ACCURACY: 98.0 



In [20]:
import pickle 
with open("m.pkl", "wb") as file:
    pickle.dump(models, file)

In [9]:
np.set_printoptions(precision=3, suppress=True)
for k, v in models.items():
    print(k,v.transmat_)

toi [[0.863 0.    0.014 0.031 0.014 0.    0.    0.077 0.   ]
 [0.    0.679 0.    0.    0.07  0.057 0.174 0.021 0.   ]
 [0.028 0.    0.822 0.027 0.    0.    0.    0.123 0.   ]
 [0.031 0.    0.    0.884 0.    0.    0.    0.085 0.   ]
 [0.038 0.016 0.021 0.005 0.893 0.    0.    0.027 0.   ]
 [0.    0.    0.    0.    0.    0.852 0.085 0.    0.062]
 [0.    0.    0.    0.    0.013 0.    0.808 0.    0.178]
 [0.009 0.289 0.028 0.019 0.    0.    0.    0.655 0.   ]
 [0.022 0.072 0.017 0.    0.039 0.    0.    0.    0.851]]
dich [[0.839 0.    0.007 0.029 0.005 0.097 0.    0.    0.022]
 [0.    0.784 0.    0.    0.216 0.    0.    0.    0.   ]
 [0.    0.17  0.635 0.    0.146 0.    0.    0.049 0.   ]
 [0.    0.011 0.017 0.761 0.    0.006 0.    0.    0.205]
 [0.224 0.    0.    0.057 0.678 0.04  0.    0.    0.   ]
 [0.03  0.    0.006 0.016 0.009 0.845 0.094 0.    0.   ]
 [0.012 0.    0.052 0.063 0.012 0.017 0.845 0.    0.   ]
 [0.    0.    0.    0.    0.16  0.    0.    0.84  0.   ]
 [0.    0.022 0.222 0

In [10]:
for k, v in models.items():
    print(k,v.startprob_)

toi [0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111]
dich [0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111]
benh_nhan [0.056 0.056 0.056 0.056 0.056 0.056 0.056 0.056 0.056 0.056 0.056 0.056
 0.056 0.056 0.056 0.056 0.056 0.056]
nguoi [0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111]
theo [0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111 0.111]
