In [68]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict

result = defaultdict(list)

path_to_data = "./Data_Filtered"

In [69]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    ls = os.listdir(data_dir)
    files = [f for f in ls if f.endswith(".wav")]
    random.shuffle(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=100, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [70]:
class_names = ["Nha", "ThanhPho",  "Me", "YTe", "Hoc",]# "test_ThanhPho", "test_Me", "test_Nha", "test_YTe", "test_Hoc",]

datas = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    datas[cname] = get_class_data(os.path.join(path_to_data, cname))
    print(len(datas[cname]))
    datas[f"test_{cname}"] = datas[cname][-20:]
    datas[cname] = datas[cname][:-20]
    #datas[f"test_{cname}"] = get_class_data(os.path.join(path_to_data, f"test_{cname}"))
    
print("Done!!!")

Load Nha dataset - 121
Load ThanhPho dataset - 101
Load Me dataset - 108
Load YTe dataset - 123
Load Hoc dataset - 108
Done!!!


In [71]:
dict_components = {
    #   tʰa̤jŋ˨˩ fo˧˥ -> 5 âm vị -> 15 states
    "ThanhPho": 15,
    #  mɛ̰ʔ˨˩ -> 2 âm vị -> 6 states
    "Me": 6,
    #  i˧˧ te˧˥ -> 3 âm vị -> 9 states
    "YTe": 9,
    #  ha̰ʔwk˨ -> 3 âm vị -> 9 states
    "Hoc": 9,
    #  ɲa̤ː˨˩ -> 2 âm vị -> 6 states
    "Nha": 6,
} 

In [72]:
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    km = kmeans
    dataset[cname] = list([km.predict(v).reshape((-1,1)) for v in datas[cname]])
    dataset[f"test_{cname}"] = list([km.predict(v).reshape((-1,1)) for v in datas[f"test_{cname}"]])

    if cname[:4] != 'test':
        n = dict_components[cname]
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat=np.diag(np.full(n,1))
        #transmat = np.array(dict_transmat[cname])
        
        hmm = hmmlearn.hmm.GMMHMM(
            n_components=n, 
            n_mix = 4, random_state=10, n_iter=500, verbose=True,
            params='mctw', init_params='mct',
            startprob_prior=startprob,
            transmat_prior=transmat,
        )
    
        X = np.concatenate(datas[cname])
        lengths = list([len(x) for x in datas[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        # FOR GMMHMM: NO NEED lengths parameter
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class Nha
(15860, 36) [184, 140, 161, 220, 158, 181, 171, 143, 145, 140, 158, 156, 153, 149, 145, 171, 159, 173, 127, 122, 148, 140, 156, 212, 140, 225, 150, 168, 145, 179, 145, 152, 145, 155, 161, 186, 147, 99, 166, 145, 122, 186, 161, 163, 154, 145, 210, 158, 130, 143, 181, 189, 142, 137, 161, 135, 158, 168, 142, 148, 152, 154, 147, 186, 140, 152, 159, 166, 137, 145, 140, 152, 161, 132, 156, 137, 143, 173, 147, 140, 161, 143, 181, 168, 149, 145, 150, 132, 217, 130, 140, 207, 173, 168, 163, 179, 143, 155, 137, 204, 143] 101


         1    -1273450.8811             +nan
         2    -1144095.6495     +129355.2316
         3    -1130929.2165      +13166.4331
         4    -1127833.4970       +3095.7194
         5    -1126413.2195       +1420.2776
         6    -1125506.4827        +906.7367
         7    -1124855.7963        +650.6865
         8    -1124456.9435        +398.8528
         9    -1124158.5484        +298.3951
        10    -1123603.3277        +555.2207
        11    -1122782.2509        +821.0768
        12    -1122435.9473        +346.3036
        13    -1122291.2255        +144.7218
        14    -1122219.6768         +71.5487
        15    -1122170.2172         +49.4596
        16    -1122124.8390         +45.3782
        17    -1122077.2718         +47.5673
        18    -1122021.8633         +55.4085
        19    -1121961.8211         +60.0421
        20    -1121902.9425         +58.8787
        21    -1121842.2357         +60.7067
        22    -1121787.0410         +55.1947
        23

training class ThanhPho
(12685, 36) [233, 77, 128, 166, 128, 131, 166, 144, 149, 81, 148, 204, 175, 161, 133, 220, 243, 171, 162, 137, 102, 153, 167, 141, 192, 192, 181, 89, 202, 136, 126, 184, 147, 100, 152, 217, 132, 133, 192, 176, 233, 149, 181, 159, 169, 103, 135, 197, 154, 154, 145, 97, 152, 186, 166, 141, 74, 142, 154, 150, 166, 126, 272, 148, 126, 163, 167, 168, 148, 217, 160, 156, 167, 121, 140, 151, 160, 130, 170, 158, 159] 81


         1    -1160162.1968             +nan
         2     -986267.6549     +173894.5419
         3     -818419.5678     +167848.0871
         4     -590824.9053     +227594.6624
         5     -380771.7264     +210053.1789
         6      724938.3981    +1105710.1245
         7      725628.5626        +690.1645
         8      725985.9232        +357.3606
         9      726271.7468        +285.8236
        10      726630.1606        +358.4137
        11      726836.5334        +206.3728
        12      726987.2663        +150.7330
        13      727121.9326        +134.6663
        14      727254.2315        +132.2989
        15      727370.5692        +116.3377
        16      727451.0754         +80.5063
        17      727549.8777         +98.8023
        18      727643.9153         +94.0375
        19      727757.4638        +113.5485
        20      727838.4024         +80.9386
        21      727921.3180         +82.9156
        22      728014.6099         +93.2919
        23

training class Me
(11605, 36) [120, 123, 122, 122, 145, 145, 119, 135, 117, 127, 154, 125, 132, 130, 135, 148, 132, 120, 130, 124, 140, 145, 140, 137, 135, 120, 131, 132, 124, 119, 130, 117, 132, 140, 135, 140, 137, 143, 126, 123, 114, 131, 127, 161, 135, 130, 135, 118, 132, 126, 140, 135, 150, 124, 130, 130, 122, 158, 145, 117, 133, 143, 131, 123, 138, 123, 148, 127, 127, 133, 126, 113, 130, 137, 135, 158, 145, 126, 122, 122, 145, 137, 137, 132, 130, 132, 126, 115] 88


         1     -956661.3531             +nan
         2     -874285.5487      +82375.8044
         3     -860726.3788      +13559.1699
         4     -856387.1046       +4339.2742
         5     -854471.7734       +1915.3312
         6     -853428.2093       +1043.5641
         7     -852835.2200        +592.9894
         8     -852475.2624        +359.9576
         9     -852262.1718        +213.0906
        10     -852108.2880        +153.8839
        11     -851980.2017        +128.0863
        12     -851863.2897        +116.9120
        13     -851771.1844         +92.1053
        14     -851702.2156         +68.9688
        15     -851646.7354         +55.4801
        16     -851599.6690         +47.0664
        17     -851558.7480         +40.9210
        18     -851522.7026         +36.0454
        19     -851490.0077         +32.6949
        20     -851458.0736         +31.9341
        21     -851424.6367         +33.4369
        22     -851389.0374         +35.5993
        23

training class YTe
(13516, 36) [128, 125, 139, 120, 142, 130, 141, 120, 156, 119, 139, 139, 141, 134, 106, 112, 118, 144, 119, 142, 132, 120, 119, 120, 120, 128, 127, 146, 136, 128, 120, 154, 133, 113, 130, 140, 139, 118, 142, 147, 130, 145, 135, 145, 132, 142, 120, 126, 129, 125, 128, 135, 136, 129, 116, 150, 140, 136, 124, 141, 124, 118, 134, 122, 129, 141, 111, 117, 138, 114, 136, 147, 139, 119, 120, 147, 140, 130, 164, 146, 145, 118, 142, 128, 141, 111, 119, 128, 135, 126, 129, 131, 147, 145, 153, 145, 119, 123, 115, 125, 120, 120, 125] 103


         1    -1234473.2419             +nan
         2     -992840.3316     +241632.9103
         3     -752358.7963     +240481.5353
         4     -615941.5409     +136417.2554
         5     -371537.7541     +244403.7867
         6     -195226.6430     +176311.1111
         7      -39836.3937     +155390.2494
         8      304392.8796     +344229.2733
         9     1823930.2936    +1519537.4140
        10     1825298.8908       +1368.5972
        11     1827016.8220       +1717.9312
        12     1828284.8112       +1267.9892
        13     1828381.1962         +96.3850
        14     1828475.9923         +94.7961
        15     1828551.6737         +75.6814
        16     1828614.8840         +63.2103
        17     1828663.8755         +48.9914
        18     1828704.9237         +41.0482
        19     1828732.1681         +27.2445
        20     1828759.5944         +27.4263
        21     1828783.0463         +23.4519
        22     1828796.1173         +13.0710
        23

training class Hoc
(12215, 36) [155, 153, 148, 153, 140, 145, 148, 155, 140, 108, 155, 173, 120, 144, 122, 137, 158, 137, 145, 133, 168, 127, 173, 166, 141, 163, 95, 115, 116, 126, 163, 148, 130, 135, 108, 152, 161, 127, 120, 124, 126, 130, 135, 101, 155, 118, 173, 153, 116, 129, 155, 129, 124, 120, 171, 100, 163, 129, 139, 122, 120, 129, 143, 153, 137, 136, 104, 154, 132, 137, 104, 155, 137, 163, 161, 143, 131, 161, 133, 155, 122, 132, 130, 153, 158, 130, 119, 168] 88


         1    -1034176.3465             +nan
         2     -908863.4181     +125312.9284
         3     -876014.2953      +32849.1228
         4     -844562.8893      +31451.4060
         5     -788974.5328      +55588.3565
         6     -702371.1286      +86603.4042
         7     -277179.2917     +425191.8369
         8     -276781.8033        +397.4884
         9     -276520.8168        +260.9866
        10     -276331.1941        +189.6227
        11     -276180.0929        +151.1011
        12     -276035.8968        +144.1962
        13     -275919.2462        +116.6505
        14     -275814.2290        +105.0172
        15     -275739.8681         +74.3609
        16     -275671.0924         +68.7757
        17     -275592.7791         +78.3132
        18     -275521.1187         +71.6604
        19     -275470.6590         +50.4597
        20     -275426.1426         +44.5163
        21     -275384.5319         +41.6108
        22     -275356.1477         +28.3842
        23

Training done


       269     -274813.6802          +0.0099


In [73]:
print("Testing")
result = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in datas[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == cname:
            true_predict += 1
#         print(true_cname, score, predict)
    result[true_cname] = f"QUANTITY: {true_predict}/{len(datas[true_cname])}\nACCURACY: {100*true_predict/len(datas[true_cname])}"

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

In [74]:
for k, v in result.items():
    print(k,'\n',v,'\n')

test_Nha 
 QUANTITY: 19/20
ACCURACY: 95.0 

test_ThanhPho 
 QUANTITY: 20/20
ACCURACY: 100.0 

test_Me 
 QUANTITY: 20/20
ACCURACY: 100.0 

test_YTe 
 QUANTITY: 18/20
ACCURACY: 90.0 

test_Hoc 
 QUANTITY: 19/20
ACCURACY: 95.0 



In [None]:
np.set_printoptions(precision=3, suppress=True)
for k, v in models.items():
    print(k,v.transmat_)

In [56]:
import pickle 
with open("m.pkl", "wb") as file:
    pickle.dump(models, file)