In [15]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import random
from collections import defaultdict
import pickle as pk

result = defaultdict(list)


In [16]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

def get_class_data(data_dir):
    ls = os.listdir(data_dir)
    files = [f for f in ls if f.endswith(".wav")]
    random.shuffle(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files]
    return mfcc

def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=100, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

In [17]:
class_names = [ "toi", "dich",  "benh_nhan", "nguoi", "theo"]
datas = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset", end=' - ')
    datas[cname] = get_class_data(os.path.join("train", cname))
#     dataset[cname] = get_class_data(os.path.join("train", cname))
    print(len(datas[cname]))
    datas[f"test_{cname}"] = datas[cname][-50:]
    datas[cname] = datas[cname][:-50]
    #datas[f"test_{cname}"] = get_class_data(os.path.join(path_to_data, f"test_{cname}"))
    
print("Done!!!")

Load toi dataset - 

  b = a[a_slice]


86
Load dich dataset - 120
Load benh_nhan dataset - 182
Load nguoi dataset - 181
Load theo dataset - 132
Done!!!


In [18]:
# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in datas.items() if k[:4]!='test'], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print("Done")

vectors (14652, 36)
centers (10, 36)
centers (10, 36)
Done


In [19]:
dict_components = {
    # toi |t|~|o|~|i|
    "toi": 9,
    # dich |d|~|i|~|ch|
    "dich": 9,
    # nguoi |ng|~|uo|~|i|
    "nguoi": 9,
    # theo |th|~|e|~|o|
    "theo": 9,
    # benh_nhan |b|~|e|~|nh|~|silent|~|nh|~|a|~|n| 
    "benh_nhan": 18,
}

In [12]:
models = {}
for cname in class_names:
    class_vectors = datas[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    km = kmeans
    dataset[cname] = list([km.predict(v).reshape((-1,1)) for v in datas[cname]])
    dataset[f"test_{cname}"] = list([km.predict(v).reshape((-1,1)) for v in datas[f"test_{cname}"]])

    if cname[:4] != 'test':
        n = dict_components[cname]
        startprob = np.zeros(n)
        startprob[0] = 1.0
        transmat=np.diag(np.full(n,1))
        #transmat = np.array(dict_transmat[cname])
        
        hmm = hmmlearn.hmm.GMMHMM(
            n_components=n, 
            n_mix = 4, random_state=10, n_iter=500, verbose=True,
            params='mctw', init_params='mct',
            startprob_prior=startprob,
            transmat_prior=transmat,
        )
    
        X = np.concatenate(datas[cname])
        lengths = list([len(x) for x in datas[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        # FOR GMMHMM: NO NEED lengths parameter
        hmm.fit(X)
        models[cname] = hmm
        with open(os.path.join("Models", cname + ".pkl"), "wb") as file: pk.dump(models[cname], file)
print("Training done")

training class toi
(1212, 36) [32, 62, 39, 48, 40, 36, 53, 20, 30, 69, 22, 40, 32, 20, 31, 32, 31, 45, 28, 31, 23, 47, 32, 33, 17, 12, 39, 18, 21, 37, 23, 42, 25, 26, 49, 27] 36


         1     -130714.9840             +nan
         2     -120401.2723      +10313.7118
         3     -117550.7542       +2850.5180
         4     -116718.1464        +832.6078
         5     -116367.1439        +351.0025
         6     -116162.7347        +204.4092
         7     -116016.2996        +146.4351
         8     -115763.7970        +252.5026
         9     -114828.0328        +935.7642
        10     -112353.7365       +2474.2962
        11      -96369.4846      +15984.2519
        12       89160.0392     +185529.5239
        13       89212.3338         +52.2946
        14       89213.7457          +1.4119
        15       89214.6792          +0.9336
        16       89216.1308          +1.4515
        17       89217.4987          +1.3679
        18       89217.8579          +0.3592
        19       89218.7636          +0.9057
        20       89222.0632          +3.2996
        21       89222.4028          +0.3396
        22       89222.4124          +0.0096


training class dich
(1903, 36) [21, 21, 19, 33, 16, 21, 33, 40, 44, 24, 27, 28, 32, 29, 25, 48, 19, 27, 29, 33, 46, 15, 23, 23, 56, 19, 33, 20, 23, 20, 24, 19, 16, 27, 21, 19, 21, 43, 25, 18, 29, 24, 26, 29, 32, 20, 33, 40, 24, 27, 40, 19, 21, 31, 19, 39, 26, 39, 24, 28, 23, 31, 34, 33, 21, 34, 18, 27, 13, 19] 70


         1     -198861.8486             +nan
         2     -185036.7436      +13825.1051
         3     -182129.0842       +2907.6593
         4     -181365.6910        +763.3933
         5     -181075.7489        +289.9420
         6     -180934.3650        +141.3839
         7     -180819.7742        +114.5908
         8     -180711.4334        +108.3408
         9     -180558.4451        +152.9882
        10     -180431.8304        +126.6148
        11     -180344.7095         +87.1208
        12     -180272.1849         +72.5246
        13     -180219.1562         +53.0287
        14     -180208.5098         +10.6464
        15     -180197.5240         +10.9859
        16     -180154.5134         +43.0106
        17     -180137.2319         +17.2815
        18     -180126.3966         +10.8353
        19     -180116.9214          +9.4752
        20     -180106.3807         +10.5407
        21     -180100.3439          +6.0367
        22     -180094.4100          +5.9339
        23

training class benh_nhan
(5587, 36) [35, 41, 30, 46, 30, 37, 70, 34, 49, 39, 45, 37, 41, 39, 57, 51, 30, 41, 43, 30, 43, 46, 48, 34, 50, 44, 37, 39, 41, 44, 43, 39, 39, 46, 39, 41, 38, 38, 69, 42, 59, 45, 38, 32, 46, 33, 35, 44, 48, 33, 34, 40, 42, 32, 41, 25, 39, 30, 55, 33, 46, 47, 59, 36, 33, 34, 34, 33, 41, 42, 52, 41, 41, 63, 44, 37, 41, 43, 54, 64, 57, 75, 35, 45, 54, 28, 45, 29, 45, 47, 46, 38, 37, 29, 45, 32, 34, 36, 35, 62, 51, 74, 44, 48, 27, 45, 38, 43, 28, 41, 53, 39, 46, 32, 35, 30, 32, 37, 31, 51, 42, 46, 43, 37, 52, 26, 67, 44, 48, 30, 47, 72] 132


         1     -580448.3875             +nan
         2     -541474.9766      +38973.4109
         3     -532223.7385       +9251.2381
         4     -529902.2398       +2321.4986
         5     -528785.9538       +1116.2860
         6     -527831.5511        +954.4028
         7     -527001.1423        +830.4088
         8     -526723.6381        +277.5042
         9     -526430.7394        +292.8987
        10     -526278.8184        +151.9210
        11     -526138.9935        +139.8249
        12     -525987.7019        +151.2916
        13     -525838.2591        +149.4428
        14     -525713.9363        +124.3227
        15     -525624.2010         +89.7353
        16     -525566.3819         +57.8192
        17     -525505.6017         +60.7802
        18     -525464.3841         +41.2175
        19     -525438.5911         +25.7931
        20     -525404.8003         +33.7908
        21     -525354.0502         +50.7501
        22     -525287.1703         +66.8799
        23

training class nguoi
(3235, 36) [28, 18, 15, 19, 36, 21, 23, 15, 28, 20, 19, 30, 20, 20, 35, 25, 25, 16, 24, 23, 22, 22, 48, 20, 19, 18, 19, 26, 14, 20, 22, 18, 19, 36, 18, 19, 27, 19, 27, 17, 19, 16, 27, 25, 20, 28, 33, 16, 18, 20, 17, 21, 33, 20, 32, 41, 18, 26, 22, 30, 44, 28, 19, 53, 20, 33, 17, 22, 28, 29, 28, 23, 18, 47, 26, 25, 42, 29, 20, 24, 37, 20, 19, 18, 23, 18, 19, 31, 15, 22, 36, 23, 23, 29, 22, 23, 21, 38, 17, 16, 23, 22, 22, 38, 25, 23, 30, 25, 18, 31, 16, 34, 17, 26, 34, 20, 21, 30, 14, 51, 20, 24, 25, 55, 30, 19, 21, 24, 26, 19, 15] 131


         1     -329018.6467             +nan
         2     -310037.6720      +18980.9748
         3     -305258.3395       +4779.3325
         4     -271973.8560      +33284.4836
         5     -261862.2045      +10111.6514
         6     -261568.9000        +293.3045
         7     -261361.6007        +207.2993
         8     -261190.2297        +171.3710
         9     -261066.9397        +123.2900
        10     -260949.9596        +116.9801
        11     -260895.1234         +54.8362
        12     -260869.9382         +25.1852
        13     -260862.7027          +7.2355
        14     -260839.3929         +23.3098
        15     -260818.5650         +20.8279
        16     -260798.8618         +19.7032
        17     -260782.7427         +16.1190
        18     -260767.7537         +14.9891
        19     -260752.9875         +14.7662
        20     -260742.8386         +10.1489
        21     -260736.2868          +6.5518
        22     -260730.8814          +5.4054
        23

training class theo
(2683, 36) [45, 34, 44, 25, 25, 30, 33, 21, 42, 27, 39, 57, 40, 23, 27, 25, 11, 64, 28, 27, 22, 30, 29, 59, 17, 21, 31, 46, 22, 13, 30, 39, 56, 27, 43, 32, 21, 26, 28, 21, 33, 48, 40, 30, 23, 46, 39, 14, 23, 28, 27, 22, 25, 35, 38, 39, 29, 30, 39, 46, 35, 34, 50, 42, 30, 63, 14, 42, 33, 22, 29, 23, 41, 39, 39, 31, 33, 32, 24, 30, 30, 33] 82


         1     -288380.7898             +nan
         2     -270539.2195      +17841.5703
         3     -266691.6570       +3847.5625
         4     -265317.4591       +1374.1979
         5     -264817.9937        +499.4654
         6     -264541.9324        +276.0614
         7     -264367.5106        +174.4218
         8     -264217.9876        +149.5230
         9     -263971.9879        +245.9997
        10     -262247.9006       +1724.0872
        11     -221529.9217      +40717.9790
        12     -221461.7585         +68.1632
        13     -221424.3914         +37.3670
        14     -221403.1345         +21.2570
        15     -221382.7096         +20.4249
        16     -221344.8009         +37.9087
        17     -221321.5184         +23.2825
        18     -221305.5010         +16.0173
        19     -221294.3110         +11.1901
        20     -221282.9046         +11.4064
        21     -221274.6234          +8.2812
        22     -221266.9299          +7.6935
        23

Training done


       110     -221104.2310          +0.0027


In [13]:
print("Testing")
result = {}
for cname in class_names:
    true_cname = f"test_{cname}"
    true_predict = 0
#     for O in dataset[true_cname]:
    for O in datas[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items()}
        predict = max(score, key=score.get)
        if predict == cname:
            true_predict += 1
#         print(true_cname, score, predict)
    result[true_cname] = f"QUANTITY: {true_predict}/{len(datas[true_cname])}\nACCURACY: {100*true_predict/len(datas[true_cname])}"

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance


Testing


Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate

In [14]:
for k, v in result.items():
    print(k,'\n',v,'\n')

test_toi 
 QUANTITY: 44/50
ACCURACY: 88.0 

test_dich 
 QUANTITY: 50/50
ACCURACY: 100.0 

test_benh_nhan 
 QUANTITY: 50/50
ACCURACY: 100.0 

test_nguoi 
 QUANTITY: 48/50
ACCURACY: 96.0 

test_theo 
 QUANTITY: 47/50
ACCURACY: 94.0 



In [20]:
import pickle 
with open("m.pkl", "wb") as file:
    pickle.dump(models, file)