In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

  from collections import Sequence
  from collections import Iterable
  from collections import Mapping, namedtuple, defaultdict, Sequence


In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [5]:
class_names = ["toi", "trong", "truoc", "nhan_vien", "gia_dinh", "test_toi", "test_trong", "test_truoc", 
               "test_nhan_vien", "test_gia_dinh"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)


Load toi dataset


  b = a[a_slice]


Load trong dataset
Load truoc dataset
Load nhan_vien dataset
Load gia_dinh dataset
Load test_toi dataset
Load test_trong dataset
Load test_truoc dataset
Load test_nhan_vien dataset
Load test_gia_dinh dataset
vectors (19071, 36)


In [6]:
models = {}

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=6, n_mix = 2, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='m',
#         startprob_prior = np.array([1.0,0.0,0.0,0.0,0.0]),
#         transmat_prior = np.array([
#             [0.7,0.3,0.0,0.0,0.0],
#             [0.0,0.7,0.3,0.0,0.0],
#             [0.0,0.0,0.7,0.3,0.0],
#             [0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,1.0],
#         ])
    )
    hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0, 0.0])
#     hmm.transmat_ = np.array([
#             [0.7,0.3,0.0,0.0,0.0],
#             [0.0,0.7,0.3,0.0,0.0],
#             [0.0,0.0,0.7,0.3,0.0],
#             [0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,1.0],
#         ])

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class toi
(2269, 36) [22, 36, 28, 39, 33, 23, 23, 21, 23, 40, 48, 12, 36, 18, 25, 12, 22, 14, 23, 34, 22, 10, 84, 47, 22, 29, 25, 30, 17, 19, 31, 24, 27, 19, 38, 25, 21, 84, 24, 17, 17, 17, 16, 36, 17, 18, 18, 32, 33, 18, 47, 19, 18, 24, 33, 16, 23, 31, 37, 17, 49, 31, 30, 24, 82, 17, 16, 57, 24, 31, 16, 20, 20, 20, 49, 31, 34, 23, 22, 39] 80


         1     -232562.0841             +nan
         2     -222998.6040       +9563.4801
         3     -220121.8832       +2876.7209
         4     -219577.2944        +544.5888
         5     -219428.2236        +149.0708
         6     -219306.3538        +121.8698
         7     -219224.4838         +81.8700
         8     -219154.2286         +70.2552
         9     -219098.1586         +56.0700
        10     -219052.8686         +45.2900
        11     -218979.6957         +73.1730
        12     -218867.3381        +112.3576
        13     -218751.8447        +115.4934
        14     -218640.7991        +111.0456
        15     -218521.2808        +119.5183
        16     -218406.7541        +114.5267
        17     -218332.1784         +74.5757
        18     -218284.2009         +47.9775
        19     -218243.5839         +40.6170
        20     -218224.2483         +19.3356
        21     -218214.1062         +10.1422
        22     -218208.8604          +5.2458
        23

training class trong
(3738, 36) [20, 32, 22, 17, 25, 26, 22, 18, 21, 22, 16, 19, 21, 19, 25, 21, 20, 26, 24, 31, 21, 32, 19, 18, 23, 21, 20, 25, 22, 19, 15, 31, 24, 19, 24, 22, 28, 23, 20, 25, 22, 19, 18, 26, 20, 16, 18, 23, 19, 21, 31, 21, 21, 26, 32, 29, 21, 20, 18, 33, 20, 23, 19, 26, 25, 18, 15, 27, 13, 17, 26, 22, 23, 25, 24, 25, 18, 32, 18, 18, 25, 18, 20, 16, 18, 20, 19, 19, 23, 35, 19, 25, 20, 19, 25, 18, 23, 17, 28, 27, 21, 20, 17, 22, 20, 21, 16, 19, 28, 27, 23, 23, 23, 24, 19, 24, 21, 29, 24, 19, 20, 23, 19, 20, 22, 29, 22, 18, 23, 28, 15, 17, 23, 26, 17, 18, 16, 28, 29, 22, 22, 29, 25, 24, 24, 14, 23, 27, 14, 20, 19, 21, 21, 22, 26, 19, 21, 19, 27, 22, 26, 14, 25, 23, 25, 21, 26, 20, 24] 169


         1     -388521.5609             +nan
         2     -367039.3710      +21482.1899
         3     -364600.7179       +2438.6531
         4     -363766.2564        +834.4616
         5     -363300.8230        +465.4333
         6     -362977.7329        +323.0902
         7     -362780.2835        +197.4493
         8     -362589.6689        +190.6146
         9     -362442.1641        +147.5048
        10     -362320.4682        +121.6959
        11     -362229.8159         +90.6524
        12     -362147.1482         +82.6677
        13     -362067.2437         +79.9045
        14     -362002.7081         +64.5357
        15     -361953.5489         +49.1592
        16     -361916.3333         +37.2155
        17     -361888.2785         +28.0549
        18     -361856.7213         +31.5572
        19     -361842.5403         +14.1810
        20     -361830.9506         +11.5896
        21     -361821.2370          +9.7137
        22     -361812.4922          +8.7448
        23

training class truoc
(2227, 36) [27, 20, 41, 31, 24, 26, 41, 21, 31, 26, 18, 23, 32, 23, 26, 21, 21, 37, 36, 26, 31, 15, 17, 19, 23, 41, 25, 31, 31, 26, 31, 26, 19, 35, 21, 22, 24, 20, 11, 31, 31, 22, 29, 33, 22, 29, 22, 23, 24, 31, 21, 31, 26, 25, 20, 27, 19, 28, 27, 36, 25, 35, 23, 31, 21, 41, 41, 36, 27, 61, 43, 43, 35, 36, 20, 19, 19, 46, 31, 18] 80


         1     -232564.0341             +nan
         2     -218895.3149      +13668.7192
         3     -216834.7499       +2060.5650
         4     -216319.5405        +515.2095
         5     -216002.1082        +317.4323
         6     -215897.3326        +104.7756
         7     -215830.7318         +66.6009
         8     -215778.8870         +51.8447
         9     -215741.6976         +37.1894
        10     -215721.5032         +20.1944
        11     -215704.6931         +16.8101
        12     -215691.9961         +12.6970
        13     -215682.2807          +9.7153
        14     -215674.3626          +7.9181
        15     -215664.5618          +9.8007
        16     -215644.5111         +20.0507
        17     -215630.2529         +14.2582
        18     -215625.2536          +4.9993
        19     -215621.6330          +3.6206
        20     -215616.9316          +4.7014
        21     -215607.7991          +9.1325
        22     -215599.8450          +7.9542
        23

training class nhan_vien
(3680, 36) [50, 53, 65, 29, 33, 53, 50, 55, 31, 44, 40, 42, 36, 42, 46, 32, 40, 55, 39, 42, 37, 38, 36, 29, 55, 38, 68, 57, 33, 55, 55, 43, 53, 48, 55, 48, 53, 49, 62, 40, 49, 50, 41, 42, 53, 55, 30, 38, 78, 52, 56, 40, 38, 35, 44, 50, 38, 41, 36, 34, 63, 54, 65, 29, 36, 34, 35, 63, 44, 45, 38, 40, 43, 43, 33, 36, 39, 44, 38, 43, 32, 54] 82


         1     -362015.3092             +nan
         2     -349280.5081      +12734.8011
         3     -345943.0340       +3337.4741
         4     -345001.2736        +941.7604
         5     -344484.4767        +516.7970
         6     -344207.6127        +276.8640
         7     -344000.8928        +206.7199
         8     -343840.3241        +160.5688
         9     -343701.1821        +139.1419
        10     -343573.0440        +128.1381
        11     -343457.8060        +115.2380
        12     -343361.8628         +95.9432
        13     -343274.9248         +86.9379
        14     -343214.2010         +60.7239
        15     -343163.3987         +50.8022
        16     -343124.1137         +39.2850
        17     -343094.1701         +29.9436
        18     -343065.3256         +28.8445
        19     -343033.1084         +32.2172
        20     -343001.3083         +31.8001
        21     -342969.3527         +31.9556
        22     -342944.0768         +25.2759
        23

training class gia_dinh
(3826, 36) [51, 46, 55, 43, 35, 66, 41, 76, 47, 40, 46, 71, 45, 51, 40, 51, 41, 41, 53, 47, 61, 37, 51, 51, 51, 47, 36, 44, 43, 46, 46, 43, 39, 49, 44, 46, 51, 53, 37, 49, 33, 51, 51, 37, 46, 41, 66, 39, 37, 49, 45, 47, 31, 41, 41, 46, 45, 71, 43, 66, 51, 56, 49, 51, 71, 33, 55, 35, 49, 41, 41, 51, 51, 61, 45, 43, 61, 41, 66, 39] 80


         1     -405814.4448             +nan
         2     -387155.4716      +18658.9732
         3     -383223.0495       +3932.4221
         4     -381879.9907       +1343.0588
         5     -381086.1360        +793.8547
         6     -380397.5614        +688.5746
         7     -379875.7562        +521.8052
         8     -379540.9736        +334.7826
         9     -379270.4803        +270.4933
        10     -379119.1668        +151.3136
        11     -379030.2656         +88.9012
        12     -378976.4024         +53.8632
        13     -378930.2201         +46.1823
        14     -378872.6642         +57.5560
        15     -378848.8583         +23.8059
        16     -378831.1378         +17.7205
        17     -378810.2955         +20.8424
        18     -378799.7290         +10.5665
        19     -378793.4167          +6.3122
        20     -378788.2406          +5.1761
        21     -378783.0360          +5.2046
        22     -378776.9545          +6.0815
        23

Training done


        67     -378256.3455          +0.0436
        68     -378256.3212          +0.0243
        69     -378256.3083          +0.0129
        70     -378256.3016          +0.0067


In [7]:
print("Testing")
miss = {}
test_name = { "test_toi", "test_song", "test_truoc", "test_nhan_vien", "test_gia_dinh"}
for true_cname in test_name:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
        print(true_cname, score, pre)
        if pre != true_cname[5:]:
            kt +=1
    miss[true_cname] = kt
print(miss)

Testing
test_gia_dinh {'toi': -7049.10545635477, 'trong': -7314.762307790923, 'truoc': -7392.4952671252195, 'nhan_vien': -7187.929821430053, 'gia_dinh': -6765.3010811142485} gia_dinh
test_gia_dinh {'toi': -4166.4293495251, 'trong': -4359.920307921832, 'truoc': -4453.194743684829, 'nhan_vien': -4375.023125680764, 'gia_dinh': -4006.244066802912} gia_dinh
test_gia_dinh {'toi': -4569.877055652338, 'trong': -4664.928658034754, 'truoc': -4763.140738238798, 'nhan_vien': -4561.867350291636, 'gia_dinh': -4255.766475877744} gia_dinh
test_gia_dinh {'toi': -4661.549532072559, 'trong': -4743.795751137741, 'truoc': -4923.571048487526, 'nhan_vien': -4661.287952729645, 'gia_dinh': -4409.028947340244} gia_dinh
test_gia_dinh {'toi': -4851.871170114552, 'trong': -5030.575470702249, 'truoc': -5078.20675841581, 'nhan_vien': -4925.33342200247, 'gia_dinh': -4562.481772355831} gia_dinh
test_gia_dinh {'toi': -6465.859671994828, 'trong': -6853.169574531901, 'truoc': -6933.533784424791, 'nhan_vien': -6673.061471

KeyError: 'test_song'

In [None]:
models

In [None]:
print("Testing")
miss = {}
class_names = ["toi", "song", "truoc", "nhan_vien", "gia_dinh"]
for true_cname in class_names:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
        print(true_cname, score, pre)
        if pre != true_cname:
            kt +=1
    miss[true_cname] = kt
print(miss)

In [None]:
# import pickle 
# with open("output.pkl", "wb") as file:
#     pickle.dump(models, file)

In [None]:
!ls