In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [20]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [None]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [15]:
class_names = ["toi", "song", "truoc", "nhan_vien", "gia_dinh", "test_toi", "test_song", "test_truoc", 
               "test_nhan_vien", "test_gia_dinh"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)


Load toi dataset


  b = a[a_slice]


Load song dataset
Load truoc dataset
Load nhan_vien dataset
Load gia_dinh dataset
Load test_toi dataset
Load test_song dataset
Load test_truoc dataset
Load test_nhan_vien dataset
Load test_gia_dinh dataset
vectors (21611, 36)


In [17]:
models = {}

for cname in class_names:
    hmm = hmmlearn.hmm.GMMHMM(
        n_mix = 2, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='mst',
    )
    
    if cname == 'truoc':
        hmm.n_components = 6
        hmm.startprob_ = np.array([0.8, 0.2, 0.0, 0.0, 0.0, 0.0])
        hmm.transmat_ = np.array([
                [0.7,0.3,0.0,0.0,0.0,0.0],
                [0.0,0.7,0.3,0.0,0.0,0.0],
                [0.0,0.0,0.7,0.3,0.0,0.0],
                [0.0,0.0,0.0,1.0,0.0,0.0],
                [0.0,0.0,0.0,0.0,0.7,0.3],
                [0.0,0.0,0.0,0.0,0.0,1.0],
            ])
    else:
        hmm.n_components = 7
        hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0, 0.0,0.0])
        hmm.transmat_ = np.array([
                [0.7,0.3,0.0,0.0,0.0,0.0,0.0],
                [0.0,0.7,0.3,0.0,0.0,0.0,0.0],
                [0.0,0.0,0.7,0.3,0.0,0.0,0.0],
                [0.0,0.0,0.0,0.7,0.3,0.0,0.0],
                [0.0,0.0,0.0,0.0,0.7,0.3,0.0],
                [0.0,0.0,0.0,0.0,0.0,0.7,0.3],
                [0.0,0.0,0.0,0.0,0.0,0.0,1.0],
            ])

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class toi
(3490, 36) [22, 389, 36, 126, 28, 39, 98, 33, 23, 27, 21, 20, 23, 40, 36, 18, 25, 12, 22, 19, 14, 34, 22, 30, 47, 22, 29, 21, 17, 19, 31, 105, 24, 27, 19, 38, 25, 21, 84, 24, 22, 17, 17, 16, 142, 21, 36, 17, 18, 20, 18, 18, 32, 33, 29, 18, 47, 17, 19, 20, 18, 17, 49, 14, 31, 25, 18, 30, 24, 82, 26, 17, 16, 15, 24, 31, 16, 169, 20, 20, 149, 13, 123, 22, 34, 23, 131, 22, 24] 89


         1     -340834.8327             +nan
         2     -318767.2881      +22067.5446
         3     -312092.3800       +6674.9081
         4     -310707.7561       +1384.6239
         5     -310304.7026        +403.0534
         6     -310121.1784        +183.5242
         7     -309988.7226        +132.4558
         8     -309918.3428         +70.3798
         9     -309874.5546         +43.7881
        10     -309838.6723         +35.8824
        11     -309784.7330         +53.9393
        12     -309709.1466         +75.5863
        13     -309619.3101         +89.8366
        14     -309456.2406        +163.0695
        15     -309325.9262        +130.3144
        16     -309256.0845         +69.8416
        17     -309215.9093         +40.1753
        18     -309174.5024         +41.4069
        19     -309157.2758         +17.2266
        20     -309142.8918         +14.3840
        21     -309126.9358         +15.9560
        22     -309112.2342         +14.7015
        23

training class song
(4580, 36) [25, 32, 47, 31, 24, 35, 54, 27, 35, 51, 34, 24, 25, 93, 188, 158, 17, 187, 46, 26, 31, 35, 30, 32, 28, 25, 31, 45, 30, 26, 43, 45, 168, 206, 33, 31, 35, 41, 43, 33, 46, 32, 82, 30, 47, 32, 47, 47, 42, 47, 41, 42, 33, 47, 36, 31, 122, 24, 36, 162, 30, 33, 34, 32, 27, 76, 44, 45, 30, 35, 31, 152, 43, 31, 163, 54, 30, 45, 26, 25, 44, 24, 17, 42, 26, 33, 42, 32, 31, 33, 43, 51] 92


         1     -452980.9075             +nan
         2     -414897.8669      +38083.0407
         3     -407005.5797       +7892.2872
         4     -405287.8519       +1717.7277
         5     -404517.5334        +770.3186
         6     -404148.5407        +368.9927
         7     -403983.7523        +164.7884
         8     -403808.2416        +175.5106
         9     -403692.0035        +116.2382
        10     -403633.2687         +58.7347
        11     -403593.2964         +39.9724
        12     -403553.9078         +39.3886
        13     -403516.4992         +37.4086
        14     -403487.3322         +29.1669
        15     -403465.7157         +21.6165
        16     -403448.8027         +16.9131
        17     -403436.2811         +12.5215
        18     -403412.9258         +23.3554
        19     -403367.0454         +45.8804
        20     -403320.8928         +46.1526
        21     -403290.2918         +30.6010
        22     -403271.2889         +19.0030
        23

training class truoc
(2227, 36) [27, 20, 41, 31, 24, 26, 41, 21, 31, 26, 18, 23, 32, 23, 26, 21, 21, 37, 36, 26, 31, 15, 17, 19, 23, 41, 25, 31, 31, 26, 31, 26, 19, 35, 21, 22, 24, 20, 11, 31, 31, 22, 29, 33, 22, 29, 22, 23, 24, 31, 21, 31, 26, 25, 20, 27, 19, 28, 27, 36, 25, 35, 23, 31, 21, 41, 41, 36, 27, 61, 43, 43, 35, 36, 20, 19, 19, 46, 31, 18] 80


         1     -232554.4634             +nan
         2     -218867.2962      +13687.1672
         3     -216799.9090       +2067.3872
         4     -216277.6967        +522.2122
         5     -215951.9161        +325.7807
         6     -215832.9915        +118.9246
         7     -215775.2629         +57.7286
         8     -215722.5153         +52.7476
         9     -215685.7548         +36.7605
        10     -215663.6519         +22.1029
        11     -215647.2387         +16.4132
        12     -215635.2951         +11.9435
        13     -215626.5996          +8.6955
        14     -215618.5580          +8.0416
        15     -215603.3089         +15.2491
        16     -215584.5649         +18.7440
        17     -215575.9814          +8.5834
        18     -215571.8096          +4.1718
        19     -215567.9441          +3.8655
        20     -215562.5760          +5.3681
        21     -215556.7056          +5.8704
        22     -215552.3035          +4.4021
        23

training class nhan_vien
(3622, 36) [50, 53, 65, 29, 33, 53, 50, 55, 47, 31, 44, 42, 36, 42, 46, 32, 40, 37, 55, 42, 39, 36, 29, 55, 43, 68, 33, 55, 55, 43, 53, 48, 55, 48, 53, 44, 34, 62, 40, 49, 35, 50, 41, 38, 44, 42, 53, 55, 30, 50, 78, 35, 52, 56, 40, 44, 50, 36, 34, 47, 63, 35, 65, 29, 36, 37, 34, 35, 63, 44, 45, 38, 40, 43, 47, 38, 39, 44, 38, 43, 32] 81


         1     -357113.2845             +nan
         2     -344355.7306      +12757.5539
         3     -340497.7097       +3858.0209
         4     -339283.8129       +1213.8968
         5     -338770.2195        +513.5934
         6     -338561.6499        +208.5697
         7     -338403.2590        +158.3908
         8     -338274.0271        +129.2319
         9     -338186.9896         +87.0375
        10     -338130.0938         +56.8958
        11     -338080.8417         +49.2521
        12     -338037.5301         +43.3116
        13     -338007.1065         +30.4236
        14     -337982.2634         +24.8431
        15     -337954.4470         +27.8164
        16     -337917.6689         +36.7782
        17     -337862.8539         +54.8150
        18     -337833.3520         +29.5019
        19     -337814.8777         +18.4743
        20     -337797.6512         +17.2265
        21     -337784.2154         +13.4358
        22     -337768.7645         +15.4508
        23

training class gia_dinh
(3826, 36) [51, 46, 55, 43, 35, 66, 41, 76, 47, 40, 46, 71, 45, 51, 40, 51, 41, 41, 53, 47, 61, 37, 51, 51, 51, 47, 36, 44, 43, 46, 46, 43, 39, 49, 44, 46, 51, 53, 37, 49, 33, 51, 51, 37, 46, 41, 66, 39, 37, 49, 45, 47, 31, 41, 41, 46, 45, 71, 43, 66, 51, 56, 49, 51, 71, 33, 55, 35, 49, 41, 41, 51, 51, 61, 45, 43, 61, 41, 66, 39] 80


         1     -404592.9311             +nan
         2     -382758.9895      +21833.9416
         3     -378482.3495       +4276.6400
         4     -377143.1165       +1339.2330
         5     -376416.8424        +726.2741
         6     -376066.7585        +350.0838
         7     -375864.1540        +202.6045
         8     -375765.6344         +98.5197
         9     -375738.9436         +26.6907
        10     -375730.3438          +8.5998
        11     -375724.3679          +5.9759
        12     -375721.9445          +2.4234
        13     -375719.5501          +2.3944
        14     -375716.4462          +3.1039
        15     -375714.1454          +2.3008
        16     -375711.3659          +2.7796
        17     -375706.5048          +4.8611
        18     -375700.6679          +5.8368
        19     -375695.2040          +5.4640
        20     -375688.1472          +7.0568
        21     -375679.6629          +8.4843
        22     -375677.7284          +1.9345
        23

Training done


        27     -375677.2691          +0.0092


In [18]:
print("Testing")
acc = {}
test_name = { "test_toi", "test_song", "test_truoc", "test_nhan_vien", "test_gia_dinh"}
for true_cname in test_name:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
#         print(true_cname, score, pre)
        if pre == true_cname[5:]:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

Testing
test_toi   20
test_gia_dinh   20
test_nhan_vien   21
test_song   20
test_truoc   20
{'test_toi': 100.0, 'test_gia_dinh': 100.0, 'test_nhan_vien': 100.0, 'test_song': 100.0, 'test_truoc': 100.0}


In [19]:
models

{'toi': GMMHMM(algorithm='viterbi', covariance_type='diag',
     covars_prior=array([[[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        ...,
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]]]),
     covars_weight=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]]]),
     init_params='mst',
     means_prior=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., .

In [20]:
print("Testing")
miss = {}
acc = {}
class_names = ["toi", "song", "truoc", "nhan_vien", "gia_dinh"]
for true_cname in class_names:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
#         print(true_cname, score, pre)
        if pre == true_cname:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)      

Testing
toi   89
song   92
truoc   80
nhan_vien   81
gia_dinh   79
{'toi': 100.0, 'song': 100.0, 'truoc': 100.0, 'nhan_vien': 100.0, 'gia_dinh': 98.75}


# Demo


In [23]:
O = get_mfcc('data.wav')
score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
inverse = [(value, key) for key, value in score.items()]
predict = max(inverse)[1]
predict

'toi'

In [23]:
class_names = ["test_toi"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

print("Testing")
acc = {}
test_name = { "test_toi"}
for true_cname in test_name:
    kt = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        pre = max(inverse)[1]
        print(true_cname, score, pre)
        if pre == true_cname[5:]:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

Load test_toi dataset
vectors (895, 36)
Testing
test_toi {'toi': -7620.212225111643, 'song': -8230.521415545485, 'truoc': -8180.114885381267, 'nhan_vien': -8055.198681860034, 'gia_dinh': -8394.809761647619} toi
test_toi {'toi': -2149.219995999329, 'song': -2412.9368211004685, 'truoc': -2349.830841056273, 'nhan_vien': -2303.6970214219964, 'gia_dinh': -2363.120670330845} toi
test_toi {'toi': -4754.502610596342, 'song': -5116.315693293601, 'truoc': -5270.077827972711, 'nhan_vien': -5237.692756976712, 'gia_dinh': -4976.711196599017} toi
test_toi {'toi': -1351.7292054794661, 'song': -1544.0518867550961, 'truoc': -1363.8936940397136, 'nhan_vien': -1426.545629349316, 'gia_dinh': -1475.5824434061235} toi
test_toi {'toi': -2143.280006319747, 'song': -2328.4712382758908, 'truoc': -2236.9943185034103, 'nhan_vien': -2233.811970473114, 'gia_dinh': -2324.3715712918483} toi
test_toi {'toi': -8820.564295791386, 'song': -9605.533709422523, 'truoc': -9826.687406909248, 'nhan_vien': -9616.831028032006, '

# Save model

In [None]:
# with open('output1.txt', 'w') as f:
#     print(models, file=f)

In [24]:
# import pickle 
# with open("output2.pkl", "wb") as file:
#     pickle.dump(models, file)

In [1]:
import pickle
with open("output_final.pkl", "rb") as file:
    models = pickle.load(file)

  from collections import Sequence
  from collections import Iterable
  from collections import Mapping, namedtuple, defaultdict, Sequence


In [2]:
models

{'toi': GMMHMM(algorithm='viterbi', covariance_type='diag',
     covars_prior=array([[[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        ...,
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]],
 
        [[-1.5, -1.5, ..., -1.5, -1.5],
         [-1.5, -1.5, ..., -1.5, -1.5]]]),
     covars_weight=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]]]),
     init_params='mst',
     means_prior=array([[[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        ...,
 
        [[0., 0., ..., 0., 0.],
         [0., 0., ..., 0., 0.]],
 
        [[0., 0., .