In [7]:
import os
class_names = ['cothe', 'khong', 'nguoi', 'toi', 'nhung']
states = [12, 9, 9, 9, 6]

length = 0
for d in class_names:
    length += len(os.listdir("data/" + d))
print(length)

!pip install librosa
!pip install hmmlearn

607
Collecting hmmlearn
  Downloading hmmlearn-0.2.5-cp39-cp39-win_amd64.whl (112 kB)
Installing collected packages: hmmlearn
Successfully installed hmmlearn-0.2.5


In [8]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix


In [9]:
all_data = {}
all_labels = {}
for cname in class_names:
    file_paths = [os.path.join("data", cname, i) for i in os.listdir(os.path.join('data', cname)) if i.endswith('.wav')]
    data = [get_mfcc(file_path) for file_path in file_paths]
    all_data[cname] = data
    all_labels[cname] = [class_names.index(cname) for i in range(len(file_paths))]

In [10]:
from sklearn.model_selection import train_test_split

X = {'train': {}, 'test': {}}
y = {'train': {}, 'test': {}}
for cname in class_names:
    x_train, x_test, _, y_test = train_test_split(
        all_data[cname], all_labels[cname], 
        test_size = 0.33, 
        random_state=42
    )
    X['train'][cname] = x_train
    X['test'][cname] = x_test
    y['test'][cname] = y_test

In [11]:
for cname in class_names:
    print(cname,len(X['train'][cname]), len(X['test'][cname]), len(y['test'][cname]))

cothe 97 48 48
khong 66 33 33
nguoi 94 47 47
toi 79 40 40
nhung 66 33 33


In [12]:
import hmmlearn.hmm as hmm

model = {}
for idx, cname in enumerate(class_names):
    start_prob = np.full(states[idx], 0.0)
    start_prob[0] = 1.0
    trans_matrix = np.full((states[idx], states[idx]), 0.0)
    p = 0.5
    np.fill_diagonal(trans_matrix, p)
    np.fill_diagonal(trans_matrix[0:, 1:], 1 - p)
    trans_matrix[-1, -1] = 1.0
    
    #trans matrix
    print(cname)
    print(trans_matrix) 

    model[cname] = hmm.GaussianHMM(
        n_components=states[idx], 
        verbose=True, 
        n_iter=300, 
        startprob_prior=start_prob, 
        transmat_prior=trans_matrix,
        params='stmc',
        init_params='mc',
        random_state=42
    )

    model[cname].fit(X=np.vstack(X['train'][cname]), lengths=[x.shape[0] for x in X['train'][cname]])

cothe
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -365604.4108             +nan
         2     -351596.5130      +14007.8978
         3     -349606.7959       +1989.7171
         4     -348815.7811        +791.0147
         5     -348360.1695        +455.6116
         6     -348132.8258        +227.3437
         7     -348004.3328        +128.4930
         8     -347900.1501        +104.1827
         9     -347806.4891         +93.6610
        10     -347733.0375         +73.4516
        11     -347623.8585        +109.1790
        12     -347490.6612        +133.1973
        13     -347381.6419        +109.0193
        14     -347299.5207         +82.1212
        15     -347238.8864         +60.6343
        16     -347175.0869         +63.7995
        17     -347114.5099         +60.5770
        18     -347023.9300         +90.5799
        19     -346940.1195         +83.8105
        20     -346851.9886         +88.1309
        21     -346780.7575         +71.2311
        22     -346704.2874         +76.4702
        23

khong
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -413182.0120             +nan
         2     -317089.3375      +96092.6745
         3     -206737.9790     +110351.3586
         4      -34273.2304     +172464.7486
         5       15949.1173      +50222.3476
         6       25186.4777       +9237.3604
         7       25780.5621        +594.0844
         8       25916.6530        +136.0909
         9       26068.1311        +151.4781
        10       26256.6211        +188.4900
        11       26414.1672        +157.5461
        12       26663.6151        +249.4478
        13       26965.1199        +301.5048
        14       26937.6369         -27.4830


nguoi
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -217813.1281             +nan
         2     -209157.3115       +8655.8167
         3     -208147.9771       +1009.3344
         4     -207806.5226        +341.4545
         5     -207424.0520        +382.4706
         6     -207303.1304        +120.9216
         7     -207200.8859        +102.2445
         8     -207142.2976         +58.5882
         9     -207096.5280         +45.7696
        10     -207041.4015         +55.1265
        11     -206978.3098         +63.0917
        12     -206894.1637         +84.1461
        13     -206840.6807         +53.4830
        14     -206822.4569         +18.2238
        15     -206808.8581         +13.5989
        16     -206803.2156          +5.6424
        17     -206800.1107          +3.1049
        18     -206798.5419          +1.5689
        19     -206797.6847          +0.8571
        20     -206797.4155          +0.2693
        21     -206778.8232         +18.5922
        22     -206776.1488          +2.6744
        23

toi
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1     -188477.8742             +nan
         2     -179580.5109       +8897.3633
         3     -177568.6988       +2011.8121
         4     -176840.8946        +727.8042
         5     -176475.8833        +365.0113
         6     -176201.0168        +274.8665
         7     -176014.1032        +186.9136
         8     -175933.5735         +80.5297
         9     -175918.2697         +15.3038
        10     -175913.1743          +5.0954
        11     -175927.8871         -14.7128


nhung
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]


         1     -139957.5605             +nan
         2     -134871.9913       +5085.5692
         3     -133760.6617       +1111.3297
         4     -133557.4086        +203.2531
         5     -133486.4107         +70.9979
         6     -133408.5543         +77.8564
         7     -133244.3396        +164.2147
         8     -133023.7228        +220.6168
         9     -132829.5319        +194.1909
        10     -132734.4166         +95.1153
        11     -132709.8467         +24.5699
        12     -132699.5406         +10.3061
        13     -132692.6588          +6.8818
        14     -132685.0395          +7.6193
        15     -132674.4174         +10.6221
        16     -132664.9179          +9.4995
        17     -132652.8582         +12.0597
        18     -132611.8373         +41.0209
        19     -132514.8569         +96.9804
        20     -132419.3889         +95.4680
        21     -132319.7026         +99.6863
        22     -132299.3319         +20.3707
        23

In [13]:
import pickle

# save model
for cname in class_names:
    name = f'models_train\model_{cname}.pkl'
    with open(name, 'wb') as file: 
        pickle.dump(model[cname], file)

In [14]:
import pickle, os
import numpy as np

from sklearn.metrics import classification_report

In [15]:
y_true = []
y_pred = []
for cname in class_names:
    for mfcc, target in zip(X['test'][cname], y['test'][cname]):
        scores = [model[cname].score(mfcc) for cname in class_names]
        pred = np.argmax(scores)
        y_pred.append(pred)
        y_true.append(target)
    print((np.array(y_true) == np.array(y_pred)).sum()/len(y_true))
print(y_true)
print(y_pred)


0.9791666666666666
0.9876543209876543
0.96875
0.9523809523809523
0.9552238805970149
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,

In [16]:
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

              precision    recall  f1-score   support

       cothe       0.96      0.98      0.97        48
       khong       0.97      1.00      0.99        33
       nguoi       0.98      0.94      0.96        47
         toi       0.97      0.90      0.94        40
       nhung       0.89      0.97      0.93        33

    accuracy                           0.96       201
   macro avg       0.95      0.96      0.95       201
weighted avg       0.96      0.96      0.96       201



In [17]:
#loadmodels
import pickle

model_train = {}
for key in class_names:
    name = f"models\model_{key}.pkl"
    with open(name, 'rb') as file:
        model_train[key] = pickle.load(file)

In [18]:
print(model_train)

{'cothe': GaussianHMM(init_params='mc', n_components=12, n_iter=300, random_state=42,
            startprob_prior=array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
            transmat_prior=array([[0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5, 0. ],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. , 0.5, 0.5],
       [0. , 0. , 0. , 0. , 0. , 0. , 0. , 