In [255]:
import os
class_names = ['A', 'B', 'ban', 'len', 'nhay', 'phai', 'trai', 'xuong', 'sil']
states = [3, 3, 6, 6, 6, 6, 6, 9, 6]

from google.colab import drive
drive.mount('/content/drive')

length = 0
for d in class_names:
    length += len(os.listdir("/content/drive/MyDrive/Colab Notebooks/data/" + d))
print(length)

70


In [256]:
# install hmmlearn
!pip install hmmlearn



In [257]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [258]:
all_data = {}
all_labels = {}
for cname in class_names:
    file_paths = [os.path.join("/content/drive/MyDrive/Colab Notebooks/data/", cname, i) for i in os.listdir(os.path.join('/content/drive/MyDrive/Colab Notebooks/data/', cname)) if i.endswith('.wav')]
    data = [get_mfcc(file_path) for file_path in file_paths]
    all_data[cname] = data
    all_labels[cname] = [class_names.index(cname) for i in range(len(file_paths))]

In [259]:
from sklearn.model_selection import train_test_split

X = {'train': {}, 'test': {}}
y = {'train': {}, 'test': {}}
for cname in class_names:
    x_train, x_test, _, y_test = train_test_split(
        all_data[cname], all_labels[cname], 
        test_size = 0.33, 
        random_state=42
    )
    X['train'][cname] = x_train
    X['test'][cname] = x_test
    y['test'][cname] = y_test

In [260]:
for cname in class_names:
    print(cname, len(X['train'][cname]), len(X['test'][cname]), len(y['test'][cname]))

A 6 4 4
B 2 2 2
ban 4 2 2
len 5 3 3
nhay 4 2 2
phai 6 4 4
trai 5 3 3
xuong 5 3 3
sil 6 4 4


In [261]:
import hmmlearn.hmm as hmm

model = {}
for idx, cname in enumerate(class_names):
    start_prob = np.full(states[idx], 0.0)
    start_prob[0] = 1.0
    trans_matrix = np.full((states[idx], states[idx]), 0.0)
    p = 0.5
    np.fill_diagonal(trans_matrix, p)
    np.fill_diagonal(trans_matrix[0:, 1:], 1 - p)
    trans_matrix[-1, -1] = 1.0
    
    #trans matrix
    print(cname)
    print(trans_matrix) 

    model[cname] = hmm.GaussianHMM(
        n_components=states[idx], 
        verbose=True, 
        n_iter=300, 
        startprob_prior=start_prob, 
        transmat_prior=trans_matrix,
        params='stmc',
        init_params='mc',
        random_state=42
    )

    model[cname].fit(X=np.vstack(X['train'][cname]), lengths=[x.shape[0] for x in X['train'][cname]])

A
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]


         1      -23455.4786             +nan
         2      -22266.7440       +1188.7346
         3      -22106.3447        +160.3993
         4      -22014.9525         +91.3922
         5      -21953.2806         +61.6718
         6      -21873.6699         +79.6107
         7      -21858.1170         +15.5529
         8      -21852.2345          +5.8824
         9      -21840.6397         +11.5948
        10      -21836.8126          +3.8271
        11      -21808.5306         +28.2820
        12      -21790.9270         +17.6036
        13      -21775.2814         +15.6456
        14      -21753.5140         +21.7674
        15      -21753.3454          +0.1686
        16      -21753.2652          +0.0802
        17      -21753.2228          +0.0424
        18      -21753.2053          +0.0175
        19      -21753.1996          +0.0057
         1       -6559.2925             +nan
         2       -6072.2340        +487.0585
         3       -5963.3288        +108.9052
         4

B
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
ban
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]
len
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]


         1      -16995.5439             +nan
         2      -15771.6017       +1223.9422
         3      -15700.5102         +71.0915
         4      -15655.3404         +45.1698
         5      -15614.8517         +40.4886
         6      -15596.8598         +17.9920
         7      -15580.4780         +16.3818
         8      -15577.5964          +2.8816
         9      -15576.8794          +0.7170
        10      -15576.6359          +0.2435
        11      -15576.5818          +0.0542
        12      -15576.5746          +0.0072
         1      -20608.6840             +nan
         2      -19058.3285       +1550.3555
         3      -19284.2590        -225.9305
         1      -16720.2237             +nan
         2      -15521.3525       +1198.8712
         3      -15321.0157        +200.3368
         4      -15463.9663        -142.9506


nhay
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]
phai
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]


         1      -27652.0185             +nan
         2      -25775.6762       +1876.3423
         3      -25735.3368         +40.3394
         4      -25712.0957         +23.2411
         5      -25705.9522          +6.1435
         6      -25700.6238          +5.3284
         7      -25698.8341          +1.7898
         8      -25697.5761          +1.2580
         9      -25697.3419          +0.2342
        10      -25696.5215          +0.8204
        11      -25693.2611          +3.2605
        12      -25687.4783          +5.7828
        13      -25685.8344          +1.6438
        14      -25684.1310          +1.7034
        15      -25682.6129          +1.5181
        16      -25681.4833          +1.1296
        17      -25680.9661          +0.5173
        18      -25680.6367          +0.3293
        19      -25680.3295          +0.3072
        20      -25680.0381          +0.2914
        21      -25679.8521          +0.1860
        22      -25679.7585          +0.0936
        23

trai
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]
xuong
[[0.5 0.5 0.  0.  0.  0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0.  0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0.  0.  0.  0. ]
 [0.  0.  0.  0.5 0.5 0.  0.  0.  0. ]
 [0.  0.  0.  0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.  0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  0.  0.  0.  1. ]]


         1      -21427.7108             +nan
         2      -19659.6041       +1768.1068
         3      -19360.8479        +298.7561
         4      -19346.5718         +14.2761
         5      -19341.0131          +5.5587
         6      -19329.7732         +11.2399
         7      -19316.2188         +13.5544
         8      -19289.7175         +26.5013
         9      -19284.5959          +5.1216
        10      -19284.5858          +0.0100
        11      -19284.5847          +0.0011
         1      -24457.7535             +nan
         2      -22058.5928       +2399.1607
         3      -21942.1900        +116.4028
         4      -21927.5440         +14.6460
         5      -21913.6246         +13.9194
         6      -21900.6345         +12.9901
         7      -21881.5357         +19.0988
         8      -21863.4201         +18.1156
         9      -21842.4875         +20.9326
        10      -21824.3259         +18.1617
        11      -21822.3968          +1.9290
        12

sil
[[0.5 0.5 0.  0.  0.  0. ]
 [0.  0.5 0.5 0.  0.  0. ]
 [0.  0.  0.5 0.5 0.  0. ]
 [0.  0.  0.  0.5 0.5 0. ]
 [0.  0.  0.  0.  0.5 0.5]
 [0.  0.  0.  0.  0.  1. ]]


         1      -23841.0590             +nan
         2      -23319.9954        +521.0636
         3      -23192.5446        +127.4509
         4      -23140.8829         +51.6617
         5      -23104.6846         +36.1983
         6      -23089.7417         +14.9429
         7      -23086.4060          +3.3357
         8      -23086.5312          -0.1253


In [262]:
import pickle

# save model
for cname in class_names:
    name = f'models_train\model_{cname}.pkl'
    with open(name, 'wb') as file: 
        pickle.dump(model[cname], file)

In [263]:
import pickle, os
import numpy as np

from sklearn.metrics import classification_report

In [264]:
y_true = []
y_pred = []
for cname in class_names:
    for mfcc, target in zip(X['test'][cname], y['test'][cname]):
        scores = [model[cname].score(mfcc) for cname in class_names]
        pred = np.argmax(scores)
        y_pred.append(pred)
        y_true.append(target)
    print((np.array(y_true) == np.array(y_pred)).sum()/len(y_true))
print(y_true)
print(y_pred)

1.0
0.6666666666666666
0.75
0.8181818181818182
0.8461538461538461
0.8823529411764706
0.9
0.9130434782608695
0.9259259259259259
[0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8]
[0, 0, 0, 0, 3, 4, 2, 2, 3, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8]


In [265]:
report = classification_report(y_true, y_pred, target_names=class_names)
print(report)

              precision    recall  f1-score   support

           A       1.00      1.00      1.00         4
           B       0.00      0.00      0.00         2
         ban       1.00      1.00      1.00         2
         len       0.75      1.00      0.86         3
        nhay       0.67      1.00      0.80         2
        phai       1.00      1.00      1.00         4
        trai       1.00      1.00      1.00         3
       xuong       1.00      1.00      1.00         3
         sil       1.00      1.00      1.00         4

    accuracy                           0.93        27
   macro avg       0.82      0.89      0.85        27
weighted avg       0.87      0.93      0.90        27



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [268]:
#loadmodels
import pickle

model_train = {}
for key in class_names:
    name = f"/content/models_train\model_{key}.pkl"
    with open(name, 'rb') as file:
        model_train[key] = pickle.load(file)

In [267]:
print(model_train)

{'A': GaussianHMM(init_params='mc', n_components=3, n_iter=300, random_state=42,
            startprob_prior=array([1., 0., 0.]),
            transmat_prior=array([[0.5, 0.5, 0. ],
       [0. , 0.5, 0.5],
       [0. , 0. , 1. ]]),
            verbose=True), 'B': GaussianHMM(init_params='mc', n_components=3, n_iter=300, random_state=42,
            startprob_prior=array([1., 0., 0.]),
            transmat_prior=array([[0.5, 0.5, 0. ],
       [0. , 0.5, 0.5],
       [0. , 0. , 1. ]]),
            verbose=True), 'ban': GaussianHMM(init_params='mc', n_components=6, n_iter=300, random_state=42,
            startprob_prior=array([1., 0., 0., 0., 0., 0.]),
            transmat_prior=array([[0.5, 0.5, 0. , 0. , 0. , 0. ],
       [0. , 0.5, 0.5, 0. , 0. , 0. ],
       [0. , 0. , 0.5, 0.5, 0. , 0. ],
       [0. , 0. , 0. , 0.5, 0.5, 0. ],
       [0. , 0. , 0. , 0. , 0.5, 0.5],
       [0. , 0. , 0. , 0. , 0. , 1. ]]),
            verbose=True), 'len': GaussianHMM(init_params='mc', n_components=6,