In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

## Get MFCC function

In [3]:
# read file
def get_mfcc(file_path):
    """Load one audio file and return its MFCC + delta feature matrix.

    Parameters
    ----------
    file_path : str
        Path to an audio file readable by ``librosa.load`` (called with its
        default arguments, so librosa chooses the sample rate).

    Returns
    -------
    numpy.ndarray
        Array of shape (T, 36): for each of the T frames, 12 mean-normalized
        MFCCs followed by their 12 first-order and 12 second-order deltas.
        Frames are rows because hmmlearn expects (n_samples, n_features).
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr*0.010)  # 10ms hop
    win_length = math.floor(sr*0.025)  # 25ms frame
    # mfcc is a 12 x T matrix.
    # Audio and sample rate are passed by keyword: recent librosa releases
    # made them keyword-only, and older releases accept the keywords as well.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract the per-coefficient mean over time --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # delta features, 1st and 2nd order (both computed from the normalized mfcc)
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T  # hmmlearn uses a T x N matrix

## lấy mfcc của tất cả các file trong dir

In [4]:
# Get the MFCC features of every .wav file in a directory
def get_class_data(data_dir):
    """Compute MFCC features for every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory to scan (not recursive). Entries whose name does not end in
        ".wav" are ignored.

    Returns
    -------
    list
        One ``get_mfcc`` result per .wav file, ordered by file name.
    """
    # os.listdir returns names in arbitrary order. Sorting makes the list, and
    # therefore any positional train/test split made from it, reproducible.
    wav_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, f)) for f in wav_names]

## Hàm Clustering

In [5]:
def clustering(X, n_clusters=14):
    """Fit a K-Means codebook on ``X`` and return the fitted estimator.

    ``X`` is handed to ``KMeans.fit`` unchanged. The estimator uses
    ``n_clusters`` centroids, 50 restarts and a fixed seed (random_state=0).
    The shape of the learned centroid matrix is printed as a sanity check.
    """
    codebook = KMeans(
        n_clusters=n_clusters,
        n_init=50,
        random_state=0,
        verbose=0,
    ).fit(X)  # fit() returns the estimator itself
    print("centers", codebook.cluster_centers_.shape)
    return codebook

## Data

In [7]:
# One entry per word class; each class name is also the name of its
# sub-directory under ./data.
class_names = ['cothe', 'da', 'duoc', 'nguoi', 'trong']

# dataset[cname] -> list of per-utterance feature matrices from get_class_data
dataset = {}
for cname in class_names:
    class_dir = os.path.join('data', cname)
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(class_dir)

Load cothe dataset
Load da dataset
Load duoc dataset
Load nguoi dataset
Load trong dataset


In [8]:
# Number of utterances loaded for each class, in class_names order.
for cname in class_names:
    print(len(dataset[cname]))

124
99
100
100
100


## split train test

In [9]:
# Positional hold-out split per class: the first floor(0.9 * n) utterances go
# to training, the remaining tail goes to testing. No shuffling is done.
trainset, testset = {}, {}
n_test = {cname: 0 for cname in class_names}  # test-utterance count per class
for cname in class_names:
    utterances = dataset[cname]
    n = len(utterances)
    n_train = math.floor(n*0.9)
    trainset[cname] = utterances[:n_train]
    testset[cname] = utterances[n_train:]
    n_test[cname] += len(testset[cname])

# Training-set size of each class, in class_names order.
for cname in class_names:
    print(len(trainset[cname]))

111
89
90
90
90


## Fit kmeans trên tập train

In [11]:
# Stack the frames of every training utterance of every class into a single
# matrix (one row per frame) so the codebook is learned on training data only.
per_class_frames = [np.concatenate(seqs, axis=0) for seqs in trainset.values()]
all_vectors = np.concatenate(per_class_frames, axis=0)
print("vectors", all_vectors.shape)
# Run the K-Means algorithm to get the cluster centers (the VQ codebook)
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)
print(kmeans)

vectors (14283, 36)
centers (14, 36)
centers (14, 36)
KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
       n_clusters=14, n_init=50, n_jobs=None, precompute_distances='auto',
       random_state=0, tol=0.0001, verbose=0)


In [12]:
# Vector-quantize every utterance: each frame is replaced by the index of its
# nearest K-Means center, and the result is reshaped to a (T, 1) column.
# NOTE: this overwrites trainset/testset in place, so the cell is not
# re-runnable without first re-running the split cell.
for cname in class_names:
    for split in (trainset, testset):
        split[cname] = [kmeans.predict(frames).reshape(-1, 1) for frames in split[cname]]

    

# Train

In [13]:
models = {}

## Models cho 'có thể' 4x3

In [14]:
# Discrete HMM for 'cothe' with 4*3 states in a left-to-right topology.
# init_params='e': only the emission matrix is initialized by hmmlearn; the
# start and transition probabilities are set by hand below.
# params='ste': all three are re-estimated during fit().
n_states = 4*3
hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may only start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transitions: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = 0.7*np.eye(n_states) + 0.2*np.eye(n_states, k=1) + 0.1*np.eye(n_states, k=2)
transmat[-2, -1] = 0.3  # second-to-last state has no skip target: 0.7 stay / 0.3 advance
transmat[-1, -1] = 1.0  # last state is absorbing
hmm.transmat_ = transmat


# hmmlearn takes all sequences concatenated plus the length of each sequence.
Xbn = np.concatenate(trainset['cothe'])
lengths = [len(x) for x in trainset['cothe']]
print("training class", 'cothe')
print(Xbn.shape, lengths, len(lengths))
hmm.fit(Xbn, lengths=lengths)
models['cothe'] = hmm
print("Training done")

training class cothe
(6331, 1) [34, 31, 56, 39, 44, 40, 46, 35, 45, 47, 37, 56, 36, 31, 39, 35, 42, 30, 30, 37, 36, 39, 26, 34, 47, 36, 41, 34, 26, 30, 25, 27, 16, 39, 27, 52, 25, 27, 20, 22, 36, 53, 44, 44, 28, 57, 53, 36, 37, 30, 34, 38, 51, 43, 40, 47, 38, 44, 102, 44, 52, 56, 35, 37, 39, 25, 41, 52, 43, 33, 49, 59, 35, 42, 40, 37, 40, 35, 36, 28, 45, 44, 23, 30, 48, 35, 32, 35, 35, 26, 36, 43, 45, 62, 165, 163, 179, 165, 158, 179, 144, 154, 142, 141, 153, 162, 146, 163, 148, 144, 154] 111
         1      -16057.5100             +nan
         2      -11647.4536       +4410.0565
         3      -10307.0475       +1340.4061
         4       -9737.5345        +569.5130
         5       -9547.1800        +190.3544
         6       -9444.6261        +102.5539
         7       -9411.3628         +33.2633
         8       -9394.9771         +16.3858
         9       -9381.3271         +13.6500
        10       -9365.3570         +15.9701
        11       -9346.4665         +18.8905
       

## Models cho 'đã' 2x3

In [15]:
# Discrete HMM for 'da' with 2*3 states in a left-to-right topology.
# init_params='e': only the emission matrix is initialized by hmmlearn; the
# start and transition probabilities are set by hand below.
# params='ste': all three are re-estimated during fit().
n_states = 2*3
hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may only start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transitions: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = 0.7*np.eye(n_states) + 0.2*np.eye(n_states, k=1) + 0.1*np.eye(n_states, k=2)
transmat[-2, -1] = 0.3  # second-to-last state has no skip target: 0.7 stay / 0.3 advance
transmat[-1, -1] = 1.0  # last state is absorbing
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the length of each sequence.
Xct = np.concatenate(trainset['da'])
lengths = [len(x) for x in trainset['da']]
print("training class", 'da')
print(Xct.shape, lengths, len(lengths))
hmm.fit(Xct, lengths=lengths)
models['da'] = hmm
print("Training done")

training class da
(1684, 1) [15, 14, 16, 14, 16, 26, 12, 13, 17, 13, 23, 13, 16, 14, 12, 19, 20, 24, 20, 20, 24, 20, 18, 26, 26, 22, 21, 21, 15, 20, 35, 25, 16, 18, 16, 26, 20, 22, 22, 30, 20, 21, 25, 24, 20, 21, 22, 22, 19, 21, 15, 20, 14, 16, 15, 21, 15, 16, 19, 12, 18, 18, 15, 24, 24, 22, 17, 15, 18, 19, 18, 15, 15, 18, 15, 11, 18, 27, 13, 13, 14, 14, 19, 22, 18, 21, 36, 15, 14] 89
         1       -4381.2288             +nan
         2       -3554.7080        +826.5208
         3       -3322.0183        +232.6897
         4       -3156.3250        +165.6932
         5       -3050.6600        +105.6651
         6       -2990.4260         +60.2340
         7       -2952.1655         +38.2605
         8       -2927.0470         +25.1184
         9       -2910.7456         +16.3014
        10       -2898.2168         +12.5288
        11       -2889.0407          +9.1761
        12       -2882.7763          +6.2644
        13       -2876.3763          +6.4000
        14       -2865.0941

## Models cho 'được' 4x3

In [16]:
# Discrete HMM for 'duoc' with 4*3 states in a left-to-right topology.
# init_params='e': only the emission matrix is initialized by hmmlearn; the
# start and transition probabilities are set by hand below.
# params='ste': all three are re-estimated during fit().
n_states = 4*3
hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may only start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transitions: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = 0.7*np.eye(n_states) + 0.2*np.eye(n_states, k=1) + 0.1*np.eye(n_states, k=2)
transmat[-2, -1] = 0.3  # second-to-last state has no skip target: 0.7 stay / 0.3 advance
transmat[-1, -1] = 1.0  # last state is absorbing
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the length of each sequence.
Xcth = np.concatenate(trainset['duoc'])
lengths = [len(x) for x in trainset['duoc']]
print("training class", 'duoc')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['duoc'] = hmm
print("Training done")

training class duoc
(2034, 1) [22, 21, 27, 23, 30, 15, 18, 21, 30, 19, 20, 21, 30, 22, 14, 17, 23, 23, 19, 21, 18, 18, 20, 38, 21, 22, 35, 29, 18, 19, 22, 42, 22, 20, 24, 22, 21, 22, 22, 44, 29, 18, 14, 20, 24, 22, 19, 41, 24, 20, 18, 16, 20, 18, 21, 22, 30, 17, 21, 19, 16, 19, 18, 19, 16, 20, 14, 17, 16, 32, 26, 28, 24, 25, 25, 29, 21, 15, 17, 31, 22, 26, 27, 31, 24, 19, 24, 31, 18, 16] 90
         1       -5260.9008             +nan
         2       -3794.3014       +1466.5994
         3       -3415.6769        +378.6245
         4       -3189.2896        +226.3872
         5       -3046.4704        +142.8193
         6       -2967.0005         +79.4699
         7       -2925.3974         +41.6031
         8       -2899.7747         +25.6227
         9       -2882.1036         +17.6710
        10       -2869.3647         +12.7389
        11       -2858.9502         +10.4145
        12       -2849.1079          +9.8422
        13       -2841.0696          +8.0383
        14       -283

## Models cho 'người' 5x3

In [17]:
# Discrete HMM for 'nguoi' with 5*3 states in a left-to-right topology.
# init_params='e': only the emission matrix is initialized by hmmlearn; the
# start and transition probabilities are set by hand below.
# params='ste': all three are re-estimated during fit().
n_states = 5*3
hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may only start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transitions: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = 0.7*np.eye(n_states) + 0.2*np.eye(n_states, k=1) + 0.1*np.eye(n_states, k=2)
transmat[-2, -1] = 0.3  # second-to-last state has no skip target: 0.7 stay / 0.3 advance
transmat[-1, -1] = 1.0  # last state is absorbing
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the length of each sequence.
Xcth = np.concatenate(trainset['nguoi'])
lengths = [len(x) for x in trainset['nguoi']]
print("training class", 'nguoi')
print(Xcth.shape, lengths, len(lengths))
hmm.fit(Xcth, lengths=lengths)
models['nguoi'] = hmm
print("Training done")

training class nguoi
(2118, 1) [20, 18, 23, 25, 25, 38, 20, 25, 27, 22, 23, 23, 20, 26, 23, 19, 28, 24, 25, 19, 22, 28, 19, 24, 28, 20, 20, 32, 20, 18, 20, 31, 22, 19, 24, 14, 22, 15, 13, 22, 19, 25, 18, 27, 20, 20, 24, 17, 22, 19, 22, 26, 20, 23, 35, 28, 22, 30, 25, 27, 26, 23, 31, 30, 21, 19, 18, 20, 22, 15, 26, 19, 22, 51, 37, 42, 29, 29, 19, 18, 21, 27, 23, 23, 17, 17, 23, 26, 31, 18] 90
         1       -5527.4822             +nan
         2       -3591.6507       +1935.8316
         3       -3103.4051        +488.2456
         4       -2844.3123        +259.0928
         5       -2739.3397        +104.9726
         6       -2695.5682         +43.7715
         7       -2673.8337         +21.7345
         8       -2660.9006         +12.9331
         9       -2651.7837          +9.1168
        10       -2644.2367          +7.5471
        11       -2636.2948          +7.9419
        12       -2625.8593         +10.4355
        13       -2611.8909         +13.9684
        14       -25

## Models cho 'trong' 3x3

In [28]:
# Discrete HMM for 'trong' with 3*3 states in a left-to-right topology.
# init_params='e': only the emission matrix is initialized by hmmlearn; the
# start and transition probabilities are set by hand below.
# params='ste': all three are re-estimated during fit().
n_states = 3*3
hmm = hmmlearn.hmm.MultinomialHMM(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')

# A sequence may only start in one of the first three states.
startprob = np.zeros(n_states)
startprob[:3] = [0.7, 0.2, 0.1]
hmm.startprob_ = startprob

# Banded transitions: stay (0.7), advance one state (0.2), skip one state (0.1).
transmat = 0.7*np.eye(n_states) + 0.2*np.eye(n_states, k=1) + 0.1*np.eye(n_states, k=2)
transmat[-2, -1] = 0.3  # second-to-last state has no skip target: 0.7 stay / 0.3 advance
transmat[-1, -1] = 1.0  # last state is absorbing
hmm.transmat_ = transmat

# hmmlearn takes all sequences concatenated plus the length of each sequence.
Xvn = np.concatenate(trainset['trong'])
lengths = [len(x) for x in trainset['trong']]
print("training class", 'trong')
print(Xvn.shape, lengths, len(lengths))
hmm.fit(Xvn, lengths=lengths)
models['trong'] = hmm
print("Training done")

training class trong
(2116, 1) [30, 26, 23, 29, 28, 31, 22, 16, 21, 22, 19, 20, 33, 28, 25, 24, 25, 23, 26, 19, 21, 38, 29, 34, 18, 22, 19, 23, 17, 19, 20, 23, 24, 21, 33, 20, 20, 25, 21, 21, 25, 25, 24, 19, 28, 30, 20, 22, 21, 19, 16, 22, 26, 17, 25, 18, 31, 20, 21, 28, 20, 20, 28, 21, 24, 21, 23, 29, 22, 22, 23, 39, 21, 28, 20, 21, 22, 24, 21, 23, 22, 22, 22, 20, 15, 23, 25, 29, 22, 29] 90
         1       -5378.6266             +nan
         2       -3130.3641       +2248.2625
         3       -2628.7277        +501.6364
         4       -2444.1444        +184.5833
         5       -2373.7390         +70.4054
         6       -2333.1698         +40.5692
         7       -2291.8468         +41.3230
         8       -2264.2696         +27.5772
         9       -2246.3244         +17.9452
        10       -2234.6712         +11.6532
        11       -2228.2456          +6.4256
        12       -2225.0402          +3.2054
        13       -2223.3473          +1.6929
        14       -22

# Test

In [25]:
# Classify each held-out utterance by scoring it under every word model and
# picking the model with the highest log-likelihood.
print("Testing")
n_correct = {cname: 0 for cname in class_names}  # correct predictions per true class
for true_cname in class_names:
    for O in testset[true_cname]:
        # log-likelihood of this single observation sequence under each model
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
        predicted = max(score, key=score.get)  # ties resolve to the first key in dict order
        if predicted == true_cname:
            n_correct[true_cname] += 1
        print(true_cname, score, 'predict:', predicted)

Testing
cothe {'cothe': -148.9903247989622, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -279.96886627846396, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -197.0668714291678, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -170.85403310996097, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -386.7590144247123, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -152.68388060095563, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -171.67970783313208, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -412.18677067293686, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -193.49999944515793, 'da': -inf, 'duoc': -inf, 'nguoi': -inf, 'trong': -inf} predict: cothe
cothe {'cothe': -414.53

In [26]:
# Per-class accuracy: correct predictions divided by test utterances of that class.
for cname in class_names:
    class_accuracy = n_correct[cname]/n_test[cname]
    print('Accuracy:', cname, class_accuracy)

Accuracy: cothe 1.0
Accuracy: da 0.7
Accuracy: duoc 0.7
Accuracy: nguoi 0.8
Accuracy: trong 0.4


In [127]:
# Overall accuracy pooled over all classes (weighted by each class's test-set size).
total_correct = sum(n_correct.values())
total_tested = sum(n_test.values())
print('All Accuracy:', total_correct/total_tested)

All Accuracy: 0.8380952380952381


In [128]:
# Show the number of held-out test utterances per class (the denominators
# used by the accuracy cells).
print(n_test)

{'cothe': 25, 'da': 20, 'duoc': 20, 'nguoi': 20, 'trong': 20}


In [129]:
#np.around(models['trong'].transmat_, 2)