In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

  from collections import Sequence
  from collections import Iterable
  from collections import Mapping, namedtuple, defaultdict, Sequence


In [23]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=20, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 60 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [24]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [25]:
class_names = ["An", "Dat", "Đông", "Ha", "VA", "test_An", "test_Dat", "test_Đông", 
               "test_Ha", "test_VA"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)


Load An dataset
Load Dat dataset
Load Đông dataset
Load Ha dataset
Load VA dataset
Load test_An dataset
Load test_Dat dataset
Load test_Đông dataset
Load test_Ha dataset
Load test_VA dataset
vectors (80075, 60)


In [26]:
models = {}

for cname in class_names:
    hmm = hmmlearn.hmm.GMMHMM(
        n_mix = 2,n_components = 16, random_state=42, n_iter=1000, verbose=True,
    )
    
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class An
(12851, 60) [2165, 866, 757, 605, 1244, 227, 351, 1238, 385, 866, 1093, 523, 1196, 805, 530] 15


         1    -1596841.9642             +nan
         2    -1485061.0926     +111780.8716
         3    -1459082.6564      +25978.4362
         4    -1448938.0780      +10144.5784
         5    -1444253.0513       +4685.0267
         6    -1441272.7791       +2980.2722
         7    -1439731.0922       +1541.6869
         8    -1439007.9324        +723.1598
         9    -1438547.4108        +460.5216
        10    -1438171.2092        +376.2016
        11    -1437894.4226        +276.7866
        12    -1437692.1432        +202.2794
        13    -1437547.5145        +144.6287
        14    -1437433.3404        +114.1741
        15    -1437331.6484        +101.6920
        16    -1437245.8617         +85.7867
        17    -1437168.8451         +77.0166
        18    -1437078.5469         +90.2982
        19    -1436981.7653         +96.7816
        20    -1436893.9867         +87.7786
        21    -1436826.1821         +67.8046
        22    -1436772.4353         +53.7468
        23

       184    -1435389.0809          +0.0050


training class Dat
(11521, 60) [205, 1830, 519, 1290, 552, 948, 464, 319, 890, 855, 1129, 503, 324, 596, 743, 354] 16


         1    -1660928.8195             +nan
         2    -1582491.3273      +78437.4922
         3    -1564909.9047      +17581.4226
         4    -1559996.7800       +4913.1247
         5    -1557989.5896       +2007.1903
         6    -1556840.4097       +1149.1800
         7    -1556140.7901        +699.6195
         8    -1555621.1589        +519.6313
         9    -1555274.2004        +346.9585
        10    -1555026.5758        +247.6246
        11    -1554852.6353        +173.9405
        12    -1554710.0076        +142.6278
        13    -1554571.7219        +138.2857
        14    -1554457.6604        +114.0614
        15    -1554369.4047         +88.2557
        16    -1554303.8011         +65.6036
        17    -1554254.4228         +49.3783
        18    -1554203.2715         +51.1514
        19    -1554155.7975         +47.4740
        20    -1554115.9664         +39.8311
        21    -1554080.2431         +35.7233
        22    -1554051.6069         +28.6362
        23

training class Đông
(10720, 60) [578, 466, 602, 1324, 850, 955, 1187, 754, 241, 369, 458, 626, 1187, 874, 249] 15


         1    -1773355.1128             +nan
         2    -1607822.7119     +165532.4008
         3    -1559349.3645      +48473.3474
         4    -1548732.8613      +10616.5032
         5    -1542535.0293       +6197.8320
         6    -1536270.0780       +6264.9513
         7    -1525779.9046      +10490.1735
         8    -1505727.7206      +20052.1839
         9    -1470495.7069      +35232.0137
        10    -1408882.3230      +61613.3840
        11    -1297454.2541     +111428.0689
        12     -909104.0746     +388350.1795
        13     -908970.8528        +133.2218
        14     -908866.6364        +104.2163
        15     -908768.1281         +98.5084
        16     -908686.3828         +81.7453
        17     -908619.2402         +67.1426
        18     -908569.3607         +49.8796
        19     -908526.6862         +42.6745
        20     -908491.9057         +34.7805
        21     -908462.0959         +29.8098
        22     -908429.9995         +32.0964
        23

training class Ha
(14216, 60) [772, 1554, 686, 716, 1169, 900, 960, 686, 909, 2079, 785, 896, 451, 332, 1321] 15


         1    -2162348.1809             +nan
         2    -2060939.8703     +101408.3106
         3    -2045694.7985      +15245.0718
         4    -2041867.2995       +3827.4990
         5    -2040141.5564       +1725.7431
         6    -2038992.6445       +1148.9119
         7    -2038207.5995        +785.0450
         8    -2037611.8759        +595.7236
         9    -2037128.7532        +483.1227
        10    -2036676.1732        +452.5800
        11    -2036359.3773        +316.7959
        12    -2036118.3574        +241.0199
        13    -2035926.6387        +191.7187
        14    -2035752.8124        +173.8263
        15    -2035565.9209        +186.8915
        16    -2035405.4595        +160.4615
        17    -2035265.9736        +139.4858
        18    -2035139.8618        +126.1118
        19    -2035022.6751        +117.1867
        20    -2034922.3023        +100.3728
        21    -2034832.3202         +89.9822
        22    -2034755.1186         +77.2015
        23

training class VA
(12783, 60) [865, 744, 1211, 1214, 834, 594, 891, 597, 661, 840, 687, 865, 1155, 682, 943] 15


         1    -2029176.5309             +nan
         2    -1938017.7962      +91158.7348
         3    -1913210.3932      +24807.4030
         4    -1907966.3190       +5244.0742
         5    -1905622.8826       +2343.4364
         6    -1904002.8906       +1619.9921
         7    -1902999.5917       +1003.2988
         8    -1902106.9865        +892.6052
         9    -1901469.5903        +637.3962
        10    -1900912.7182        +556.8722
        11    -1900402.9343        +509.7839
        12    -1899999.1215        +403.8128
        13    -1899707.0639        +292.0576
        14    -1899482.5327        +224.5312
        15    -1899277.8288        +204.7038
        16    -1899082.1649        +195.6639
        17    -1898884.0017        +198.1632
        18    -1898661.9701        +222.0315
        19    -1898463.7204        +198.2498
        20    -1898282.2007        +181.5197
        21    -1898114.8448        +167.3559
        22    -1897990.1611        +124.6837
        23

Training done


       151    -1896937.7329          +0.0096


In [27]:

acc = {}
for true_cname in class_names:
    if true_cname[:4] == 'test':
        kt = 0
        for O in dataset[true_cname]:
            score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
            inverse = [(value, key) for key, value in score.items()]
            pre = max(inverse)[1]
            print(true_cname, score, pre)
            if pre == true_cname[5:]:
                kt +=1
        print(true_cname," ", kt)
        acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

test_An {'An': -50130.86055347032, 'Dat': -51700.92183885842, 'Đông': -54265.996591354575, 'Ha': -52231.285911905856, 'VA': -54014.89535924531} An
test_An {'An': -101923.29054594555, 'Dat': -105397.46308149268, 'Đông': -112516.17497550628, 'Ha': -108029.46682576566, 'VA': -112150.30817150677} An
test_An {'An': -87504.70480267737, 'Dat': -91644.77850358801, 'Đông': -100042.71675033635, 'Ha': -95209.141517317, 'VA': -98473.27802684598} An
test_An {'An': -115327.92551609749, 'Dat': -116525.89367995423, 'Đông': -124300.62206030832, 'Ha': -120667.17008626286, 'VA': -123768.36683946932} An
test_An {'An': -110173.13073532796, 'Dat': -115791.05899163091, 'Đông': -125006.00951411453, 'Ha': -119678.04740242017, 'VA': -123280.70815801027} An
test_An   5
test_Dat {'An': -130661.687185562, 'Dat': -116674.2903783344, 'Đông': -128363.02369330447, 'Ha': -123015.18531548226, 'VA': -122841.60359194654} Dat
test_Dat {'An': -130425.13047300659, 'Dat': -114358.3754306724, 'Đông': -126889.29847469697, 'Ha':

In [28]:
with open('output_final.pkl', 'w') as f:
    print(models, file=f)