In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [3]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=20, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 60 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [4]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [43]:
# class_names = ["Đạt", "Hoàng", "Tùng", "VA", "Trấn Thành","Tự Long", "Xuân Bắc",
#                "test_Đạt", "test_Hoàng", "test_Tùng", "test_VA", "test_Trấn Thành", "test_Tự Long", "test_Xuân Bắc"]
class_names = ["TA", "test_TA", "Trường Giang", "test_Trường Giang", "Công Lý", "test_Công Lý", "Quang Thắng", "test_Quang Thắng"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)


Load TA dataset
Load test_TA dataset
Load Trường Giang dataset
Load test_Trường Giang dataset
Load Công Lý dataset
Load test_Công Lý dataset
Load Quang Thắng dataset
Load test_Quang Thắng dataset
vectors (78941, 60)


In [44]:
models = {}

for cname in class_names:
    hmm = hmmlearn.hmm.GMMHMM(
        n_mix = 2,n_components = 16, random_state=42, n_iter=1000, verbose=True,
    )
    
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class TA
(12030, 60) [802, 802, 802, 802, 802, 802, 802, 802, 802, 802, 802, 802, 802, 802, 802] 15


         1    -1897993.2431             +nan
         2    -1809299.0824      +88694.1607
         3    -1797416.4910      +11882.5914
         4    -1794663.9499       +2752.5411
         5    -1793225.5675       +1438.3824
         6    -1792361.1496        +864.4180
         7    -1791729.0750        +632.0746
         8    -1791171.0311        +558.0439
         9    -1790679.1695        +491.8616
        10    -1790297.5544        +381.6151
        11    -1789965.2490        +332.3054
        12    -1789678.5198        +286.7292
        13    -1789450.7847        +227.7351
        14    -1789233.9618        +216.8229
        15    -1788997.5120        +236.4498
        16    -1788763.5000        +234.0120
        17    -1788597.8459        +165.6541
        18    -1788470.5182        +127.3277
        19    -1788370.9215         +99.5967
        20    -1788304.1288         +66.7927
        21    -1788256.1358         +47.9930
        22    -1788213.2779         +42.8579
        23

       184    -1787358.3105          +0.1155
       185    -1787358.2169          +0.0936
       186    -1787358.1416          +0.0753
       187    -1787358.0812          +0.0605
       188    -1787358.0325          +0.0487
       189    -1787357.9930          +0.0395
       190    -1787357.9604          +0.0326
       191    -1787357.9328          +0.0275
       192    -1787357.9088          +0.0241
       193    -1787357.8867          +0.0220
       194    -1787357.8653          +0.0215
       195    -1787357.8427          +0.0226
       196    -1787357.8167          +0.0259
       197    -1787357.7842          +0.0325
       198    -1787357.7402          +0.0440
       199    -1787357.6785          +0.0617
       200    -1787357.5954          +0.0831
       201    -1787357.4982          +0.0971
       202    -1787357.4059          +0.0923
       203    -1787357.3290          +0.0769
       204    -1787357.2596          +0.0694
       205    -1787357.1828          +0.0768
       206

training class Trường Giang
(16140, 60) [1122, 995, 1112, 1233, 1397, 1140, 1114, 791, 922, 1395, 796, 917, 993, 1129, 1084] 15


         1    -2516754.6776             +nan
         2    -2421422.2846      +95332.3929
         3    -2404905.4100      +16516.8746
         4    -2399996.6688       +4908.7412
         5    -2397667.6111       +2329.0578
         6    -2396409.5393       +1258.0718
         7    -2395477.3883        +932.1510
         8    -2394641.8894        +835.4989
         9    -2393777.2208        +864.6686
        10    -2392757.5503       +1019.6704
        11    -2391598.7788       +1158.7716
        12    -2390604.3883        +994.3905
        13    -2389853.7097        +750.6786
        14    -2389287.9482        +565.7615
        15    -2388855.4284        +432.5197
        16    -2388523.0161        +332.4123
        17    -2388258.1415        +264.8746
        18    -2388067.1262        +191.0152
        19    -2387899.3926        +167.7336
        20    -2387737.9340        +161.4587
        21    -2387606.7937        +131.1403
        22    -2387500.1638        +106.6299
        23

       184    -2385752.3349          +0.0305
       185    -2385752.3049          +0.0300
       186    -2385752.2755          +0.0294
       187    -2385752.2466          +0.0289
       188    -2385752.2182          +0.0284
       189    -2385752.1903          +0.0279
       190    -2385752.1628          +0.0275
       191    -2385752.1356          +0.0271
       192    -2385752.1086          +0.0270
       193    -2385752.0811          +0.0276
       194    -2385752.0508          +0.0303
       195    -2385752.0109          +0.0399
       196    -2385751.9404          +0.0705
       197    -2385751.7841          +0.1564
       198    -2385751.4634          +0.3206
       199    -2385751.0780          +0.3855
       200    -2385750.8905          +0.1875
       201    -2385750.8318          +0.0587
       202    -2385750.7956          +0.0361
       203    -2385750.7611          +0.0346
       204    -2385750.7240          +0.0371
       205    -2385750.6824          +0.0416
       206

training class Công Lý
(14124, 60) [940, 1026, 773, 844, 851, 798, 796, 1232, 948, 1005, 736, 1086, 1003, 921, 1165] 15


         1    -2198902.8453             +nan
         2    -2103100.6590      +95802.1863
         3    -2079935.5745      +23165.0844
         4    -2072020.6744       +7914.9001
         5    -2068310.2775       +3710.3970
         6    -2065921.3732       +2388.9042
         7    -2064348.5953       +1572.7780
         8    -2063148.0406       +1200.5546
         9    -2062306.9941        +841.0466
        10    -2061588.9015        +718.0926
        11    -2060966.4154        +622.4861
        12    -2060461.1489        +505.2665
        13    -2060161.7701        +299.3788
        14    -2059932.0389        +229.7312
        15    -2059729.2222        +202.8167
        16    -2059578.4270        +150.7952
        17    -2059457.9298        +120.4972
        18    -2059308.9988        +148.9310
        19    -2059190.0103        +118.9884
        20    -2059082.8336        +107.1767
        21    -2058975.1027        +107.7309
        22    -2058875.1919         +99.9108
        23

       184    -2057289.6195          +2.6131
       185    -2057286.8526          +2.7670
       186    -2057283.6813          +3.1713
       187    -2057279.8761          +3.8051
       188    -2057274.7746          +5.1015
       189    -2057268.3181          +6.4565
       190    -2057263.1672          +5.1509
       191    -2057259.6171          +3.5500
       192    -2057256.5580          +3.0591
       193    -2057251.4807          +5.0773
       194    -2057244.9494          +6.5313
       195    -2057239.8536          +5.0958
       196    -2057235.1892          +4.6644
       197    -2057230.0179          +5.1714
       198    -2057223.3228          +6.6950
       199    -2057218.9443          +4.3786
       200    -2057214.4002          +4.5441
       201    -2057208.4336          +5.9666
       202    -2057202.4549          +5.9787
       203    -2057198.5518          +3.9032
       204    -2057195.1091          +3.4427
       205    -2057192.9963          +2.1128
       206

       367    -2056507.8593          +0.0426
       368    -2056507.8114          +0.0479
       369    -2056507.7519          +0.0595
       370    -2056507.6701          +0.0818
       371    -2056507.5509          +0.1192
       372    -2056507.3863          +0.1646
       373    -2056507.2038          +0.1825
       374    -2056507.0464          +0.1574
       375    -2056506.9176          +0.1288
       376    -2056506.8037          +0.1139
       377    -2056506.6976          +0.1061
       378    -2056506.5962          +0.1014
       379    -2056506.4965          +0.0997
       380    -2056506.3927          +0.1038
       381    -2056506.2690          +0.1237
       382    -2056506.0721          +0.1969
       383    -2056505.6223          +0.4498
       384    -2056504.7938          +0.8285
       385    -2056504.2678          +0.5260
       386    -2056504.0525          +0.2152
       387    -2056503.9271          +0.1255
       388    -2056503.8414          +0.0856
       389

training class Quang Thắng
(16478, 60) [1095, 1100, 1450, 1078, 1397, 891, 769, 1065, 1276, 1035, 1410, 858, 1169, 986, 899] 15


         1    -2441962.2479             +nan
         2    -2359907.6884      +82054.5596
         3    -2331054.5425      +28853.1459
         4    -2322659.0510       +8395.4915
         5    -2319228.5916       +3430.4594
         6    -2316912.2007       +2316.3909
         7    -2315235.9278       +1676.2729
         8    -2314120.2875       +1115.6403
         9    -2313298.5405        +821.7469
        10    -2312731.7302        +566.8104
        11    -2312310.3241        +421.4061
        12    -2311976.5152        +333.8089
        13    -2311713.4680        +263.0472
        14    -2311490.3691        +223.0989
        15    -2311296.0107        +194.3584
        16    -2311109.6048        +186.4059
        17    -2310954.3400        +155.2648
        18    -2310796.7746        +157.5654
        19    -2310692.3134        +104.4612
        20    -2310600.8607         +91.4527
        21    -2310515.6839         +85.1768
        22    -2310430.0597         +85.6242
        23

       184    -2308813.5124          +8.5312
       185    -2308806.2628          +7.2497
       186    -2308801.6885          +4.5743
       187    -2308796.0566          +5.6319
       188    -2308788.0277          +8.0289
       189    -2308778.7827          +9.2450
       190    -2308768.7619         +10.0208
       191    -2308760.3651          +8.3967
       192    -2308752.4053          +7.9598
       193    -2308741.0246         +11.3807
       194    -2308731.3579          +9.6667
       195    -2308718.3301         +13.0278
       196    -2308701.9089         +16.4212
       197    -2308688.2654         +13.6436
       198    -2308672.2708         +15.9946
       199    -2308663.5121          +8.7586
       200    -2308652.4550         +11.0571
       201    -2308645.2296          +7.2253
       202    -2308638.4197          +6.8100
       203    -2308631.5505          +6.8692
       204    -2308624.9539          +6.5966
       205    -2308622.3596          +2.5944
       206

Training done


       252    -2308540.3678          +0.0098


In [45]:

acc = {}
for true_cname in class_names:
    if true_cname[:4] == 'test':
        kt = 0
        for O in dataset[true_cname]:
            score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
            inverse = [(value, key) for key, value in score.items()]
            pre = max(inverse)[1]
            print(true_cname, score, pre)
            if pre == true_cname[5:]:
                kt +=1
        print(true_cname," ", kt)
        acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

test_TA {'TA': -119030.2935844488, 'Trường Giang': -127353.05103947234, 'Công Lý': -124352.42398396342, 'Quang Thắng': -127712.14064914288} TA
test_TA {'TA': -120573.90754897126, 'Trường Giang': -128193.89689762132, 'Công Lý': -126145.03956769164, 'Quang Thắng': -129004.37042808434} TA
test_TA {'TA': -121995.61717051583, 'Trường Giang': -129786.3404164653, 'Công Lý': -127103.5364080175, 'Quang Thắng': -129886.80774352289} TA
test_TA {'TA': -121952.38794060264, 'Trường Giang': -129259.57396923905, 'Công Lý': -126851.45683620466, 'Quang Thắng': -130078.48224184752} TA
test_TA {'TA': -121702.5416397479, 'Trường Giang': -128382.49885005095, 'Công Lý': -126883.78186790466, 'Quang Thắng': -129189.89108489711} TA
test_TA   5
test_Trường Giang {'TA': -156904.3374461757, 'Trường Giang': -146874.2410985643, 'Công Lý': -152864.15691822636, 'Quang Thắng': -151946.8140852406} Trường Giang
test_Trường Giang {'TA': -222223.7826467105, 'Trường Giang': -209169.1846165809, 'Công Lý': -214995.97933498365

In [26]:
import pickle 
with open('output_final_time1.pkl', 'wb') as file:
     pickle.dump(models, file)

In [1]:
for cname in class_names:
    if cname[:4] != 'test':
        cname = str(cname)
        print(cname)
        picklefile = "models/" +  str(cname)+".pkl"
        with open(picklefile, 'wb') as f:
            pickle.dump(models[cname],f)

NameError: name 'class_names' is not defined

In [47]:
modelpath = "models/"
model_files = [os.path.join(modelpath,fname) for fname in 
              os.listdir(modelpath) if fname.endswith('.pkl')]

In [48]:
model_files

['models/Đạt.pkl',
 'models/Công Lý.pkl',
 'models/Xuân Bắc.pkl',
 'models/TA.pkl',
 'models/Trấn Thành.pkl',
 'models/VA.pkl',
 'models/Hoàng.pkl',
 'models/Tùng.pkl',
 'models/Tự Long.pkl',
 'models/Trường Giang.pkl',
 'models/Quang Thắng.pkl']

In [49]:
models    = [pickle.load(open(fname,'rb')) for fname in model_files]
speakers   = [fname.split("/")[-1].split(".pkl")[0] for fname 
              in model_files]


In [50]:
print(speakers)

['Đạt', 'Công Lý', 'Xuân Bắc', 'TA', 'Trấn Thành', 'VA', 'Hoàng', 'Tùng', 'Tự Long', 'Trường Giang', 'Quang Thắng']


In [53]:
class_names = { "test_Đạt", "test_Hoàng", "test_Tùng", "test_VA",  "test_TA", 
               "test_Trấn Thành", "test_Tự Long", "test_Xuân Bắc","test_Trường Giang","test_Công Lý", "test_Quang Thắng"}
acc = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
for true_cname in class_names:
    if true_cname[:4] == 'test':
        kt = 0
        for O in dataset[true_cname]:
            
#             score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
#             inverse = [(value, key) for key, value in score.items()]
#             pre = max(inverse)[1]
#             print(true_cname, score, pre)
            log_likelihood = np.zeros(len(models)) 

            for i in range(len(models)):
                gmm    = models[i]         #checking with each model one by one
                scores = np.array(gmm.score(O, [len(O)]))
                log_likelihood[i] = scores.sum()

            winner = np.argmax(log_likelihood)
            pre = speakers[winner]
            if pre == true_cname[5:]:
                kt +=1
        print(true_cname," ", kt)
        acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

Load test_Tự Long dataset
Load test_Hoàng dataset
Load test_Trường Giang dataset
Load test_TA dataset
Load test_VA dataset
Load test_Quang Thắng dataset
Load test_Trấn Thành dataset
Load test_Đạt dataset
Load test_Công Lý dataset
Load test_Tùng dataset
Load test_Xuân Bắc dataset
vectors (53939, 60)
test_Tự Long   5
test_Hoàng   5
test_Trường Giang   5
test_TA   5
test_VA   5
test_Quang Thắng   5
test_Trấn Thành   3
test_Đạt   5
test_Công Lý   5
test_Tùng   5
test_Xuân Bắc   4
{'test_Tự Long': 100.0, 'test_Hoàng': 100.0, 'test_Trường Giang': 100.0, 'test_TA': 100.0, 'test_VA': 100.0, 'test_Quang Thắng': 100.0, 'test_Trấn Thành': 60.0, 'test_Đạt': 100.0, 'test_Công Lý': 100.0, 'test_Tùng': 100.0, 'test_Xuân Bắc': 80.0}


In [54]:
class_names = { "Đạt", "Hoàng", "Tùng", "VA",  "TA", 
               "Trấn Thành", "Tự Long", "Xuân Bắc","Trường Giang","Công Lý", "Quang Thắng"}
acc = {}
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
for true_cname in class_names:
    kt = 0
    for O in dataset[true_cname]:

#             score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
#             inverse = [(value, key) for key, value in score.items()]
#             pre = max(inverse)[1]
#             print(true_cname, score, pre)
        log_likelihood = np.zeros(len(models)) 

        for i in range(len(models)):
            gmm    = models[i]         #checking with each model one by one
            scores = np.array(gmm.score(O, [len(O)]))
            log_likelihood[i] = scores.sum()

        winner = np.argmax(log_likelihood)
        pre = speakers[winner]
        if pre == true_cname:
            kt +=1
    print(true_cname," ", kt)
    acc[true_cname] = kt * 100 / len(dataset[true_cname])
print(acc)                                    

Load Công Lý dataset
Load Quang Thắng dataset
Load Trường Giang dataset
Load Trấn Thành dataset
Load Tùng dataset
Load Tự Long dataset
Load Hoàng dataset
Load TA dataset
Load Xuân Bắc dataset
Load Đạt dataset
Load VA dataset
vectors (162281, 60)
Công Lý   15
Quang Thắng   15
Trường Giang   15
Trấn Thành   15
Tùng   18
Tự Long   15
Hoàng   15
TA   15
Xuân Bắc   15
Đạt   16
VA   15
{'Công Lý': 100.0, 'Quang Thắng': 100.0, 'Trường Giang': 100.0, 'Trấn Thành': 100.0, 'Tùng': 100.0, 'Tự Long': 100.0, 'Hoàng': 100.0, 'TA': 100.0, 'Xuân Bắc': 100.0, 'Đạt': 100.0, 'VA': 100.0}
