In [1]:
from hmmlearn import hmm
import os
import librosa
import numpy as np
import pandas as pd
from common import *
from IPython import display
LABELS.remove("sil")
STATES = STATES[:-1]

os.chdir('..')

In [2]:
def get_mfcc(file: str):
    y, sr = librosa.load(file)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    delta = librosa.feature.delta(mfcc)
    delta2 = librosa.feature.delta(mfcc, order=2)
    return np.vstack((mfcc, delta, delta2)).T

In [3]:
data = {}
idx_labels = {}

# mfcc in data, index of label in idx_labels

for label in LABELS:
    if label == "sil":
        continue
    files = os.listdir(f"audio_per_labels/audio/{label}")
    _data = [get_mfcc(f"audio_per_labels/audio/{label}/{file}") for file in files]
    data[label] = _data
    idx_labels[label] = [LABELS.index(label)] * len(_data)



In [4]:
from sklearn.model_selection import train_test_split

X = {'train': {}, 'test': {}}
y = {'train': {}, 'test': {}}

for label in LABELS:
    x_train, x_test, y_train, y_test = train_test_split(data[label], idx_labels[label], test_size=0.2)

    X['train'][label] = x_train
    X['test'][label] = x_test
    y['train'][label] = y_train
    y['test'][label] = y_test

In [5]:
for label in LABELS:
    print(f"{label}: {len(X['train'][label])} / {len(X['test'][label])}")

print(X['train']['A'][0].shape)

len: 101 / 26
xuong: 96 / 24
trai: 100 / 25
phai: 91 / 23
nhay: 112 / 28
ban: 93 / 24
A: 97 / 25
B: 107 / 27
(13, 39)


In [6]:
models = {}

for idx, label in enumerate(LABELS):
    models[label] = hmm.GMMHMM(n_components=STATES[idx], covariance_type="diag", n_iter=300)
    models[label].fit(X=np.vstack(X['train'][label]), lengths=[x.shape[0] for x in X['train'][label]])  


In [7]:
from sklearn.metrics import classification_report
y_true = []
y_preds = []

for label in LABELS:
    for mfcc, target in zip(X['test'][label], y['test'][label]):
        scores = [models[label].score(mfcc) for label in LABELS]
        preds = np.argmax(scores)
        y_true.append(target)
        y_preds.append(preds)

report = classification_report(y_true, y_preds)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.88      0.94        26
           1       1.00      1.00      1.00        24
           2       0.93      1.00      0.96        25
           3       1.00      0.96      0.98        23
           4       0.96      0.93      0.95        28
           5       0.96      0.92      0.94        24
           6       0.96      1.00      0.98        25
           7       0.90      1.00      0.95        27

    accuracy                           0.96       202
   macro avg       0.96      0.96      0.96       202
weighted avg       0.96      0.96      0.96       202

