In [1]:
import os
import pickle

import librosa
import math

import hmmlearn.hmm as hmm
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

from preprocessing import get_mfcc

from utils import  get_all_words

import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
class_names = get_all_words()
states = np.ones(len(class_names), dtype=int) * 3
n_mix = 2

In [3]:
dataset_path = 'datasets_split'

model_path = 'models_train'

In [10]:
def load_data(dataset_path: str, class_names: str):
    length = 0
    for cn in class_names:
        length += len(os.listdir(f"{dataset_path}/{cn}"))
    print('Total samples:', length)

    X = {'train': {}, 'test': {}}
    y = {'train': {}, 'test': {}}

    all_data = {}
    all_labels = {}
    for cname in class_names:
        file_paths = [os.path.join(dataset_path, cname, i) for i in os.listdir(
            os.path.join(dataset_path, cname)) if i.endswith('.wav')]
        data = [get_mfcc(file_path) for file_path in file_paths]
        all_data[cname] = data
        all_labels[cname] = [class_names.index(cname) for _ in range(len(file_paths))]

    for cname in class_names:

        X['train'][cname] = all_data[cname]
        y['train'][cname] = all_labels[cname]
        X['test'][cname] = []

    total_train = 0
    total_test = 0
    for cname in class_names:
        train_count = len(X['train'][cname])
        test_count = len(X['test'][cname])
        print(cname, 'train:', train_count, '| test:', test_count)
        total_train += train_count
        total_test += test_count
    print('train samples:', total_train)
    print('test samples', total_test)

    return X, y

In [11]:
X, y = load_data(dataset_path, class_names)

Total samples: 520
chu train: 10 | test: 0
ve train: 10 | test: 0
vao train: 10 | test: 0
tai train: 10 | test: 0
dau train: 10 | test: 0
cuoi train: 10 | test: 0
ke train: 10 | test: 0
truoc train: 10 | test: 0
dung train: 10 | test: 0
ngung train: 10 | test: 0
doc train: 10 | test: 0
tiep train: 10 | test: 0
lui train: 10 | test: 0
toi train: 10 | test: 0
tang train: 10 | test: 0
to train: 10 | test: 0
giam train: 10 | test: 0
nho train: 10 | test: 0
lai train: 10 | test: 0
lap train: 10 | test: 0
nhanh train: 10 | test: 0
cham train: 10 | test: 0
luu train: 10 | test: 0
xoa train: 10 | test: 0
huy train: 10 | test: 0
chay train: 10 | test: 0
xong train: 10 | test: 0
ddung train: 10 | test: 0
sai train: 10 | test: 0
giup train: 10 | test: 0
gio train: 10 | test: 0
ngay train: 10 | test: 0
tuoi train: 10 | test: 0
co train: 10 | test: 0
khong train: 10 | test: 0
muc train: 10 | test: 0
bai train: 10 | test: 0
mot train: 10 | test: 0
hai train: 10 | test: 0
ba train: 10 | test: 0
bon t

In [12]:
def train(X: dict[str, dict], class_names: list[str], states: list[int], n_mix: int = 2):
    models = {}
    likelihoods = {}

    for idx, cname in enumerate(class_names):
        start_prob = np.full(states[idx], 0.0)
        start_prob[0] = 1.0
        trans_matrix = np.full((states[idx], states[idx]), 0.0)
        p = 0.5
        np.fill_diagonal(trans_matrix, p)
        np.fill_diagonal(trans_matrix[0:, 1:], 1 - p)
        trans_matrix[-1, -1] = 1.0
        trans_matrix = trans_matrix / (states[idx] - 2)

        # trans matrix
        print(cname)
        print(trans_matrix)
        
        models[cname] = hmm.GMMHMM(
            n_components=states[idx],
            n_mix=n_mix,
            verbose=False,
            n_iter=300,
            startprob_prior=start_prob,
            transmat_prior=trans_matrix,
            params='stmc',
            init_params='mc',
            random_state=42
        )

        models[cname].fit(X=np.vstack(X['train'][cname]),
                                lengths=[x.shape[0] for x in X['train'][cname]])

        likelihoods[cname] = models[cname].monitor_.history

    return models, likelihoods

In [13]:
models, likelihoods = train(X, class_names, states, n_mix)

chu
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
ve
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
vao
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
tai
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
dau
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
cuoi
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
ke
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
truoc
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
dung
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
ngung
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
doc
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
tiep
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
lui
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
toi
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
tang
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
to
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
giam
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
nho
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
lai
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  1. ]]
lap
[[0.5 0.5 0. ]
 [0.  0.5 0.5]
 [0.  0.  

In [14]:
def show_prob(models: dict[str, hmm.GMMHMM], class_names: list[str]):
    for cname in class_names:
        print("\nMa trận chuyển tiếp trạng thái ẩn của \"" + cname +"\":")
        print(models[cname].transmat_)

show_prob(models, class_names)


Ma trận chuyển tiếp trạng thái ẩn của "chu":
[[0.96174018 0.03825982 0.        ]
 [0.         0.99198663 0.00801337]
 [0.04202085 0.         0.95797915]]

Ma trận chuyển tiếp trạng thái ẩn của "ve":
[[0.95487588 0.04512412 0.        ]
 [0.         0.99263564 0.00736436]
 [0.03257195 0.         0.96742805]]

Ma trận chuyển tiếp trạng thái ẩn của "vao":
[[0.96926557 0.         0.03073443]
 [0.         0.99162385 0.00837615]
 [0.03015436 0.03124859 0.93859705]]

Ma trận chuyển tiếp trạng thái ẩn của "tai":
[[0.99206947 0.00793053 0.        ]
 [0.         0.96349442 0.03650558]
 [0.02895167 0.         0.97104833]]

Ma trận chuyển tiếp trạng thái ẩn của "dau":
[[0.99264755 0.00735245 0.        ]
 [0.         0.95705963 0.04294037]
 [0.03013293 0.         0.96986707]]

Ma trận chuyển tiếp trạng thái ẩn của "cuoi":
[[0.96110327 0.         0.03889673]
 [0.         0.98846921 0.01153079]
 [0.03754812 0.05907796 0.90337392]]

Ma trận chuyển tiếp trạng thái ẩn của "ke":
[[0.95086619 0.04913381 0

In [18]:
def save_model(models: dict, class_names, model_path: str, model_key: str):
    print(f'Saving models to {model_path}/{model_key}')
    for cname in class_names:
        name = f'{model_path}/{model_key}/model_{cname}.pkl'
        if not os.path.exists(f'{model_path}/{model_key}'):
            os.makedirs(f'{model_path}/{model_key}')
        with open(name, 'wb') as file:
            pickle.dump(models[cname], file)

In [19]:
save_model(models, class_names, model_path, f"full")

Saving models to models_train/full
