In [None]:
# define the set of commands we want to classify with hmm
import librosa
import numpy as np 
import os

commands = ["demo_fider_ac", # 0
            "demo_fider_bilgileri", # 1 
            "demo_fider_kapat", # 2
            "nem_durumu", # 3
            "gsm_durumu" # 4
            ]
# One MFCC matrix per .wav file, and the index (into `commands`) of the command it belongs to.
data = []
labels = []

dataset_path = "/home/ix502iv/Documents/Audio_Trad/HMM/custom_commands"


def _wav_files_for_command(root, command):
    """Return the sorted .wav paths that belong to one command.

    Two layouts are supported:
      * ``root/<command>/*.wav``  - one sub-folder per command;
      * ``root/<command>*.wav``   - a flat folder whose file names start with the command name.

    Sorting makes the resulting order (and therefore data.npy / labels.npy) reproducible,
    because os.listdir() returns entries in arbitrary order.
    """
    command_dir = os.path.join(root, command)
    if os.path.isdir(command_dir):
        folder = command_dir
        names = os.listdir(command_dir)
    else:
        folder = root
        names = [name for name in os.listdir(root) if name.startswith(command)]
    return sorted(os.path.join(folder, name) for name in names if name.endswith(".wav"))


for i, command in enumerate(commands):
    # Only the files of *this* command receive label i. Previously every .wav under
    # dataset_path was re-read for each command and therefore received all five labels.
    for file_path in _wav_files_for_command(dataset_path, command):
        # sr=None keeps the file's native sampling rate instead of resampling.
        audio_data, sr = librosa.load(file_path, sr=None)

        # Extract the MFCC features; librosa returns (n_mfcc, n_frames).
        mfcc = librosa.feature.mfcc(
            y=audio_data,
            sr=sr,
            n_mfcc=40
        )

        data.append(mfcc.T)  # transposed to (n_frames, n_mfcc)
        labels.append(i)

if not data:
    # np.vstack([]) would fail with an unhelpful message; report the real cause instead.
    raise FileNotFoundError(
        "No .wav files found for any command under {!r}".format(dataset_path)
    )

# Save the features and labels as binary .npy files.
# NOTE(review): data.npy stacks the frames of all files into one (total_frames, 40) array,
# while labels.npy holds one entry per file, so the two arrays generally differ in length.
np.save("data.npy", np.vstack(data))
np.save("labels.npy", np.array(labels))

In [None]:
# Debug check: show the values currently held by `i` and `command`
# (in run order, the last pair produced by the extraction loop).
last_seen = (i, command)
print(*last_seen)

In [None]:
# Pair MFCC features with labels and save them together in features.npy.
data = np.load("data.npy", allow_pickle=True)
labels = np.load("labels.npy", allow_pickle=True)

# data.npy has one row per MFCC frame while labels.npy has one entry per file, so the two
# lengths generally differ. Only the first min(len(data), len(labels)) rows can be paired
# with a label; indexing past that raised the original IndexError. This replaces the
# hard-coded `data.ravel()[:100]`, which only worked when there were exactly 100 labels.
# NOTE(review): truncating this way discards most frames and may reduce model accuracy.
n_samples = min(len(data), len(labels))
print(len(labels), len(data), n_samples)

# param for mfcc extraction
n_mfcc = 13

# Column 0 holds the feature matrix, column 1 the label. An explicit object array is used
# because np.array() on a list of (ndarray, int) tuples raises ValueError on NumPy >= 1.24.
features = np.empty((n_samples, 2), dtype=object)
for i in range(n_samples):
    # NOTE(review): data[i] is already a row of MFCC coefficients written by the extraction
    # cell, so this second MFCC pass treats a feature vector as if it were a waveform
    # sampled at 44100 Hz. That looks unintended - confirm before relying on the result.
    mfcc = librosa.feature.mfcc(
        y=data[i],
        sr=44100,
        n_mfcc=n_mfcc
    )

    features[i, 0] = mfcc.T
    features[i, 1] = labels[i]

np.save("features.npy", features)

In [None]:
# split into training and the test dataset
from sklearn.model_selection import train_test_split

# Read back the (mfcc, label) pairs stored in features.npy.
features = np.load("features.npy", allow_pickle=True)

# Separate every pair into its feature half and its label half.
mfcc_per_sample = []
label_per_sample = []
for pair in features:
    mfcc_per_sample.append(pair[0])
    label_per_sample.append(pair[1])

X = np.array(mfcc_per_sample)   # model inputs (mfcc features)
y = np.array(label_per_sample)  # targets (labels)


In [None]:
# Hold out 20% of the samples for evaluation; the fixed random_state keeps the split repeatable.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# Persist every partition so later cells can reload it from disk.
output_names = ('X_train.npy', "X_test.npy", "y_train.npy", "y_test.npy")
for out_name, part in zip(output_names, split_parts):
    np.save(out_name, part)

In [None]:
# modelling time
from hmmlearn import hmm

# Reload the training partition (features and labels) from disk.
X_train = np.load("X_train.npy", allow_pickle=True)
y_train = np.load("y_train.npy", allow_pickle=True)

# Model dimensions: the number of hidden states, and the feature width taken from the
# third axis of X_train.
n_states = 5
n_features = X_train.shape[2]

# Gaussian-emission HMM with diagonal covariances, trained for at most 100 EM iterations.
hmm_settings = {
    "n_components": n_states,
    "covariance_type": "diag",
    "n_iter": 100,
}
model = hmm.GaussianHMM(**hmm_settings)

In [None]:
# Model training: the input features are reshaped into a 2D array of shape (-1, n_features),
# and the length of each sequence is passed through `lengths`.
# model.fit(X_train.reshape(-1, n_features), lengths=[len(seq) for seq in X_train])

In [None]:
# Collapse each sample to a single row: the first axis is kept and all remaining axes are merged.
X_train_2D = X_train.reshape(len(X_train), -1)
X_test_2D = X_test.reshape(len(X_test), -1)

In [None]:
# Train the HMM on the flattened training rows; no `lengths` are given, so all rows are
# treated as a single observation sequence.
model.fit(X=X_train_2D)

In [None]:
import numpy as np
from hmmlearn import hmm
from sklearn.metrics import accuracy_score

# Load the testing set of MFCC features and their labels
X_test = np.load("X_test.npy", allow_pickle=True)
y_test = np.load("y_test.npy", allow_pickle=True)

# Define a list of candidate numbers of hidden states to try
n_states_list = [3, 5, 7, 9]


def _majority_label_per_state(states, labels):
    """Map each hidden-state index to the most frequent label among the rows assigned to it.

    `states` and `labels` are equal-length 1-D arrays. States that never occur in `states`
    are absent from the returned dict.
    """
    mapping = {}
    for state in np.unique(states):
        values, counts = np.unique(labels[states == state], return_counts=True)
        mapping[int(state)] = values[np.argmax(counts)]
    return mapping


# Fallback for hidden states that never occur on the training rows: the most common label.
labels_train = np.asarray(y_train)
_train_values, _train_counts = np.unique(labels_train, return_counts=True)
default_label = _train_values[np.argmax(_train_counts)]

# Loop over the candidate numbers of hidden states and train a HMM model for each one
best_accuracy = 0
best_model = None
best_n_states = None
best_state_to_label = None

for n_states in n_states_list:
    # Initialize the HMM model; random_state makes the search repeatable between runs.
    model = hmm.GaussianHMM(
        n_components=n_states,
        covariance_type="diag",
        n_iter=100,
        random_state=42,
    )

    # Train the HMM model on the training features (unsupervised: the labels are not used here)
    model.fit(X_train_2D)

    # model.predict() returns hidden-state indices in [0, n_states), NOT command labels, so
    # comparing them directly with y_test is meaningless. Each state is first translated to
    # the command label it most often coincides with on the training set.
    # NOTE(review): the conventional alternative is one HMM per command, classifying by the
    # highest model.score(); that needs per-utterance frame sequences, which are not kept here.
    state_to_label = _majority_label_per_state(model.predict(X_train_2D), labels_train)
    test_states = model.predict(X_test_2D)
    y_pred = np.array([state_to_label.get(int(s), default_label) for s in test_states])

    # Compute the accuracy of the HMM model on the testing set of labels
    accuracy = accuracy_score(y_test, y_pred)

    # Keep the best candidate; the `is None` check guarantees a model is kept even when
    # every accuracy is 0.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model
        best_n_states = n_states
        best_state_to_label = state_to_label

# Print the best accuracy and number of hidden states found
print("Accuracy:", best_accuracy)
print("Number of hidden states:", best_n_states)

In [None]:
# Recording to classify.
filename = "/home/ix502iv/Documents/Audio_Trad/HMM/custom_commands_copy/demo_fider_ac/demo_fider_ac.wav"

# Read the audio at its native sampling rate (sr=None disables resampling).
y, sr = librosa.load(filename, sr=None)

# MFCC extraction settings: 13 coefficients, 2048-sample FFT window, 512-sample hop.
mfcc_options = dict(n_mfcc=13, n_fft=2048, hop_length=512)
mfcc = librosa.feature.mfcc(y=y, sr=sr, **mfcc_options)

# Store the MFCC matrix exactly as librosa returned it, for the decoding cell.
np.save("new_file.npy", mfcc)

In [None]:
# use viterbi algorithm to predict the most likely sequence for the features
from hmmlearn import hmm
# Build a fresh 5-state Gaussian HMM with diagonal covariances. Nothing is loaded from disk
# here: the object has no parameters until they are assigned or fit() is called.
model = hmm.GaussianHMM(n_components=5, covariance_type="diag")

In [None]:
# Hand-written left-to-right HMM: always start in state 0, and from each state either stay
# or advance to the next one with equal probability; the last state is absorbing.
model.startprob_ = np.array([1.0, 0.0, 0.0, 0.0, 0.0])
model.transmat_ = np.array([
    [0.5, 0.5, 0.0, 0.0, 0.0],
    [0.0, 0.5, 0.5, 0.0, 0.0],
    [0.0, 0.0, 0.5, 0.5, 0.0],
    [0.0, 0.0, 0.0, 0.5, 0.5],
    [0.0, 0.0, 0.0, 0.0, 1.0],
])

# Emission means: one 13-dimensional vector per state. Drawn from a seeded generator so
# that re-running the notebook produces the same decode result.
# NOTE(review): these are random placeholders, not trained parameters, so the decoded
# state sequence carries no information about the spoken command.
model.means_ = np.random.default_rng(42).standard_normal((5, 13))

# The emission covariances must be set as well, otherwise decode() has no covariances to
# evaluate. With covariance_type="diag" the expected shape is (n_components, n_features);
# the (5, 13, 13) stack of identity matrices tried earlier is the layout for "full"
# covariances and is rejected by a diag model. Unit variances are the diagonal equivalent.
model.covars_ = np.ones((5, 13))

In [None]:
# Load the MFCC features from the new .wav file
filename = "new_file.npy"
mfcc = np.load(filename)

# hmmlearn expects observations shaped (n_frames, n_features), whereas librosa returns
# (n_mfcc, n_frames) and new_file.npy stores that matrix untransposed. Transpose when the
# feature axis is first, so each row handed to decode() is one frame.
expected_width = np.shape(model.means_)[1]
observations = mfcc
if (
    observations.ndim == 2
    and observations.shape[1] != expected_width
    and observations.shape[0] == expected_width
):
    observations = observations.T

# Use the Viterbi algorithm to predict the most likely sequence of states
logprob, state_sequence = model.decode(observations)
print("Log probability: {:.2f}".format(logprob))
print("State sequence:", state_sequence)