In [1]:
import numpy as np
import hmmlearn.hmm
from python_speech_features import mfcc
import scipy.io.wavfile as wav

In [2]:
# Function to extract MFCC features from a WAV file
def extract_mfcc(file_path):
    rate, signal = wav.read(file_path)
    mfcc_features = mfcc(signal, rate, winlen=0.025, winstep=0.01, numcep=13, nfilt=26, nfft=512)
    return mfcc_features

In [3]:
# Function to train an HMM model
def train_hmm_model(training_data, num_hidden_states=3):
    label_to_int = {label: idx for idx, (path, label) in enumerate(training_data)}
    X_train = np.vstack([extract_mfcc(path) for path, label in training_data])
    y_train = np.array([label_to_int[label] for path, label in training_data])
    
    hmm_model = hmmlearn.hmm.GaussianHMM(n_components=num_hidden_states, covariance_type="diag", n_iter=100)
    hmm_model.fit(X_train)
    
    return hmm_model, label_to_int

In [4]:
# Function to recognize audio and predict labels
def recognize_audio(test_data, hmm_model, label_to_int):
    predicted_labels = []
    for test_path in test_data:
        test_mfcc = extract_mfcc(test_path)
        log_likelihoods = hmm_model.score(test_mfcc)
        predicted_label = predict_label(log_likelihoods, label_to_int)
        predicted_labels.append(predicted_label)
    return predicted_labels


In [5]:
# Function to predict labels from log likelihoods
def predict_label(log_likelihoods, label_dict):
    max_label = max(label_dict, key=lambda label: log_likelihoods[label_dict[label]])
    return max_label

In [6]:
# Example usage
if __name__ == "__main__":
    training_data = [
        ("path/to/speech1.wav", "label1"),
        ("path/to/speech2.wav", "label1"),
        ("path/to/speech3.wav", "label2"),
        # Add more training data as needed
    ]

    test_data = [
        "path/to/test_speech1.wav",
        "path/to/test_speech2.wav",
        # Add more test data as needed
    ]

    hmm_model, label_to_int = train_hmm_model(training_data)
    predicted_labels = recognize_audio(test_data, hmm_model, label_to_int)

    for test_path, predicted_label in zip(test_data, predicted_labels):
        print(f"Predicted label for {test_path}: {predicted_label}")

FileNotFoundError: [Errno 2] No such file or directory: 'path/to/speech1.wav'