In [9]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import pickle

In [10]:
# Function to extract features from audio files
def _extract_feature_vector(file_path):
    """Summarise one audio file as a single 1-D feature vector.

    Loads up to 2.5 s of audio starting 0.5 s into the file, then concatenates
    the time-averaged MFCCs (13 coefficients), chroma STFT and spectral
    contrast with the mean zero-crossing rate.

    Args:
        file_path: Path of the audio file to load.

    Returns:
        A 1-D numpy array holding the concatenated averages.

    Raises:
        ValueError: If no samples remain after skipping the 0.5 s offset.
    """
    y, sr = librosa.load(file_path, duration=2.5, offset=0.5)
    if y.size == 0:
        # A clip shorter than the offset would otherwise feed an empty signal
        # into the feature extractors.
        raise ValueError("audio is too short: nothing left after the 0.5 s offset")

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))

    # Average every feature over time so each file yields one fixed-length row.
    return np.hstack([
        np.mean(mfcc.T, axis=0),
        np.mean(chroma.T, axis=0),
        np.mean(spectral_contrast.T, axis=0),
        zero_crossing_rate
    ])


def extract_features_from_directory(data_directory):
    """Build a feature matrix and label vector from a directory of recordings.

    Every sub-directory of ``data_directory`` is treated as one class, named
    after the sub-directory. Each ``.wav`` file inside it (extension matched
    case-insensitively) contributes one feature row. Files that fail to load
    or to be processed are reported on stdout and skipped.

    Args:
        data_directory: Directory that contains one sub-directory per class.

    Returns:
        A ``(features, labels)`` tuple of numpy arrays with one entry per
        successfully processed file. Both arrays are empty when no file could
        be processed.
    """
    features = []
    labels = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any seeded split done on it) reproducible.
    for label in sorted(os.listdir(data_directory)):
        label_path = os.path.join(data_directory, label)
        if not os.path.isdir(label_path):
            continue  # only sub-directories represent classes

        for file in sorted(os.listdir(label_path)):
            if not file.lower().endswith(".wav"):
                continue  # only .wav files are processed

            file_path = os.path.join(label_path, file)
            try:
                feature_vector = _extract_feature_vector(file_path)
            except Exception as e:
                # Best effort: one unreadable file must not abort the whole scan.
                print(f"Error processing {file_path}: {e}")
                continue

            features.append(feature_vector)
            labels.append(label)

    return np.array(features), np.array(labels)


In [11]:
# Function to train the model
def train_model(data_directory, model_path="voice_model.pkl",
                encoder_path="label_encoder.pkl", test_size=0.2,
                random_state=42):
    """Train a linear SVM speaker classifier and save it to disk.

    Features and labels come from ``extract_features_from_directory``. Labels
    are integer-encoded, the data is split into train and test parts, an SVM
    is fitted on the training part, and its accuracy on the test part is
    printed. The fitted model and the label encoder are then pickled.

    Args:
        data_directory: Directory with one sub-directory of recordings per class.
        model_path: Where the fitted model is pickled.
        encoder_path: Where the fitted label encoder is pickled.
        test_size: Fraction of samples held out for the accuracy check.
        random_state: Seed used for the split and for the SVM's probability
            calibration, so repeated runs on the same data agree.

    Raises:
        ValueError: If fewer than two distinct classes were found.
    """
    print("Extracting features from training data...")
    features, labels = extract_features_from_directory(data_directory)

    if len(set(labels)) < 2:
        raise ValueError("The number of classes must be at least 2. Add more data!")

    # Encode labels into numerical format
    label_encoder = LabelEncoder()
    labels_encoded = label_encoder.fit_transform(labels)

    # Prefer a stratified split so every class appears in both parts; with very
    # few samples per class stratification is rejected with a ValueError, in
    # which case fall back to the plain shuffled split.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels_encoded, test_size=test_size,
            random_state=random_state, stratify=labels_encoded
        )
    except ValueError:
        X_train, X_test, y_train, y_test = train_test_split(
            features, labels_encoded, test_size=test_size,
            random_state=random_state
        )

    # Train an SVM model
    print("Training the model...")
    model = SVC(kernel="linear", probability=True, random_state=random_state)
    model.fit(X_train, y_train)

    # The reported figure is accuracy on the held-out test split, not on the
    # training data.
    print("Model trained with accuracy:", model.score(X_test, y_test))

    # Save the model and label encoder for later use
    with open(model_path, "wb") as model_file:
        pickle.dump(model, model_file)
    with open(encoder_path, "wb") as encoder_file:
        pickle.dump(label_encoder, encoder_file)

    print("Model and label encoder saved.")


In [12]:
def predict_voice(file_path=None, known_speakers=("rahul", "margaret", "jens")):
    """Classify one audio file with the saved model and greet the speaker.

    Loads ``voice_model.pkl`` and ``label_encoder.pkl`` from the current
    working directory, extracts the same features used during training from
    the audio file, predicts its label, and prints either a welcome message or
    "Voice not recognized.". Any failure is reported on stdout instead of being
    raised.

    Args:
        file_path: Audio file to classify. When ``None`` the path is requested
            interactively via ``input``.
        known_speakers: Labels that are greeted; compared case-insensitively so
            a training folder named ``Rahul`` matches ``rahul``.

    NOTE(review): the rejection branch only fires when the predicted label is
    not in ``known_speakers``; presumably the classifier always answers with
    one of its training labels, so an unknown voice is not rejected by this
    check alone - confirm whether a confidence threshold is wanted.
    """
    try:
        # SECURITY: pickle.load can execute arbitrary code. Only load model
        # files produced locally by the training step, never untrusted ones.
        with open("voice_model.pkl", "rb") as model_file:
            model = pickle.load(model_file)
        with open("label_encoder.pkl", "rb") as encoder_file:
            label_encoder = pickle.load(encoder_file)

        if file_path is None:
            file_path = input("Enter the path to the audio file for testing: ")
            # Pasted paths often carry stray whitespace or surrounding quotes.
            file_path = file_path.strip().strip("\"'")

        y, sr = librosa.load(file_path, duration=2.5, offset=0.5)
        if y.size == 0:
            # A clip shorter than the offset leaves nothing to analyse.
            raise ValueError("audio is too short: nothing left after the 0.5 s offset")

        # Extract the same features as used during training
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        zero_crossing_rate = np.mean(librosa.feature.zero_crossing_rate(y=y))

        # Combine features into a single array
        feature_vector = np.hstack([
            np.mean(mfcc.T, axis=0),
            np.mean(chroma.T, axis=0),
            np.mean(spectral_contrast.T, axis=0),
            zero_crossing_rate
        ])

        # Predict the class of the audio and map it back to its text label
        prediction = model.predict([feature_vector])
        predicted_label = label_encoder.inverse_transform(prediction)
        speaker = str(predicted_label[0])

        # Display the result
        allowed = {str(name).lower() for name in known_speakers}
        if speaker.lower() in allowed:
            print(f"Welcome {speaker.capitalize()}!")
        else:
            print("Voice not recognized.")

    except Exception as e:
        # Deliberate best effort: this is an interactive entry point, so
        # report the problem rather than surface a traceback.
        print(f"Error during prediction: {e}")


In [13]:
# Main program
def main():
    """Show the text menu and run the chosen action.

    Option 1 asks for a training-data directory and calls ``train_model``;
    option 2 calls ``predict_voice``. Anything else, including non-numeric
    input, prints an invalid-choice message instead of raising.
    """
    print("Voice Recognition Program")
    print("1. Train the model")
    print("2. Test the model")

    raw_choice = input("Enter your choice (1 or 2): ")
    try:
        choice = int(raw_choice)
    except ValueError:
        # Non-numeric input (e.g. "abc" or an empty line) is simply an invalid
        # choice, not a reason to crash with a traceback.
        choice = None

    if choice == 1:
        # Strip stray whitespace that commonly comes along with pasted paths.
        data_directory = input("Enter the path to the training data directory: ").strip()
        train_model(data_directory)
    elif choice == 2:
        predict_voice()
    else:
        print("Invalid choice. Please enter 1 or 2.")

In [15]:
# Entry point: start the interactive menu only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Voice Recognition Program
1. Train the model
2. Test the model
Welcome Margaret!
