In [58]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder, StandardScaler
import pickle

# Set the path to the directory containing the audio files
audio_directory = 'Data3/'

# Set the number of MFCC coefficients to extract
num_mfcc = 13

# Initialize lists to store the MFCC features and corresponding labels
mfcc_features = []
labels = []

# Iterate over the audio files in the directory
for filename in os.listdir(audio_directory):
    if filename.endswith('.wav'):
        file_path = os.path.join(audio_directory, filename)
        speaker = filename.split('/')[-1].split('_')[-2]  # Extract the speaker label from the file name

        # Load the audio file and extract MFCC features
        audio, sr = librosa.load(file_path)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc)

        # Pad or truncate the MFCC feature vectors to a fixed length of max_len
        max_len = 100  # Set the desired maximum length
        if mfcc.shape[1] > max_len:
            mfcc = mfcc[:, :max_len]
        else:
            pad_width = max_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

        # Append the MFCC features and label to the respective lists
        mfcc_features.append(mfcc.flatten())
        labels.append(speaker)

# Convert the lists to NumPy arrays
mfcc_features = np.array(mfcc_features)
labels = np.array(labels)
print(labels)
# Encode the labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Normalize the MFCC features
scaler = StandardScaler()
mfcc_features_scaled = scaler.fit_transform(mfcc_features)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(mfcc_features_scaled, labels_encoded, test_size=0.2, random_state=42)

# Initialize an SVM classifier
svm = SVC()

# Train the SVM classifier
svm.fit(X_train, y_train)

# Predict the labels for the test set
y_pred = svm.predict(X_test)

# Decode the predicted labels
y_pred_decoded = label_encoder.inverse_transform(y_pred)

# Calculate the accuracy of the classifier
accuracy = np.mean(np.array(y_pred_decoded) == label_encoder.inverse_transform(y_test)) * 100
print(f"Accuracy: {accuracy}%")

svm_model_file = 'svm_model.pkl'
with open(svm_model_file, 'wb') as file:
    pickle.dump(svm, file)


['farae' 'farae' 'farae' 'farae' 'farae' 'farae' 'farae' 'farae' 'farae'
 'farae' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza'
 'hamza' 'hamza' 'umair' 'umair' 'umair' 'umair' 'umair' 'umair' 'umair'
 'umair' 'umair' 'umair' 'farae' 'farae' 'farae' 'farae' 'farae' 'farae'
 'farae' 'farae' 'farae' 'farae' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza'
 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'umair' 'umair' 'umair' 'umair'
 'umair' 'umair' 'umair' 'umair' 'umair' 'umair' 'farae' 'farae' 'farae'
 'farae' 'farae' 'farae' 'farae' 'farae' 'farae' 'farae' 'hamza' 'hamza'
 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'hamza' 'umair'
 'umair' 'umair' 'umair' 'umair' 'umair' 'umair' 'umair' 'umair' 'umair']
Accuracy: 100.0%


In [59]:
import librosa
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler

# Load the SVM model
svm_model_file = 'svm_model.pkl'
with open(svm_model_file, 'rb') as file:
    svm_model = pickle.load(file)

# Load and extract MFCC features from the test audio file
test_audio_file = 'Data\spoken_numbers_pcm\helix_umair_1.wav'
audio, sr = librosa.load(test_audio_file)
num_mfcc = 13  # Same as used during training
mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=num_mfcc)

# Pad or truncate the MFCC feature vectors to a fixed length
max_len = 100  # Same as used during training
if mfcc.shape[1] > max_len:
    mfcc = mfcc[:, :max_len]
else:
    pad_width = max_len - mfcc.shape[1]
    mfcc = np.pad(mfcc, pad_width=((0, 0), (0, pad_width)), mode='constant')

# Normalize the MFCC features using the same StandardScaler instance used during training
scaler = StandardScaler()
mfcc_scaled = scaler.fit_transform(mfcc.flatten().reshape(1, -1))

# Predict the label for the test features
label_pred = svm_model.predict(mfcc_scaled)

# Map the predicted label to the speaker name
speaker_names = np.array(['farae', 'hamza', 'umair'])  # Add the names of the speakers in the same order as the labels during training
predicted_speaker = speaker_names[label_pred[0]]

print(f"Predicted speaker: {predicted_speaker}")


Predicted speaker: umair


In [41]:
newl = labels

In [42]:
newl

array(['farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'hamza', 'hamza', 'hamza', 'hamza', 'hamza', 'hamza',
       'hamza', 'hamza', 'hamza', 'hamza', 'umair', 'umair', 'umair',
       'umair', 'umair', 'umair', 'umair', 'umair', 'umair', 'umair',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'farae', 'farae', 'farae', 'farae', 'farae', 'farae',
       'farae', 'far

In [43]:
newl.unique()

AttributeError: 'numpy.ndarray' object has no attribute 'unique'

In [44]:
newl = np.array(labels)

In [46]:
newl = np.unique(newl)

In [47]:
newl

array(['farae', 'hamza', 'umair'], dtype='<U5')