**Import Libraries**

In [17]:
import numpy as np
import librosa
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

**MFCC Feature Extraction**

In [2]:
# Function to extract MFCC features from audio files
def extract_features(file_path, max_pad_len=174):
    audio, sample_rate = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)

    # Trim or pad the sequence to ensure they all have the same length
    if mfccs.shape[1] > max_pad_len:
        mfccs = mfccs[:, :max_pad_len]
    else:
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    return mfccs

**Data Loading and Preprocessing**

In [3]:
# Load data and preprocess
def load_data(file_paths, max_pad_len=174):
    data = []
    labels = []

    for file_path, label in file_paths:
        features = extract_features(file_path, max_pad_len)
        data.append(features)
        labels.append(label)

    return np.array(data), np.array(labels)

**Training Data Setup**

In [4]:
# Define audio files and their corresponding labels for training
train_audio_files = [("/content/audio (2).wav", "speaker1"), ("/content/audio 4.wav", "speaker2")]

**Load and preprocess training data**

In [5]:
# Load and preprocess training data
X_train, y_train = load_data(train_audio_files)


**Encode labels**

In [6]:
# Encode labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)

**SVM Model Training**

In [7]:
# Train SVM model
svm_model = SVC(kernel='linear',probability=True)  # Set probability=True to enable predict_proba()
svm_model.fit(X_train.reshape(X_train.shape[0], -1), y_train_encoded)

**Test Data Setup**

In [10]:
# Define audio file and its corresponding label for testing
test_audio_file = "/content/priya.wav"

**Load and preprocess test data**

In [11]:
# Load and preprocess test data
X_test, _ = load_data([(test_audio_file, _)])

**Speaker Prediction**

In [12]:
# Predict the speaker of the test audio
predicted_proba = svm_model.predict_proba(X_test.reshape(X_test.shape[0], -1))[0]
max_proba = np.max(predicted_proba)
print("pf",predicted_proba)
print("max",max_proba)

pf [0.5101725 0.4898275]
max 0.5101725047512846


**Set confidence threshold**

In [13]:
# Set confidence threshold
confidence_threshold = 0.6

**check the speaker prediction for unknown person**

In [14]:
# Check if the maximum predicted probability is above the confidence threshold
if max_proba >confidence_threshold:
    predicted_label_index = svm_model.predict(X_test.reshape(X_test.shape[0], -1))[0]
    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
else:
    predicted_label = "Unknown"

In [15]:
print("Predicted Speaker:", predicted_label)

Predicted Speaker: Unknown


**check the speaker prediction for trained speaker**

In [16]:
# Define audio file and its corresponding label for testing
test_audio_file = "/content/audio 4.wav"
# test_label = "Unknown"

# Load and preprocess test data
X_test, _ = load_data([(test_audio_file, _)])

# Predict the speaker of the test audio
predicted_proba = svm_model.predict_proba(X_test.reshape(X_test.shape[0], -1))[0]
max_proba = np.max(predicted_proba)
print("pf",predicted_proba)
print("max",max_proba)
# Set confidence threshold
confidence_threshold = 0.6

# Check if the maximum predicted probability is above the confidence threshold
if max_proba >confidence_threshold:
    predicted_label_index = svm_model.predict(X_test.reshape(X_test.shape[0], -1))[0]
    predicted_label = label_encoder.inverse_transform([predicted_label_index])[0]
else:
    predicted_label = "Unknown"

print("Predicted Speaker:", predicted_label)


pf [0.66350713 0.33649287]
max 0.6635071318041246
Predicted Speaker: speaker2
