<a href="https://colab.research.google.com/github/komal682/Speaker-Verification/blob/main/SpeakerVerification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import os

import librosa
import librosa.display
import random

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import random
import pickle

from itertools import product




import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm  # For progress bar

### Load Data

In [None]:
# Root folder of the dataset. The default is the original author's local path;
# set the SPEAKER_VERIFICATION_DATA environment variable to point the notebook
# at another location (e.g. a mounted drive on Colab) without editing this cell.
dataset_path = os.environ.get(
    "SPEAKER_VERIFICATION_DATA",
    r"C:\Users\prkom\OneDrive\Desktop\MTech\1st-sem\ML\Project\SpeakerVerification",
)

# Number of entries directly under the dataset root.
len(os.listdir(dataset_path))

25

### Load Audio Files by Speaker


In [None]:
# speaker ID -> list of .wav file paths collected for that speaker
audio_data = {}

# Visit every directory below the dataset root.
for root, _, files in os.walk(dataset_path):
    # The speaker ID is the name of the directory one level above `root`.
    speaker_id = os.path.basename(os.path.dirname(root))
    # Full paths of the .wav files that sit directly in `root`.
    wav_files = [os.path.join(root, name) for name in files if name.endswith('.wav')]

    # Only directories that actually hold recordings create or extend an entry.
    if wav_files:
        audio_data.setdefault(speaker_id, []).extend(wav_files)


### Pairs

In [None]:
def create_audio_pairs(audio_data, seed=None):
    """
    Build labelled pairs of audio files for speaker verification.

    Positive pairs (label 1) are every unordered combination of two files that
    belong to the same speaker. Negative pairs (label 0) are drawn at random
    from two different speakers, one per positive pair, so both labels are
    equally represented. Random draws may repeat, so negative pairs are not
    guaranteed to be unique.

    :param audio_data: Mapping of speaker ID to a list of that speaker's files.
    :param seed: Optional seed for the negative-pair sampling. When None (the
        default) the module-level ``random`` generator is used, so a prior
        ``random.seed(...)`` call still controls the draws.
    :return: List of ``(file_a, file_b, label)`` tuples, positives first.
    """
    # Local import: the notebook's import cell only brings in `product`.
    from itertools import combinations

    rng = random if seed is None else random.Random(seed)
    pairs = []

    # Create positive pairs
    for speaker_id, files in audio_data.items():
        if len(files) > 1:  # At least two files are needed to form a pair
            pairs.extend((first, second, 1) for first, second in combinations(files, 2))
        else:
            print(f"Speaker {speaker_id} has less than 2 files. Skipping positive pair generation.")

    # Create negative pairs.
    # Only speakers that own at least one file can contribute; drawing from an
    # empty list would make random.choice raise IndexError.
    speakers = [speaker_id for speaker_id, files in audio_data.items() if files]
    if len(speakers) < 2:  # Two distinct speakers are required
        print("Not enough speakers to create negative pairs.")
        return pairs

    # Select speakers randomly; one negative per positive keeps labels 1 and 0 balanced.
    for _ in range(len(pairs)):
        speaker1, speaker2 = rng.sample(speakers, 2)  # Two different speakers
        file1 = rng.choice(audio_data[speaker1])
        file2 = rng.choice(audio_data[speaker2])
        pairs.append((file1, file2, 0))  # Label 0 for different speakers

    return pairs


In [None]:
# Build the full list of labelled pairs and report what was produced.
pairs = create_audio_pairs(audio_data)
if pairs:
    print(f"Total pairs created: {len(pairs)}")
    print(f"Sample pair: {pairs[0]}")
else:
    print("No pairs were created. Check your data.")


Total pairs created: 448960
Sample pair: ('C:\\Users\\prkom\\OneDrive\\Desktop\\MTech\\1st-sem\\ML\\Project\\SpeakerVerification\\id10270\\5r0dWxy17C8\\00001.wav', 'C:\\Users\\prkom\\OneDrive\\Desktop\\MTech\\1st-sem\\ML\\Project\\SpeakerVerification\\id10270\\5r0dWxy17C8\\00002.wav', 1)


### Features

In [None]:
def extract_features(audio_path, sr=22050, wavelet_duration=1.0, feature_type='mfcc'):
    """
    Compute one fixed-length feature vector for an audio file.

    The signal is cut into consecutive, non-overlapping windows of
    ``wavelet_duration`` seconds (the code calls these "wavelets"; they are
    plain time windows). Each window's features are averaged over its time
    frames, and the per-window vectors are then averaged over all windows.
    Samples after the last full window are ignored. A clip shorter than one
    window is processed as a single window, because averaging an empty list
    would yield NaN.

    :param audio_path: Path of the audio file, passed to ``librosa.load``.
    :param sr: Sampling rate requested from ``librosa.load``.
    :param wavelet_duration: Window length in seconds.
    :param feature_type: 'mfcc' (13 coefficients) or 'spectrogram' (STFT magnitudes).
    :return: 1-D numpy array of averaged features.
    :raises ValueError: If ``feature_type`` is unsupported, the window length
        is shorter than one sample, or the file contains no samples.
    """
    # Validate up front so a bad feature_type is reported even for short clips.
    if feature_type not in ('mfcc', 'spectrogram'):
        raise ValueError("Unsupported feature type. Use 'mfcc' or 'spectrogram'.")

    audio, sr = librosa.load(audio_path, sr=sr)
    if len(audio) == 0:
        raise ValueError(f"No audio samples could be read from {audio_path!r}.")

    # Window size in samples
    wavelet_size = int(sr * wavelet_duration)
    if wavelet_size < 1:
        raise ValueError("wavelet_duration must span at least one sample.")

    num_wavelets = len(audio) // wavelet_size
    if num_wavelets == 0:
        # Clip shorter than one window: use everything that is available.
        segments = [audio]
    else:
        segments = [
            audio[i * wavelet_size : (i + 1) * wavelet_size]
            for i in range(num_wavelets)
        ]

    features = []
    for wavelet in segments:
        if feature_type == 'mfcc':
            wavelet_features = librosa.feature.mfcc(y=wavelet, sr=sr, n_mfcc=13)
        else:
            wavelet_features = np.abs(librosa.stft(wavelet))
        # Collapse the time-frame axis so every window yields one vector.
        features.append(np.mean(wavelet_features, axis=1))

    # Average the per-window vectors into a single vector for the clip.
    aggregated_features = np.mean(features, axis=0)
    return aggregated_features


In [None]:
# Sanity check: total number of labelled pairs currently held in `pairs`.
len(pairs)

448960

Balanced Pairs

In [None]:

# Number of samples to select for each label
samples_per_label = 25000

# Separate pairs by label
label_1_pairs = [pair for pair in pairs if pair[2] == 1]  # Label 1
label_0_pairs = [pair for pair in pairs if pair[2] == 0]  # Label 0

# Draw the same number from both labels. Capping each label independently
# would leave the subset unbalanced whenever one label has fewer than
# `samples_per_label` pairs, so cap by the smaller class instead.
n_per_label = min(samples_per_label, len(label_1_pairs), len(label_0_pairs))

# Randomly sample `n_per_label` pairs from each group
sampled_label_1_pairs = random.sample(label_1_pairs, n_per_label)
sampled_label_0_pairs = random.sample(label_0_pairs, n_per_label)

# Combine the samples
balanced_pairs = sampled_label_1_pairs + sampled_label_0_pairs

# Shuffle so the two labels are interleaved
random.shuffle(balanced_pairs)

print(f"Selected {len(balanced_pairs)} pairs: {len(sampled_label_1_pairs)} from label 1 and {len(sampled_label_0_pairs)} from label 0.")


Selected 50000 pairs: 25000 from label 1 and 25000 from label 0.


Pair Features

In [None]:
# Library

def process_pair(pair):
    """
    Turn one labelled pair of audio paths into a single feature vector.

    :param pair: Tuple ``(audio1_path, audio2_path, label)``.
    :return: Tuple ``(combined_features, label)`` where ``combined_features``
        is the first clip's features followed by the second clip's features.
    """
    first_path, second_path, label = pair
    # Features are extracted in pair order and joined end to end.
    per_clip = [extract_features(first_path), extract_features(second_path)]
    return np.concatenate(per_clip), label


In [None]:

from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Function to process pairs and save results
def compute_and_save_results(pairs, save_path="results.pkl"):
    """
    Run ``process_pair`` over every pair on a thread pool and pickle the output.

    :param pairs: Sized sequence of ``(audio1_path, audio2_path, label)`` tuples.
    :param save_path: File that receives the pickled list of results.
    :return: None. The results are only written to ``save_path``.
    """
    with ThreadPoolExecutor() as pool:
        # tqdm wraps the lazy map iterator; `total` lets it show a full progress bar.
        progress = tqdm(pool.map(process_pair, pairs), total=len(pairs))
        pair_features = list(progress)

    # Persist the computed results.
    with open(save_path, "wb") as out_file:
        pickle.dump(pair_features, out_file)
    print(f"Results saved to {save_path}")

# Extract features for the balanced subset of pairs and cache them in
# results.pkl, the file that later cells read back.
compute_and_save_results(balanced_pairs, save_path="results.pkl")


 41%|███████████████████████████████▏                                            | 20548/50000 [49:07<52:34,  9.34it/s]

Predict

In [None]:
def load_results(file_path="results.pkl"):
    """
    Read a pickled results object back from disk.
    :param file_path: Path of the pickle file to read.
    :return: The unpickled object (the list of (features, label) entries
        when the file was written by compute_and_save_results).
    """
    with open(file_path, "rb") as handle:
        loaded = pickle.load(handle)
    print(f"Results loaded from {file_path}")
    return loaded

# Load the cached pair features and labels from results.pkl.
results = load_results("results.pkl")


Results loaded from results.pkl


In [None]:
# SPLIT
# Unpack the cached (features, label) entries into a feature matrix and a label vector.
X, y = (np.array(column) for column in zip(*results))

# Hold out 20% of the entries for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Number of cached (features, label) entries loaded from results.pkl.
# NOTE(review): the recorded outputs show 50000 balanced pairs selected but a
# smaller count here, so results.pkl presumably came from a different run; confirm.
len(results)

10000

SVM


In [None]:

# Fit a linear-kernel SVM on the training split (probability=True additionally
# enables predict_proba on the fitted model).
model = SVC(kernel='linear', probability=True).fit(X_train, y_train)

# Score the model on the held-out split.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 66.10%
Classification Report:
               precision    recall  f1-score   support

           0       0.66      0.63      0.65       983
           1       0.66      0.69      0.67      1017

    accuracy                           0.66      2000
   macro avg       0.66      0.66      0.66      2000
weighted avg       0.66      0.66      0.66      2000



GMM


In [None]:
from sklearn.mixture import GaussianMixture
from sklearn.metrics import roc_auc_score

# Train a GMM model (unsupervised: the labels are not used for fitting).
# NOTE(review): this rebinds `model`, replacing the SVM trained in the previous cell.
model = GaussianMixture(n_components=2, covariance_type='full', random_state=42)
model.fit(X_train)

# Mixture components carry no inherent meaning, so component 1 is not
# automatically the "same speaker" class. Use the training labels to find the
# component whose posterior is, on average, higher for label-1 pairs than for
# label-0 pairs; otherwise accuracy and AUC can come out inverted.
train_posteriors = model.predict_proba(X_train)
component_gap = (
    train_posteriors[y_train == 1].mean(axis=0)
    - train_posteriors[y_train == 0].mean(axis=0)
)
positive_component = 0 if component_gap[0] > component_gap[1] else 1

# Posterior of the component associated with label 1, used as the score for class "1"
y_prob = model.predict_proba(X_test)[:, positive_component]

# Convert probabilities to binary predictions (threshold = 0.5)
y_pred = (y_prob >= 0.5).astype(int)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_prob)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"ROC AUC Score: {roc_auc:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))


In [None]:
import pickle
def save_model(fname, model):
    """Pickle ``model`` into the file ``fname``, printing a message before and after."""
    print(f"Saving the model to {fname}...")

    with open(fname, 'wb') as out_file:
        pickle.dump(model, out_file)

    print(f"Model saved successfully to {fname}.")

# Persist the current `model` object to the file "SVModel".
# NOTE(review): when the cells run in order, `model` is the GaussianMixture
# here because the GMM cell rebinds the name used for the SVM - confirm which
# model is meant to be saved.
save_model("SVModel",model)


In [None]:
import numpy as np
import pickle
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the results.pkl file containing the extracted pair features and labels.
# This reads the file directly instead of going through load_results().
with open('results.pkl', 'rb') as file:
    results = pickle.load(file)
# Number of (features, label) entries that were loaded.
len(results)

6000

In [None]:

# Separate features and labels
X, y = zip(*results)  # Extract features and labels
X = np.array(X)
y = np.array(y)

# Split on the ground-truth labels first, so that every fitted step below
# (scaler, K-means, SVM) only ever sees training data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features for better performance in SVM and KMeans
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_raw)
X_test_scaled = scaler.transform(X_test_raw)

# Apply K-means clustering to the training features
n_clusters = 2
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
kmeans.fit(X_train_scaled)

# K-means is unsupervised: its cluster ids are not same/different-speaker
# labels. Training and scoring the SVM on those ids only measures how well the
# SVM reproduces the clusters. Instead, append each sample's distance to every
# centroid as extra features and keep the true labels as the target.
X_train = np.hstack([X_train_scaled, kmeans.transform(X_train_scaled)])
X_test = np.hstack([X_test_scaled, kmeans.transform(X_test_scaled)])

# Train an SVM classifier on the true labels
svm_model = SVC(kernel='linear', probability=True)
svm_model.fit(X_train, y_train)

# Evaluate the SVM model against the true labels
y_pred = svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy of SVM model with K-means clustering: {accuracy * 100:.2f}%")

# Save the trained SVM model for later use
with open('svm_model_with_kmeans.pkl', 'wb') as file:
    pickle.dump(svm_model, file)

# The SVM expects scaled features plus centroid distances, so the fitted
# scaler and KMeans model are saved as well to make the SVM usable later.
with open('svm_kmeans_preprocessing.pkl', 'wb') as file:
    pickle.dump({"scaler": scaler, "kmeans": kmeans}, file)

Accuracy of SVM model with K-means clustering: 99.92%
