### Importing Necessary Libraries

In [3]:
import torchaudio
import warnings
warnings.filterwarnings("ignore")

# This library provides the pretrained ECAPA-TDNN embedding model (trained on VoxCeleb) for speaker verification
from speechbrain.inference.speaker import SpeakerRecognition

# This lib is used to get the cosine distance between the embeddings
from scipy.spatial.distance import cosine

In [4]:

# Load the pre-trained ECAPA-TDNN speaker model from the
# "speechbrain/spkrec-ecapa-voxceleb" source; "tmp" is the save directory
# handed to SpeechBrain for the fetched model files.
model = SpeakerRecognition.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
    savedir="tmp",
)

# Preprocess audio
def preprocess_audio(file_path, target_sample_rate=16000):
    """Load an audio file as a mono waveform at ``target_sample_rate``.

    Args:
        file_path: Path of the audio file, passed to ``torchaudio.load``.
        target_sample_rate: Sample rate (Hz) the waveform is resampled to
            when the file was stored at a different rate.

    Returns:
        The waveform tensor with a single channel, i.e. shape
        ``(1, num_samples)`` for the ``(channels, samples)`` layout that
        ``torchaudio.load`` produces by default.
    """
    waveform, sample_rate = torchaudio.load(file_path)

    # The waveform is later fed to ``model.encode_batch``, which interprets the
    # first axis as the batch axis. Without a downmix, a stereo file would be
    # embedded as two separate utterances and the flattened embedding would be
    # twice as long as the mono ones it is compared against.
    if waveform.dim() == 2 and waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sample_rate != target_sample_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sample_rate, new_freq=target_sample_rate
        )
        waveform = resampler(waveform)
    return waveform

# Extract embeddings
def extract_embeddings(model, file_path):
    """Return the embedding ``model`` produces for a single audio file.

    The file is loaded through ``preprocess_audio`` and encoded with
    ``model.encode_batch``; the leading axis of the encoder output is
    squeezed away before the result is returned.
    """
    encoded = model.encode_batch(preprocess_audio(file_path))
    return encoded.squeeze(0)

# Store embeddings in a hashmap: person name -> list of embeddings, one per
# voice sample found in that person's folder (4 samples by default).
voice_data = {}

def store_embeddings(person_name, folder_path, num_samples=4):
    """Enroll a speaker by embedding the numbered samples in their folder.

    Samples are expected to be named ``voice1.mp3`` ... ``voice<N>.mp3``
    directly inside ``folder_path``.

    Args:
        person_name: Key under which the embeddings are stored in
            ``voice_data``; an existing entry with this name is replaced.
        folder_path: Folder containing the speaker's samples.
        num_samples: How many numbered samples to read (defaults to 4, the
            number of recordings kept per person in this dataset).
    """
    sample_paths = [
        f"{folder_path}/voice{index}.mp3" for index in range(1, num_samples + 1)
    ]
    # Build the full list first so a failure on any sample leaves the
    # previously stored entry (if any) untouched.
    voice_data[person_name] = [
        extract_embeddings(model, sample_path) for sample_path in sample_paths
    ]

# Compare Embeddings function to find closest match
def compare_embeddings(new_embedding, stored_embeddings):
    """Return the smallest cosine distance from ``new_embedding`` to any stored one.

    Args:
        new_embedding: Tensor holding the embedding of the new recording.
        stored_embeddings: Iterable of tensors holding one speaker's
            enrolled embeddings.

    Returns:
        The lowest cosine distance found (lower means more similar), or
        ``float("inf")`` when ``stored_embeddings`` is empty, so a speaker
        without samples can never be selected as a match.
    """
    # The probe vector is identical for every comparison, so convert it once.
    # ``detach`` keeps ``.numpy()`` working for tensors that track gradients.
    probe = new_embedding.detach().cpu().numpy().flatten()
    distances = [
        cosine(probe, stored.detach().cpu().numpy().flatten())
        for stored in stored_embeddings
    ]
    return min(distances, default=float("inf"))

# Authenticate
def authenticate(new_recording_path, threshold=0.4):
    """Identify the enrolled speaker whose voice is closest to a recording.

    The recording is embedded and compared against every speaker in
    ``voice_data``; the speaker with the smallest cosine distance wins,
    provided that distance does not exceed ``threshold``.

    Args:
        new_recording_path: Path of the recording to authenticate.
        threshold: Largest cosine distance still accepted as a match
            (defaults to 0.4).

    Returns:
        The name of the closest enrolled speaker, or the string
        ``"No Match Found"`` when nobody is enrolled or the closest speaker
        is farther away than ``threshold``.
    """
    new_embedding = extract_embeddings(model, new_recording_path)

    closest_match = None
    lowest_distance = float('inf')
    for person, embeddings in voice_data.items():
        distance = compare_embeddings(new_embedding, embeddings)
        if distance < lowest_distance:
            lowest_distance, closest_match = distance, person

    print("Distance To The Closest Point: ", lowest_distance)
    # ``closest_match`` stays None when no speaker produced a finite distance;
    # report that as "no match" regardless of how permissive the threshold is.
    if closest_match is None or lowest_distance > threshold:
        return "No Match Found"
    return closest_match


### Creating embeddings from each person's voice samples to build the reference dataset to compare against

In [5]:
# Store Embeddings And Create Dataset

# Root folder of the dataset, defined once so it can be repointed in one place.
DATASET_DIR = "/Users/rutwikshete/Desktop/Codeing/AIProjects/VoiceAuth/dataset"

# Person name -> sub-folder of DATASET_DIR holding that person's voice samples.
SPEAKER_FOLDERS = {
    "Harshita": "h",
    "Rutwik": "r",
    "Teni": "t",
    "Appa": "a",
}

# Create the dataset entry for every person from the samples in their particular folder.
for speaker_name, folder_name in SPEAKER_FOLDERS.items():
    store_embeddings(speaker_name, f"{DATASET_DIR}/{folder_name}")

### Auth test for a person who **Exists** in the dataset

In [6]:
# Auth test with different people from the dataset: set TEST_SPEAKER to
# "h", "r", "t" or "a" to pick which person's test recording is used.
TEST_DIR = "/Users/rutwikshete/Desktop/Codeing/AIProjects/VoiceAuth/dataset/test"
TEST_SPEAKER = "h"

Auth_Person = authenticate(f"{TEST_DIR}/test({TEST_SPEAKER}).mp3")

print("The Recording Belongs To : ",Auth_Person)

Distance To The Closest Point:  0.19660139
The Recording Belongs To :  Harshita


### Auth test for a person who **Does Not** Exist in the dataset

In [7]:
# Recording of a speaker who was never enrolled: authenticate() should find
# no stored embedding within its distance threshold and report no match.
Auth_Person = authenticate(
    "/Users/rutwikshete/Desktop/Codeing/AIProjects/VoiceAuth/dataset/test/test(random).mp3"
)

print("The Recording Belongs To : ", Auth_Person)

Distance To The Closest Point:  0.7854238
The Recording Belongs To :  No Match Found
