# Speaker verification with SpeechBrain

In [None]:
!pip install speechbrain
!pip install torchaudio
import torchaudio
from speechbrain.pretrained import SpeakerRecognition
import torch

Collecting speechbrain
[?25l  Downloading https://files.pythonhosted.org/packages/89/7b/19434dc600e91abd066e30bdd6027e9231419ac6a016d076125bbfbcb89e/speechbrain-0.5.7-py3-none-any.whl (355kB)
[K     |█                               | 10kB 11.1MB/s eta 0:00:01[K     |█▉                              | 20kB 15.6MB/s eta 0:00:01[K     |██▊                             | 30kB 13.1MB/s eta 0:00:01[K     |███▊                            | 40kB 9.3MB/s eta 0:00:01[K     |████▋                           | 51kB 5.5MB/s eta 0:00:01[K     |█████▌                          | 61kB 5.1MB/s eta 0:00:01[K     |██████▌                         | 71kB 5.7MB/s eta 0:00:01[K     |███████▍                        | 81kB 6.3MB/s eta 0:00:01[K     |████████▎                       | 92kB 6.2MB/s eta 0:00:01[K     |█████████▏                      | 102kB 5.2MB/s eta 0:00:01[K     |██████████▏                     | 112kB 5.2MB/s eta 0:00:01[K     |███████████                     | 122kB 5.2M

## Core functions

In [None]:
def load_sv_model():
    """
    Build the pre-trained SpeechBrain speaker-verification model.

    Returns the object produced by ``SpeakerRecognition.from_hparams`` for the
    ``speechbrain/spkrec-ecapa-voxceleb`` source, with
    ``pretrained_models/spkrec-ecapa-voxceleb`` as the save directory.
    """
    hparams_location = {
        "source": "speechbrain/spkrec-ecapa-voxceleb",
        "savedir": "pretrained_models/spkrec-ecapa-voxceleb",
    }
    return SpeakerRecognition.from_hparams(**hparams_location)

def compare_emeddings(emb1, emb2):
    """
    Score two embeddings against each other with cosine similarity.

    The similarity is computed along the last dimension (``dim=-1``) with
    ``eps=1e-6`` and the resulting tensor is returned unchanged.
    """
    return torch.nn.functional.cosine_similarity(emb1, emb2, dim=-1, eps=1e-6)

def get_embedding(path:str, verif_model=None):
    """
    Compute speaker embeddings for the audio file at ``path``.

    Args:
        path: Location of the WAV file; handed to ``torchaudio.load``.
        verif_model: Object exposing ``encode_batch`` (e.g. the result of
            ``load_sv_model()``). When omitted, the notebook-level
            ``verif_model`` variable is used, which keeps the older
            single-argument call working.

    Returns:
        Whatever ``verif_model.encode_batch`` returns for the loaded signal.

    Raises:
        NameError: If no model is passed and no global ``verif_model`` exists.

    Note:
        The loaded signal is passed to ``encode_batch`` unchanged: no
        down-mixing or resampling happens here. Presumably a multi-channel
        file therefore yields one embedding per channel -- confirm against
        the torchaudio / SpeechBrain documentation.
    """
    if verif_model is None:
        # Fall back to the global that the one-argument version relied on.
        try:
            verif_model = globals()["verif_model"]
        except KeyError:
            raise NameError(
                "name 'verif_model' is not defined; pass the model as the second argument"
            ) from None

    # torchaudio.load also returns the sample rate, which is not used here.
    signal, _sample_rate = torchaudio.load(path)
    return verif_model.encode_batch(signal)



def run_sv(path:str, speaker_model, verif_model, threshold = 0.85):
    """
    Verify the speaker of a WAV file against an enrolled speaker model.

    The file's embedding is obtained via ``get_embedding(path, verif_model)``
    and compared with ``speaker_model`` through ``compare_emeddings`` (cosine
    similarity). Returns the score together with ``score > threshold``.
    """
    candidate = get_embedding(path, verif_model)
    similarity = compare_emeddings(candidate, speaker_model)
    accepted = similarity > threshold
    return similarity, accepted


## Test the model

In [None]:
# Load the pretrained verifier once; the cells below pass this object on to
# get_embedding / run_sv.
verif_model = load_sv_model()

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1920.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=83316686.0, style=ProgressStyle(descrip…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=1921.0, style=ProgressStyle(description…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=5534328.0, style=ProgressStyle(descript…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=128619.0, style=ProgressStyle(descripti…




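Upload two WAV files of your choice, named `a.wav` and `b.wav` here. `a.wav` is the enrollment recording (it defines the speaker model), while `b.wav` is the test recording to be verified against it. Mono files are recommended: the loaded signal is passed to the model as-is, so a multi-channel file presumably produces one embedding, and therefore one score, per channel, which would explain the two rows in the outputs below.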

In [None]:
# Enrollment: the embedding of a.wav serves as the reference speaker model.
speaker_model = get_embedding("a.wav", verif_model)
# Display the embedding's shape as a quick sanity check.
speaker_model.shape

torch.Size([2, 1, 192])

In [None]:
# Verification: score b.wav against the enrolled embedding; `decision` is the
# element-wise result of score > 0.85 (the threshold passed here).
score, decision = run_sv("b.wav", speaker_model, verif_model, 0.85)
score, decision

(tensor([[0.6440],
         [0.7231]]), tensor([[False],
         [False]]))