In [1]:
import os
import sys
import torch
import logging
import torchaudio
import speechbrain as sb
from tqdm.contrib import tqdm
from hyperpyyaml import load_hyperpyyaml
from speechbrain.utils.metric_stats import EER, minDCF
from speechbrain.utils.data_utils import download_file
from speechbrain.utils.distributed import run_on_main
import shutil
import numpy as np
# Expose only physical GPU 2 to this process. This has to be in place before
# CUDA is first initialised. NOTE(review): with a single visible GPU, the
# "cuda:0" configured later should map to this card — confirm on this host.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"

In [2]:
from verification import compute_embedding,compute_embedding_loop,get_verification_scores,dataio_prep
from voxceleb_prepare import prepare_voxceleb  # noqa E402

In [3]:
from speechbrain.lobes.features import Fbank

In [4]:
params = {}  # hyperparameter dictionary, filled in by the cells below

In [5]:
import sys
sys.path.append("/home/qinyc/02_exp/notebook/model/")
from ECAPA_TDNN_2 import ECAPA_TDNN

# Experiment root and the checkpoint directory of the trained embedding model.
params.update(
    {
        "output_folder": "/home/qinyc/02_exp/exp13/",
        "pretrain_path": "/home/qinyc/02_exp/exp13/save/CKPT+2022-03-19+04-49-19+00",
    }
)

In [6]:
# Device, dataset locations and CSV manifests used by the rest of the notebook.
params.update(
    {
        "device": "cuda:0",
        "voxceleb_source": "/data0/qyc/vox1_2",
        "data_folder": "/data0/qyc/vox1_2/vox1_test",
        # Checkpoints are collected under <output_folder>/save.
        "save_folder": os.path.join(params["output_folder"], "save"),
        "train_data": "/home/qinyc/02_exp/exp/train.csv",
        "enrol_data": "/home/qinyc/02_exp/exp/enrol_O.csv",
        "test_data": "/home/qinyc/02_exp/exp/test_O.csv",
    }
)

# Trial list; each line is later parsed as "<label> <enrol_file> <test_file>".
veri_file_path = "/home/qinyc/02_exp/exp/veri_test2.txt"

In [7]:
# One utterance per batch for each of the three dataloaders.
params.update(
    {
        "train_dataloader_opts": {"batch_size": 1},
        "enrol_dataloader_opts": {"batch_size": 1},
        "test_dataloader_opts": {"batch_size": 1},
    }
)

# Feature extraction, feature normalisation, embedding network and
# embedding-level normalisation (constructed in this order).
params["compute_features"] = Fbank(n_mels=80)
params["mean_var_norm"] = sb.processing.features.InputNormalization(
    norm_type="sentence", std_norm=False
)
params["embedding_model"] = ECAPA_TDNN(input_size=80)
params["mean_var_norm_emb"] = sb.processing.features.InputNormalization(
    norm_type="global", std_norm=False
)

# Pretrainer pointing at embedding_model.ckpt inside the checkpoint directory;
# the weights are only loaded once its collect/load methods are called.
params["pretrainer"] = sb.utils.parameter_transfer.Pretrainer(
    collect_in=params["save_folder"],
    loadables={"embedding_model": params["embedding_model"]},
    paths={
        "embedding_model": os.path.join(
            params["pretrain_path"], "embedding_model.ckpt"
        )
    },
)

# Remaining feature and scoring options.
params.update(
    {
        "left_frames": 0,
        "right_frames": 0,
        "deltas": False,
        "score_norm": "s-norm",
        "cohort_size": 2000,
        "n_train_snts": 40000,
    }
)

In [8]:
# Create the experiment folder at params["output_folder"], passing the
# hyperparameter file name as `hyperparams_to_save` and no overrides.
params_file = "verification.yaml"
sb.core.create_experiment_directory(
    experiment_directory=params["output_folder"],
    hyperparams_to_save=params_file,
    overrides="",
)

speechbrain.core - Beginning experiment!
speechbrain.core - Experiment folder: /home/qinyc/02_exp/exp13/


In [9]:
## Set up what the dataloaders will load
# counts for voxceleb - O: 40000 4715 4713 * 1   40000 4715 4713
# counts for voxceleb - E: 40000 145160 142540 * 1 
# counts for voxceleb - H: 40000 137924 135415 * 1

## batch size: 1 is used

# Build the train / enrol / test dataloaders and print their lengths in that order.
train_dataloader, enrol_dataloader, test_dataloader = dataio_prep(params)
print(len(train_dataloader) ,len(enrol_dataloader) ,len(test_dataloader))

40000 4715 4713


In [10]:
# Collect the checkpoint files (through run_on_main), then load the weights
# into the objects registered as loadables on the pretrainer.
run_on_main(params["pretrainer"].collect_files)
params["pretrainer"].load_collected()

# Inference only: switch to eval mode and place the model on the configured
# device (params["device"]) instead of whatever the default CUDA device is,
# so the model always follows the device setting in the config cell.
params["embedding_model"].eval()
params["embedding_model"].to(params["device"])

speechbrain.pretrained.fetching - Fetch embedding_model.ckpt: Linking to local file in /home/qinyc/02_exp/exp13/save/CKPT+2022-03-19+04-49-19+00/embedding_model.ckpt.
speechbrain.utils.parameter_transfer - Loading pretrained files for: embedding_model


ECAPA_TDNN(
  (blocks): ModuleList(
    (0): TDNNBlock(
      (conv): Conv1d(
        (conv): Conv1d(80, 512, kernel_size=(5,), stride=(1,), padding=same)
      )
      (activation): ReLU()
      (norm): BatchNorm1d(
        (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): SERes2NetBlock(
      (tdnn1): TDNNBlock(
        (conv): Conv1d(
          (conv): Conv1d(512, 512, kernel_size=(1,), stride=(1,), padding=same)
        )
        (activation): ReLU()
        (norm): BatchNorm1d(
          (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (res2net_block): Res2NetBlock_2(
        (layer_1): TDNNBlock(
          (conv): Conv1d(
            (conv): Conv1d(512, 512, kernel_size=(3,), stride=(1,), padding=same)
          )
          (activation): ReLU()
          (norm): BatchNorm1d(
            (norm): BatchNorm1d(512, eps=1e-05, momentum=0.1, affine=True, track_

In [11]:
# First embedding-extraction pass. Its outputs (enrol_dict_1 / test_dict_1) are
# not read by any later cell shown here; the comment on the second pass
# suggests this run exists to let the normalization statistics settle —
# TODO confirm.
enrol_dict_1 = compute_embedding_loop(enrol_dataloader,params)
test_dict_1 = compute_embedding_loop(test_dataloader,params)

100%|██████████| 4715/4715 [00:52<00:00, 89.97it/s] 
100%|██████████| 4713/4713 [00:48<00:00, 97.99it/s] 


In [12]:
# Second run (normalization stats are more stable).
# These are the dictionaries that get_verification_scores looks up by
# enrol / test utterance id.
enrol_dict = compute_embedding_loop(enrol_dataloader,params)
test_dict = compute_embedding_loop(test_dataloader,params)

100%|██████████| 4715/4715 [00:48<00:00, 97.48it/s] 
100%|██████████| 4713/4713 [00:47<00:00, 98.27it/s] 


In [13]:
# Embeddings of the training utterances; they are only needed when score
# normalization is configured, where they are stacked into the impostor cohort.
if "score_norm" in params:
    train_dict = compute_embedding_loop(train_dataloader,params)

100%|██████████| 40000/40000 [06:43<00:00, 99.22it/s] 


In [14]:
# Load the trial list: one entry per line, trailing whitespace removed.
with open(veri_file_path) as f:
    veri_test = list(map(str.rstrip, f))
print(len(veri_test))

37611


In [15]:
def _notebook_global(name):
    """Fetch a notebook-level variable by name, failing with a clear NameError."""
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            "name '%s' is not defined; run the embedding-extraction cells "
            "first or pass it explicitly" % name
        ) from None


def _cohort_stats(emb, train_cohort, similarity, cohort_size=None):
    """Return (mean, std) of the similarities between ``emb`` and the cohort.

    When ``cohort_size`` is given, only the top-scoring cohort entries are
    kept; the size is capped at the number of cohort entries so ``torch.topk``
    never receives an out-of-range ``k``.
    """
    emb_rep = emb.repeat(train_cohort.shape[0], 1, 1)
    cohort_scores = similarity(emb_rep, train_cohort)
    if cohort_size is not None:
        k = min(cohort_size, cohort_scores.shape[0])
        cohort_scores = torch.topk(cohort_scores, k=k, dim=0)[0]
    return torch.mean(cohort_scores, dim=0), torch.std(cohort_scores, dim=0)


def get_verification_scores(
    veri_test, params, enrol_dict=None, test_dict=None, train_dict=None
):
    """Computes positive and negative scores given the verification split.

    NOTE: this definition shadows the ``get_verification_scores`` imported
    from ``verification`` at the top of the notebook.

    Arguments
    ---------
    veri_test : list of str
        Trial lines of the form ``"<label> <enrol_file> <test_file>"``; the
        part of each field after the first ``.`` is dropped. Blank lines are
        skipped.
    params : dict
        Must contain ``"output_folder"``. Optional keys: ``"score_norm"``
        (``"z-norm"``, ``"t-norm"`` or ``"s-norm"``; any other value leaves the
        raw cosine score untouched) and ``"cohort_size"`` (top-k cohort
        selection, capped at the cohort length).
    enrol_dict, test_dict : dict, optional
        Utterance id -> embedding tensor. Default to the notebook-level
        ``enrol_dict`` / ``test_dict`` variables.
        (Embeddings are assumed to be shaped like ``(1, emb_dim)`` — TODO
        confirm against ``compute_embedding_loop``.)
    train_dict : dict, optional
        Cohort embeddings, only read when ``"score_norm"`` is in ``params``.
        Defaults to the notebook-level ``train_dict`` variable.

    Returns
    -------
    positive_scores, negative_scores : list of torch.Tensor
        Scores of the trials labelled 1 and of all other trials.

    Side effects
    ------------
    Writes ``"<enrol_id> <test_id> <label> <score>"`` lines to
    ``<output_folder>/scores.txt``.
    """
    if enrol_dict is None:
        enrol_dict = _notebook_global("enrol_dict")
    if test_dict is None:
        test_dict = _notebook_global("test_dict")

    positive_scores = []
    negative_scores = []

    # Cosine similarity initialization
    similarity = torch.nn.CosineSimilarity(dim=-1, eps=1e-6)

    # creating cohort for score normalization
    use_norm = "score_norm" in params
    norm_type = params.get("score_norm")
    cohort_size = params.get("cohort_size")
    train_cohort = None
    if use_norm:
        if train_dict is None:
            train_dict = _notebook_global("train_dict")
        train_cohort = torch.stack(list(train_dict.values()))

    # Cohort statistics depend only on the utterance, not on the trial, so
    # they are computed once per utterance id and reused across trials.
    enrol_stats = {}
    test_stats = {}

    save_file = os.path.join(params["output_folder"], "scores.txt")
    # The context manager closes the score file even if a trial fails.
    with open(save_file, "w") as s_file:
        for line in veri_test:
            if not line.strip():
                continue

            # Reading verification file (label enrol_file test_file)
            fields = line.split(" ")
            lab_pair = int(fields[0].rstrip().split(".")[0].strip())
            enrol_id = fields[1].rstrip().split(".")[0].strip()
            test_id = fields[2].rstrip().split(".")[0].strip()
            enrol = enrol_dict[enrol_id]
            test = test_dict[test_id]

            # Compute the score for the given sentence
            score = similarity(enrol, test)[0]

            # Perform score normalization
            if use_norm:
                if norm_type in ("z-norm", "s-norm"):
                    if enrol_id not in enrol_stats:
                        enrol_stats[enrol_id] = _cohort_stats(
                            enrol, train_cohort, similarity, cohort_size
                        )
                    mean_e_c, std_e_c = enrol_stats[enrol_id]
                if norm_type in ("t-norm", "s-norm"):
                    if test_id not in test_stats:
                        test_stats[test_id] = _cohort_stats(
                            test, train_cohort, similarity, cohort_size
                        )
                    mean_t_c, std_t_c = test_stats[test_id]

                if norm_type == "z-norm":
                    score = (score - mean_e_c) / std_e_c
                elif norm_type == "t-norm":
                    score = (score - mean_t_c) / std_t_c
                elif norm_type == "s-norm":
                    score_e = (score - mean_e_c) / std_e_c
                    score_t = (score - mean_t_c) / std_t_c
                    score = 0.5 * (score_e + score_t)

            # write score file
            s_file.write("%s %s %i %f\n" % (enrol_id, test_id, lab_pair, score))

            if lab_pair == 1:
                positive_scores.append(score)
            else:
                negative_scores.append(score)

    return positive_scores, negative_scores

In [16]:
## Generate the trial scores (also written to <output_folder>/scores.txt)
positive_scores, negative_scores = get_verification_scores(veri_test,params)
# del enrol_dict, test_dict

In [17]:
import numpy as np

## Given trial scores and their class labels, compute the equal error rate (EER)
def compute_eer(fnr, fpr):
    """ computes the equal error rate (EER) given FNR and FPR values calculated
        for a range of operating points on the DET curve

    The EER is linearly interpolated between the last operating point with
    ``fnr < fpr`` and the first one with ``fnr >= fpr``. If no point has
    ``fnr < fpr`` (e.g. only one target or one impostor trial), the FNR at the
    first point with ``fnr >= fpr`` is returned instead of raising.

    fnr, fpr: 1-D numpy arrays of equal length, indexed by operating point.
    Raises IndexError if no operating point satisfies ``fnr >= fpr``
    (e.g. empty or NaN inputs).
    """

    diff_pm_fa = fnr - fpr
    x1 = np.flatnonzero(diff_pm_fa >= 0)[0]
    below = np.flatnonzero(diff_pm_fa < 0)
    if below.size == 0:
        # There is nothing to interpolate against; use the first point as is.
        return fnr[x1]
    x2 = below[-1]
    a = (fnr[x1] - fpr[x1]) / (fpr[x2] - fpr[x1] - (fnr[x2] - fnr[x1]))
    return fnr[x1] + a * (fnr[x2] - fnr[x1])


def compute_pmiss_pfa(scores, labels):
    """ computes false positive rate (FPR) and false negative rate (FNR)
    given trial scores and their labels.

    scores: sequence of trial scores (list or numpy array).
    labels: sequence of the same length, 1 for target and 0 for impostor trials.

    Returns ``(fnr, fpr)``, two arrays indexed by the trials sorted by
    ascending score: ``fnr[i]`` is the fraction of targets among the first
    ``i + 1`` trials, ``fpr[i]`` the fraction of impostors after them.
    If there are no target (or no impostor) trials the corresponding rate is
    a division by zero and comes back as NaN.
    """

    # Accept plain Python lists as well as arrays; fancy indexing needs arrays.
    scores = np.asarray(scores)
    labels = np.asarray(labels)

    sorted_ndx = np.argsort(scores)
    labels = labels[sorted_ndx]

    tgt = (labels == 1).astype('f8')
    imp = (labels == 0).astype('f8')

    fnr = np.cumsum(tgt) / np.sum(tgt)
    fpr = 1 - np.cumsum(imp) / np.sum(imp)
    return fnr, fpr


def compute_min_cost(scores, labels, p_target=0.01):
    """ computes the EER and the normalized minimum DCF for a set of trials

    scores / labels are forwarded to compute_pmiss_pfa; p_target is the
    target prior handed to compute_c_norm (which uses its default costs).
    Returns the tuple (eer, min_dcf).
    """
    miss_rates, fa_rates = compute_pmiss_pfa(scores, labels)
    return (
        compute_eer(miss_rates, fa_rates),
        compute_c_norm(miss_rates, fa_rates, p_target),
    )


def compute_c_norm(fnr, fpr, p_target, c_miss=1, c_fa=1):
    """ normalized minimum detection cost function (DCF)

    fnr, fpr: miss / false-alarm rates per operating point (numpy arrays).
    p_target: a priori probability of a target trial.
    c_miss, c_fa: costs of a miss and of a false alarm.

    The smallest cost over all operating points is divided by the cost of
    the cheaper of the two trivial decisions, min(c_miss * p_target,
    c_fa * (1 - p_target)).
    """
    weighted_miss = c_miss * fnr * p_target
    weighted_fa = c_fa * fpr * (1 - p_target)
    best_cost = np.min(weighted_miss + weighted_fa)
    default_cost = min(c_miss * p_target, c_fa * (1 - p_target))
    return best_cost / default_cost

In [18]:
# Flatten the per-trial score tensors into plain Python floats, target trials
# first and non-target trials after, with a parallel list of 1 / 0 labels.
scores_list = [score.cpu().item() for score in positive_scores]
label_list = [1] * len(scores_list)

scores_list += [score.cpu().item() for score in negative_scores]
label_list += [0] * len(negative_scores)

print(len(scores_list),len(label_list))
scores_list_n = np.array(scores_list)
label_list_n = np.array(label_list)

37611 37611


In [19]:
# EER and normalized minimum DCF at p_target=0.01 (c_miss = c_fa = 1, the
# compute_c_norm defaults).
eer, min_c = compute_min_cost(scores_list_n, label_list_n, p_target=0.01)

# Minimum DCF again at p_target=0.001; the EER returned by this call is discarded.
_, min_c2 = compute_min_cost(scores_list_n, label_list_n, p_target=0.001)

# Prints: EER in percent, minDCF(p_target=0.01), minDCF(p_target=0.001).
print(eer * 100,min_c,min_c2)

0.871923015577647 0.11502723601489293 0.1808850122327412
