# Test score fusion and calibration for Speaker Verification (SV)

In [None]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

import os
import sys
# Move the working directory two levels up. The model paths used later in this
# notebook are relative, so this presumably targets the repository root
# (TODO confirm). NOTE(review): the path is relative, so re-running this cell
# moves up two more levels each time.
os.chdir('../..')
# Add "<sys.path[0]>/../.." to the module search path at index 1 --
# presumably so the `notebooks` and `sslsv` packages can be imported
# (verify against the actual directory layout).
sys.path.insert(1, os.path.join(sys.path[0], '../..'))

In [None]:
import torch

import matplotlib
# pdf.fonttype=42 makes the PDF backend embed TrueType (Type 42) fonts instead
# of the default Type 3, which keeps text editable in exported figures.
matplotlib.rc('pdf', fonttype=42)

from notebooks.notebooks_utils import (
    load_models,
    evaluate_models,
    create_metrics_df
)

from notebooks.evaluation.sv_visualization import scores_distribution
from notebooks.evaluation.ScoreCalibration import ScoreCalibration

from sslsv.evaluations.CosineSVEvaluation import CosineSVEvaluation, CosineSVEvaluationTaskConfig
from sslsv.evaluations.CosineSVEvaluation import SpeakerVerificationEvaluation, SpeakerVerificationEvaluationTaskConfig

In [None]:
# Configuration files of the two systems whose scores are fused below.
config_paths = [
    'models/export/models/ssl/voxceleb2/simclr/simclr_enc-ECAPATDNN-1024_proj-none_t-0.03/config.yml',
    'models/export/models/ssl/voxceleb2/dino/dino_plus_nohann_ecapa/config.yml',
]

# Short names keyed by each model's export directory (the parent directory of
# the corresponding config.yml listed above).
short_names = {
    'models/export/models/ssl/voxceleb2/simclr/simclr_enc-ECAPATDNN-1024_proj-none_t-0.03': 'simclr',
    'models/export/models/ssl/voxceleb2/dino/dino_plus_nohann_ecapa': 'dino',
}

# `models` is used as a mapping further down (a 'final' entry is added to it).
models = load_models(
    config_paths,
    override_names=short_names,
    checkpoint_name="model_avg.pt",
)

# Export directories of the available models (display name: directory), kept
# here for reference when changing which systems are loaded above:
# "SimCLR":       "models/export/models/ssl/voxceleb2/simclr/simclr_enc-ECAPATDNN-1024_proj-none_t-0.03/",
# "MoCo":         "models/export/models/ssl/voxceleb2/moco/moco_enc-ECAPATDNN-1024_proj-none_Q-32768_t-0.03_m-0.999/",
# "SwAV":         "models/export/models/ssl/voxceleb2/swav/swav_enc-ECAPATDNN-1024_proj-2048-BN-R-2048-BN-R-512_K-6000_t-0.1/",
# "VICReg":       "models/export/models/ssl/voxceleb2/vicreg/vicreg_enc-ECAPATDNN-1024_proj-2048-BN-R-2048-BN-R-512_inv-1.0_var-1.0_cov-0.1/",
# "DINO":         "models/export/models/ssl/voxceleb2/dino/dino_plus_nohann_ecapa/",
# "Supervised":   "models/export/models/ssl/voxceleb2/supervised/supervised_enc-ECAPATDNN-1024_loss-AAM_s-30_m-0.2/",

In [None]:
# Evaluate the loaded models with the cosine-scoring SV evaluation.
# return_evals=True makes the call hand back the evaluation objects as well;
# they are reused later in this notebook (indexed, and passed on for fusion).
evals = evaluate_models(models, CosineSVEvaluation, CosineSVEvaluationTaskConfig(), return_evals=True)
# Kept as the final expression so the notebook displays its result
# (presumably a metrics DataFrame built from `models` -- TODO confirm).
create_metrics_df(models)

In [None]:
class FusedAndCalibratedSVEvaluation(SpeakerVerificationEvaluation):
    """Speaker verification evaluation that scores trials by fusing several systems.

    Each trial score is obtained by querying every wrapped evaluation for its
    own score and passing the collected scores through a `ScoreCalibration`
    instance built from those same evaluations.
    """

    def __init__(self, evaluations, *args, **kwargs):
        """Wrap `evaluations` and create the calibration object.

        Args:
            evaluations: Evaluation objects exposing `_get_sv_score(a, b)`;
                the same sequence is handed to `ScoreCalibration`.
            *args: Forwarded unchanged to `SpeakerVerificationEvaluation`.
            **kwargs: Forwarded unchanged to `SpeakerVerificationEvaluation`.
        """
        super().__init__(*args, **kwargs)

        self.evaluations = evaluations
        self.sc = ScoreCalibration(evaluations)

    def _prepare_evaluation(self):
        """Run `ScoreCalibration.train()` on the wrapped calibration object.

        Presumably invoked by the base class before trials are scored --
        TODO confirm against `SpeakerVerificationEvaluation`.
        """
        self.sc.train()

    def _get_sv_score(self, a, b):
        """Return the fused score for the trial pair `(a, b)` as a Python number.

        `a` and `b` are forwarded untouched to every wrapped evaluation, so
        they must be whatever those evaluations' `_get_sv_score` expects.
        """
        system_scores = [
            system._get_sv_score(a, b) for system in self.evaluations
        ]

        # unsqueeze(0) adds a leading dimension (assumes one scalar score per
        # system, giving a single row with one column per system).
        fusion_input = torch.tensor(system_scores).unsqueeze(0)
        fused = self.sc.predict(fusion_input)

        # .item() requires `predict` to yield exactly one element.
        return fused.detach().item()

In [None]:
# Fused evaluation built on top of the per-model evaluations. No model is
# attached (model=None): trial scores come from the wrapped evaluations. The
# configuration of the first evaluation is reused.
evaluation = FusedAndCalibratedSVEvaluation(
    model=None,
    config=evals[0].config,
    task_config=SpeakerVerificationEvaluationTaskConfig(),
    evaluations=evals,
)

# evaluate() is called before `scores` / `targets` are read, matching the
# original evaluation order.
fused_metrics = evaluation.evaluate()

# Register the fused system next to the individual models under 'final'.
models['final'] = {
    'metrics': fused_metrics,
    'scores': evaluation.scores,
    'targets': evaluation.targets,
}

# Final expression of the cell so the notebook displays its result.
create_metrics_df(models)

In [None]:
# Plot the score distributions for the entries of `models`, which at this
# point also holds the fused 'final' entry. use_angle=False presumably keeps
# the raw scores instead of converting them to angles -- verify against
# notebooks.evaluation.sv_visualization.
scores_distribution(models, use_angle=False)