# Evaluate

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys
# Switch the working directory to the parent of the current one. The path is relative,
# so re-running this cell moves up one more level each time: run it once per kernel.
# Presumably this makes the relative './configs/...' paths used below resolve from the
# repository root -- TODO confirm against the notebook's location.
os.chdir('..')
# Insert sys.path[0] joined with '..' at index 1 of the import search path
# (presumably so that the `sslsv` package can be imported -- TODO confirm).
sys.path.insert(1, os.path.join(sys.path[0], '..'))

## Load models and embeddings

In [3]:
# Config files of the models to evaluate, given relative to the working directory.
# The file name without its extension is used as the model's key in `MODELS`.
MODELS_TO_LOAD = [
    './configs/simclr_ecapa.yml',
    './configs/simclr_ecapa_simo.yml',
]

In [4]:
from sslsv.utils.helpers import load_config
import pickle5 as pickle

# Registry of loaded models, keyed by config file name without directory or extension.
# Each entry holds the parsed config, the checkpoint directory and the saved embeddings.
MODELS = {}

for model in MODELS_TO_LOAD:
    config, checkpoint_dir = load_config(model, verbose=False)

    # NOTE(review): pickle.load can execute arbitrary code while deserializing;
    # only load embeddings files that were produced by trusted runs.
    with open(checkpoint_dir + '/embeddings.pkl', 'rb') as f:
        embeddings = pickle.load(f)

    # Drop the directory part, then the extension whatever its length
    # (a fixed [:-4] slice would mangle names of '.yaml' or extension-less configs).
    model_name = os.path.splitext(model.split('/')[-1])[0]
    MODELS[model_name] = {
        'config': config,
        'checkpoint_dir': checkpoint_dir,
        'embeddings': embeddings
    }

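## Extract utterance info from representations

An MLP classifier is fitted on each model's embeddings to predict one `/`-separated component of the embedding key (speaker, video or segment ID). The accuracies printed below are measured on the same samples the classifier was fitted on, so they are training accuracies, not held-out estimates.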

In [78]:
from sklearn.neural_network import MLPClassifier

def fit_mlp_on_representations(model, y_key_pos, test_size=None, random_state=None):
    """Fit an MLP classifier that predicts one component of each embedding key.

    The features are taken from ``MODELS[model]['embeddings'][0]``, a mapping from
    key to an indexable value; the first item of each value is converted with
    ``.numpy()`` (presumably a torch tensor -- TODO confirm). The label of a sample
    is the ``y_key_pos``-th ``'/'``-separated component of its key.

    Args:
        model: Key of the model in the module-level ``MODELS`` registry.
        y_key_pos: Index of the ``'/'``-separated key component used as label.
        test_size: If ``None`` (default), the classifier is fitted and scored on
            all samples, i.e. the printed value is a *training* accuracy. Otherwise
            it is forwarded to ``sklearn.model_selection.train_test_split`` and the
            classifier is fitted on the training part only; both training and
            held-out accuracies are printed.
        random_state: Seed forwarded to ``MLPClassifier`` and, when ``test_size``
            is set, to ``train_test_split``. ``None`` keeps sklearn's unseeded
            default behaviour.

    Returns:
        Tuple ``(clf, X, y)``: the fitted classifier, the full list of feature
        arrays and the full list of labels (including held-out samples, if any).

    Raises:
        KeyError: If ``model`` is not a key of ``MODELS``.
    """
    representations = MODELS[model]['embeddings'][0]
    keys = list(representations.keys())

    X = [representations[key][0].numpy() for key in keys]
    y = [key.split('/')[y_key_pos] for key in keys]

    clf = MLPClassifier(random_state=random_state)

    if test_size is None:
        # Original behaviour: fit and score on the very same samples.
        clf.fit(X, y)
        print(f'Accuracy: {clf.score(X, y)}')
    else:
        # Imported here so the default path needs nothing beyond MLPClassifier.
        from sklearn.model_selection import train_test_split

        # No stratification: some labels may occur only once (one sample per class),
        # which a stratified split would reject.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
        clf.fit(X_train, y_train)
        print(f'Train accuracy: {clf.score(X_train, y_train)}')
        print(f'Test accuracy: {clf.score(X_test, y_test)}')

    return clf, X, y

### Speaker ID

In [79]:
# Probe the 'simclr_ecapa' embeddings for key component 0 (the speaker ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa', y_key_pos=0)

Accuracy: 1.0


In [81]:
# Probe the 'simclr_ecapa_simo' embeddings for key component 0 (the speaker ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa_simo', y_key_pos=0)

Accuracy: 1.0


### Video ID

In [82]:
# Probe the 'simclr_ecapa' embeddings for key component 1 (the video ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa', y_key_pos=1)

Accuracy: 0.9927782497875955




In [83]:
# Probe the 'simclr_ecapa_simo' embeddings for key component 1 (the video ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa_simo', y_key_pos=1)

Accuracy: 0.9932030586236194




### Segment ID

In [84]:
# Probe the 'simclr_ecapa' embeddings for key component 2 (the segment ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa', y_key_pos=2)

Accuracy: 0.4203483432455395




In [85]:
# Probe the 'simclr_ecapa_simo' embeddings for key component 2 (the segment ID, per this
# section's heading). Assigning to `_` keeps the returned tuple out of the cell output.
_ = fit_mlp_on_representations(model='simclr_ecapa_simo', y_key_pos=2)

Accuracy: 0.326677994902294


