In [1]:
import os
import pandas as pd
import numpy as np
import torch.nn.functional as F

In [2]:
from sv_system.data.dataloader import init_default_loader
from sv_system.data.dataset import SpeechDataset
from sv_system.utils import secToSample, secToFrames
from sv_system.sv_score import embeds

In [3]:
from sv_system.utils.parser import get_sv_parser
# Start from the parser defaults (no CLI arguments are parsed inside a notebook),
# then override the feature-extraction settings in a single update.
options = get_sv_parser().parse_args(args=[])
vars(options).update(
    n_dct_filters=40,
    n_mels=40,
    timeshift_ms=100,
    # NOTE(review): window size/stride look like seconds (25 ms / 10 ms) — confirm against the parser.
    window_size=0.025,
    window_stride=0.010,
    cache_size=32768,
)

In [8]:
# Input settings: 'fbank' features, clipping enabled, with the clip length and
# splice size derived from durations via the project helpers.
# NOTE(review): the arguments 3 and 0.2 are presumably seconds (helper names start with "sec") — confirm.
vars(options).update(
    input_format='fbank',
    input_clip=True,
    input_length=secToSample(3),
    splice_frames=secToFrames(0.2),
)

In [9]:
# Root folder of the RedDots wav files, stored on the options object.
# NOTE(review): the file-listing cell walks "../human_sv/wavs/reddots/" instead of
# this folder — confirm both locations hold the same data.
options.data_folder = "wavs/reddots/"

In [10]:
from sv_system.model.AuxModels import LongCNN, SimpleCNN
import pickle
# Build the network and restore its weights from a saved checkpoint.
# NOTE(review): the checkpoint file name mentions "longcnn" while SimpleCNN is
# instantiated — confirm the architecture matches the saved weights.
# NOTE(review): 1300 is passed as the second constructor argument; presumably the
# number of training speakers — confirm.
checkpoint_path = "models/voxc/si_train/full_train/si_voxc_longcnn_3s_3s_full.pt"
model = SimpleCNN(vars(options), 1300)
model.load(checkpoint_path)
model.cuda()  # unconditional: this cell requires a CUDA device

# No LDA projection is applied in this run. To enable one, load a pickled
# transform instead, e.g.:
#   lda = pickle.load(open("models/lda/si_reddots_0.2s_random_2_lda.pkl", "rb"))
lda = None

loaded from models/voxc/si_train/full_train/si_voxc_longcnn_3s_3s_full.pt


In [11]:
# Collect every .wav file under the RedDots tree as a sorted list of absolute paths.
# The sort on the absolute path fixes the row order of test_df, which later cells
# rely on when they index the embeddings by row position.
# NOTE(review): this walks "../human_sv/wavs/reddots/" while options.data_folder is
# "wavs/reddots/" — confirm both resolve to the same data.
test_samples = sorted(
    os.path.abspath(os.path.join(dirpath, filename))
    for dirpath, _dirnames, filenames in os.walk("../human_sv/wavs/reddots/")
    for filename in filenames
    if filename.endswith(".wav")
)

# One record per file: [speaker directory, file name, "<speaker>/<file name>"].
# os.path is used instead of splitting on a hard-coded "/" so the speaker and
# file name are extracted correctly whatever the platform's path separator is.
samples = []
for sample_path in test_samples:
    speaker_dir, file_name = os.path.split(sample_path)
    speaker = os.path.basename(speaker_dir)
    samples.append([speaker, file_name, os.path.join(speaker, file_name)])
test_df = pd.DataFrame.from_records(samples, columns=['spk', 'file_n', 'file'])

# Files whose name starts with "e" are enrollment utterances; all others are test trials.
usage = test_df.file_n.apply(lambda name: "enroll" if name.startswith("e") else "test")
test_df['usage'] = usage

dataset = SpeechDataset.read_df(vars(options), test_df, "test")

In [12]:
# Build the loader without shuffling. The scoring cell selects embeddings by the
# row positions of test_df, so the sample order must not change.
# NOTE(review): assumes init_default_loader iterates the dataset in order when
# shuffle=False — confirm.
val_dataloader = init_default_loader(dataset, shuffle=False)

In [13]:
# Run the model over the loader to obtain the embeddings; the second return
# value of embeds() is not used here. lda is None in this notebook.
# NOTE(review): presumably one embedding row per test_df row, in the same order — confirm.
embeddings, _ = embeds(options, val_dataloader, model, lda)

In [14]:
import torch

# For every speaker: average the enrollment embeddings into a single vector and
# score each of that speaker's test embeddings against it with cosine similarity.
# scores maps speaker id -> 1-D tensor with one similarity per test row.
spks = test_df.spk.unique().tolist()
scores = dict()
for spk in spks:
    print(spk)
    # Work on plain NumPy boolean arrays. Calling np.nonzero on a pandas Series
    # depends on the deprecated/removed Series.nonzero and is fragile across
    # NumPy/pandas versions.
    is_spk = (test_df.spk == spk).values
    enroll_mask = is_spk & (test_df.usage == "enroll").values
    test_mask = is_spk & (test_df.usage == "test").values

    # Row positions (not index labels) of the selected utterances, as integer
    # index tensors for selecting rows of `embeddings`.
    enroll_idxs = torch.from_numpy(np.flatnonzero(enroll_mask))
    test_idxs = torch.from_numpy(np.flatnonzero(test_mask))

    # keepdim=True keeps a leading dimension of size 1 so the mean enrollment
    # vector broadcasts against all test embeddings.
    enroll_embeddings = torch.mean(embeddings[enroll_idxs], 0, True)
    test_embeddings = embeddings[test_idxs]
    scores[spk] = F.cosine_similarity(enroll_embeddings, test_embeddings)

A_m0001_31
B_m0015_40
C_m0008_39


In [15]:
# Speaker inspected in the following cells: the second entry of spks
# (B_m0015_40 in the recorded run).
spk_t = spks[1]

In [16]:
# Read the labels for the selected speaker's test trials: the last
# whitespace-separated field of every line, parsed as an integer.
# NOTE(review): the line order is assumed to match the sorted test files used
# for scoring — confirm.
labels = []
with open("wavs/reddots/{0}/test_labels.txt".format(spk_t), "r") as f:
    for line in f:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing newline at the end of the file)
            # instead of failing with an IndexError.
            continue
        labels.append(int(fields[-1]))

In [17]:
# Print each test score of the selected speaker next to its label.
# zip stops at the shorter sequence, so a length mismatch is not reported.
for score, label in zip(scores[spk_t], labels):
    print((score, label))

(tensor(0.8863), 0)
(tensor(0.9298), 0)
(tensor(0.8992), 0)
(tensor(0.8696), 1)
(tensor(0.8928), 1)
(tensor(0.8778), 1)
(tensor(0.9181), 0)
(tensor(0.8330), 1)
(tensor(0.9142), 0)
(tensor(0.8707), 1)
