# SV Live Demo

In [10]:
# Enable IPython's autoreload extension so edits to the imported project
# modules are picked up without restarting the kernel. Mode 2 reloads all
# modules before each cell is executed.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [22]:
import pandas as pd
import numpy as np
import os
import itertools
import torch

In [12]:
from demo_utils import *
from demo_sv_system import demo_sv_system

In [13]:
from IPython.display import display
from IPython.display import Audio

### Model load

In [14]:
# Speaker-verification model: load the checkpoint (path is relative to the
# notebook's working directory) and move it onto the GPU.
sv_ckpt_path = "models/tdnn_model.pth.tar"
model = load_model(sv_ckpt_path).cuda()

In [15]:
# Command-recognition model: load the checkpoint (path is relative to the
# notebook's working directory) and move it onto the GPU.
cr_ckpt_path = "models/cr_model_best.pth.tar"
cr_model = load_cr_model(cr_ckpt_path).cuda()

loaded from models/cr_model_best.pth.tar


### Parse dataset 

데이터셋에 존재하는 사람들.

['inpyo', 'Barend2', 'Simeon', 'Hyemi', 'minsu', 'ingyu']

데이터셋에 존재하는 명령어.

['stop', 'go', 'on', 'right', 'left', 'no', 'off', 'down', 'up', 'yes']

In [16]:
# Load the utterance table used by the rest of the notebook. Later cells read
# each row's `spk` (speaker), `sent` (spoken command) and `wav` (audio path)
# fields. Presumably a pandas DataFrame, since it is iterated with
# `.iterrows()` and indexed with `.iloc` below — confirm in load_dataset.
df = load_dataset()

## sv_system

초기등록할 사람, 명령어, 명령어당 개수 선택

In [17]:
# Speakers to enroll initially.
init_spk = ['inpyo', 'minsu']
# Commands whose utterances are used for the initial enrollment.
init_cmd = ['stop', 'go', 'down']
# Number of utterances to enroll per command.
n_per_sent = 2
# Split the dataset into the enrollment utterances and the pool that the demo
# samples test utterances from. NOTE(review): whether the test pool excludes
# the enrolled utterances depends on split_dataset — confirm in its source.
enroll_df, test_df = split_dataset(df, init_spk, init_cmd, n_per_sent)

등록된 음성

In [18]:
# List every enrollment utterance with its speaker/command label and an
# inline audio player.
enroll_line = "speaker:{}, command:{}"
for _, utt in enroll_df.iterrows():
    print(enroll_line.format(utt.spk, utt.sent))
    display(Audio(utt.wav))

speaker:inpyo, command:down


speaker:inpyo, command:down


speaker:inpyo, command:go


speaker:inpyo, command:go


speaker:inpyo, command:stop


speaker:inpyo, command:stop


speaker:minsu, command:down


speaker:minsu, command:down


speaker:minsu, command:go


speaker:minsu, command:go


speaker:minsu, command:stop


speaker:minsu, command:stop


sv_system 초기화

In [19]:
# Build the speaker-verification system around the loaded model.
test_sv_system = load_sv_system(model)

# enrollment
# (`init_enrollemnt` is the spelling used by the project's API.)
test_sv_system.init_enrollemnt()

# Group the enrollment wavs by speaker, keeping first-seen speaker order and
# the original utterance order within each speaker.
wav_dict = {}
for _, utt in enroll_df.iterrows():
    wav_dict.setdefault(utt.spk, []).append(utt.wav)

# Register each speaker's utterances with one batch call.
for spk in wav_dict:
    test_sv_system.batch_enroll(wav_dict[spk], spk)

cr_system 초기화

In [20]:
from demo_cr_system import cr_system

# Wrap the command-recognition model in the demo's cr_system helper; its
# `recog(wav)` method is what the end-to-end loop calls to predict the
# spoken command.
test_cr_system = cr_system(cr_model)

전체 시스템 수행

In [23]:
from termcolor import colored

# Values shown in the header. They are display-only here; the thresholds that
# are actually applied live inside the sv_system.
# NOTE(review): confirm these match the sv_system's configured thresholds.
VERIFICATION_THRES = 0.705
ENROLLMENT_THRES = 0.827
# Number of randomly drawn test utterances to run through the pipeline.
N_TRIALS = 20

print("enrolled speakers: {}".format(init_spk))
print("verification thres:{}\nenrollment thres:{}".format(VERIFICATION_THRES,
                                                          ENROLLMENT_THRES))
print("")
print("No.\tIn\tscore\tresult\t\tcr_result")
print("="*50)

# One entry per trial: [true speaker, wav, colored SV result]. Consumed by the
# interactive audio browser in the next cell.
records = []
for i in range(N_TRIALS):
    # Draw one test utterance uniformly at random (with replacement).
    test_sample = test_df.iloc[np.random.randint(0, len(test_df))]

    # SV
    result, pred_spk, score = test_sv_system.verify(test_sample.wav)
    if test_sample.spk in init_spk:
        # An enrolled speaker is handled correctly only if they are accepted
        # AND identified as themselves; being accepted as a different enrolled
        # speaker is an error and must not be shown as a success.
        sv_correct = (result != 'Reject') and (pred_spk == test_sample.spk)
    else:
        # A speaker who was never enrolled must be rejected.
        sv_correct = (result == 'Reject')

    if sv_correct:
        result = colored(result, 'green')
        # Nothing to flag on a correct decision, so the predicted speaker is
        # left out of the printed row.
        pred_spk = ""
    else:
        result = colored(result, 'red')
    records.append([test_sample.spk, test_sample.wav, result])

    # CR
    cr_pred = test_cr_system.recog(test_sample.wav)
    # The ground-truth command is the name of the wav's parent directory.
    true_cmd = test_sample.wav.split('/')[-2]
    cr_result = colored(cr_pred, 'green' if cr_pred == true_cmd else 'red')

    print("{}\t{}\t{:.3f}\t{}({})\t{}".format(i, test_sample.spk,score,
                                         result, pred_spk, cr_result))
    print("-"*50)

enrolled speakers: ['inpyo', 'minsu']
verification thres:0.705
enrollment thres:0.827

No.	In	score	result		cr_result
0	Hyemi	0.423	[32mReject[0m()	[32moff[0m
--------------------------------------------------
1	Hyemi	0.502	[32mReject[0m()	[32mgo[0m
--------------------------------------------------
2	Simeon	0.593	[32mReject[0m()	[32myes[0m
--------------------------------------------------
3	Simeon	0.645	[32mReject[0m()	[32mleft[0m
--------------------------------------------------
4	Hyemi	0.444	[32mReject[0m()	[32myes[0m
--------------------------------------------------
5	ingyu	0.658	[32mReject[0m()	[32mdown[0m
--------------------------------------------------
6	Hyemi	0.508	[32mReject[0m()	[32mright[0m
--------------------------------------------------
7	Simeon	0.609	[32mReject[0m()	[32moff[0m
--------------------------------------------------
8	ingyu	0.736	[31mAccept[0m(minsu)	[32mgo[0m
--------------------------------------------------
9	inpyo	

In [24]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

def display_audio(No):
    """Show the speaker, SV verdict and an audio player for trial `No`.

    Reads the module-level `records` list, whose entries are
    [true speaker, wav, colored SV result].
    """
    print()
    entry = records[No]
    print("{} ({})".format(entry[0], entry[2]))
    display(Audio(entry[1]))


# Slider over the recorded trials; moving it re-renders the selected trial.
trial_slider = widgets.IntSlider(min=0, max=len(records) - 1, step=1, value=0)
interact(display_audio, No=trial_slider);


Hyemi ([32mReject[0m)


동작 후 등록된 음성

In [25]:
# Each initial speaker starts with one block of utterances per command, so
# anything stored past this offset was added while the system was running.
n_init_enroll = n_per_sent * len(init_cmd)
for spk, wavs in test_sv_system.enrolled_wavs.items():
    print("speaker:{}".format(spk))
    added_wavs = wavs[n_init_enroll:]
    for wav in added_wavs:
        display(Audio(wav))

speaker:inpyo
speaker:minsu
