# Human performance at one-shot speaker identification
This notebook lets you perform the same kind of one-shot speaker identification task that this repository trains a machine learning model to perform. It uses the same 3-second clips of audiobook segments that the model receives.

In [1]:
import soundfile as sf
import numpy as np
import time
from IPython.display import Audio, clear_output, display

In [2]:
import sys
sys.path.append('../')
from config import PATH, LIBRISPEECH_SAMPLING_RATE
from data import LibriSpeechDataset

Using TensorFlow backend.


### Parameters

In [3]:
# LibriSpeech subsets that the evaluation clips are drawn from.
validation_set = [
    'train-clean-100',
    'train-clean-360',
    'dev-clean',
]

# Length of every audio clip, in seconds.
n_seconds = 3

# Shape of each task: samples per candidate speaker (n-shot) and number of
# candidate speakers offered per trial (k-way).
n_shot_classification = 1
k_way_classification = 5

# Number of trials in one evaluation session.
num_tasks = 10

# Get data

In [4]:
# Index the configured subsets into fixed-length clips.
# NOTE(review): stochastic=False presumably disables random cropping of the
# clips -- confirm against LibriSpeechDataset in data.py.
valid_sequence = LibriSpeechDataset(
    validation_set,
    n_seconds,
    stochastic=False,
)

Initialising LibriSpeechDataset with minimum length = 3s and subsets = ['train-clean-100', 'train-clean-360', 'dev-clean']
Finished indexing data. 131955 usable files found.


# Evaluation loop

In [5]:
# Interactive evaluation: for each trial, play one query clip and
# k_way_classification candidate clips in random order, then ask the
# participant which candidate is the same speaker as the query.
#
# Results are left in the notebook namespace for the scoring cell:
#   name    -- participant's name
#   correct -- 1-based position of the matching speaker, one entry per trial
#   answers -- participant's reply, one entry per *answered* trial

# Python 2 calls the line-reading builtin raw_input; fall back to input so the
# cell also runs on a Python 3 kernel.
try:
    read_line = raw_input
except NameError:
    read_line = input

name = read_line('Enter your name: ')

# Accepted replies are '1'..'k', derived from the configured number of
# candidate speakers instead of being hard-coded for k = 5.
valid_answers = [str(n) for n in range(1, k_way_classification + 1)]

correct = []
answers = []
for trial in range(num_tasks):
    print('******* Trial {} of {} ******'.format(trial + 1, num_tasks))
    query_sample, support_set_samples = valid_sequence.build_n_shot_task(
        k_way_classification, n_shot_classification)

    print('Match this sample:')
    display(Audio(data=query_sample[0], rate=LIBRISPEECH_SAMPLING_RATE))

    # One playable clip per row of the support set.
    clips = [
        Audio(data=support_set_samples[0][row, :], rate=LIBRISPEECH_SAMPLING_RATE)
        for row in range(k_way_classification)]
    # Look up the display name for each support-set speaker id.
    speaker_names = [
        valid_sequence.df[valid_sequence.df['speaker_id'] == speaker_id]['name'].values[0]
        for speaker_id in support_set_samples[1]]

    # Shuffle the presentation order and record where row 0 ends up.
    # NOTE(review): this treats the first support-set row as the query's
    # speaker, exactly as the previous version of this cell did -- confirm
    # against build_n_shot_task.
    order = list(range(k_way_classification))
    np.random.shuffle(order)
    correct.append(order.index(0) + 1)

    print('To one of these {} speakers:'.format(k_way_classification))
    for position, row in enumerate(order):
        print('{}: {}'.format(position + 1, speaker_names[row]))
        display(clips[row])

    # Brief pause before prompting (presumably so the audio widgets render
    # before the input box appears).
    time.sleep(0.01)
    while True:
        answer = read_line('Enter correct speaker number: ').strip()
        if answer in valid_answers:
            break
        print('Typo!')

    answers.append(int(answer))

    print('The correct answer was {}'.format(correct[-1]))

    # Wait for the participant before wiping this trial's output. The result
    # is deliberately not bound to `_`, which IPython uses for the last output.
    read_line('Press any key to continue...')

    clear_output()

Enter your name: Oscar
******* Trial 1 of 10 ******
Match this sample:


To one of these 5 speakers:
1: tazzle


2: Ian Grae


3: Katie Gibboney


4: Kim S


5: Astrid Fingerhut


Enter correct speaker number: 5
The correct answer was 5


KeyboardInterrupt: 

In [6]:
# Score the session and append one "name,num_correct,num_trials" row to the
# shared human-evaluation CSV.
#
# Only trials that were actually answered are counted: if the evaluation loop
# was interrupted, `answers` is shorter than num_tasks, and logging num_tasks
# as the trial count would understate the participant's accuracy.
num_completed = len(answers)
# zip stops at the shorter list, so a trial that was shown but never answered
# is ignored.
num_correct = sum(a == c for a, c in zip(answers, correct))

# Do not log an empty session (a "0,0" row carries no information).
if num_completed:
    with open(PATH + '/data/human_evaluation.csv', 'a') as f:
        f.write('{},{},{}\n'.format(name, num_correct, num_completed))
print('You got {} out {} correct!'.format(num_correct, num_completed))

You got 7 out 10 correct!
