# Train Network from Scratch for Speaker Identification

## Download and Extract VoxCeleb1

In [0]:
! wget --user voxceleb1902 --password nx0bl2v2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa
! wget --user voxceleb1902 --password nx0bl2v2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab
! wget --user voxceleb1902 --password nx0bl2v2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partac
! wget --user voxceleb1902 --password nx0bl2v2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partad

! wget --user voxceleb1902 --password nx0bl2v2 http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_test_wav.zip

! wget http://www.robots.ox.ac.uk/~vgg/data/voxceleb/data/vox1_dev_txt.zip  
! wget http://www.robots.ox.ac.uk/~vgg/data/voxceleb/data/vox1_test_txt.zip

! wget http://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/iden_split.txt
! wget http://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test.txt
  
! cat vox1_dev* > vox1_dev_wav.zip
! rm vox1_dev_wav_partaa vox1_dev_wav_partab vox1_dev_wav_partac vox1_dev_wav_partad
! mkdir -p voxceleb1
! mv *.zip voxceleb1
! mv *.txt voxceleb1

--2019-07-21 15:01:01--  http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partaa
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:80... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="VoxCeleb1"
Reusing existing connection to www.robots.ox.ac.uk:80.
HTTP request sent, awaiting response... 200 OK
Length: 10737418240 (10G)
Saving to: ‘vox1_dev_wav_partaa’


2019-07-21 15:07:15 (27.4 MB/s) - ‘vox1_dev_wav_partaa’ saved [10737418240/10737418240]

--2019-07-21 15:07:17--  http://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/vox1_dev_wav_partab
Resolving www.robots.ox.ac.uk (www.robots.ox.ac.uk)... 129.67.94.2
Connecting to www.robots.ox.ac.uk (www.robots.ox.ac.uk)|129.67.94.2|:80... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="VoxCeleb1"
Reusing existing connection to www.r

In [0]:
import os
import zipfile

DATA_PATH = 'voxceleb1/'

# Archives placed in DATA_PATH by the download cell, in the order they are unpacked.
ARCHIVE_NAMES = [
    'vox1_dev_wav.zip',
    'vox1_test_wav.zip',
    'vox1_dev_txt.zip',
    'vox1_test_txt.zip',
]

for position, archive_name in enumerate(ARCHIVE_NAMES):
    # Every message except the first also reports that the previous archive finished.
    prefix = '' if position == 0 else 'Done. '
    print(prefix + 'Starting to unpack ' + archive_name)
    # The context manager closes the archive even when extraction raises;
    # it also avoids shadowing the builtin `zip`.
    with zipfile.ZipFile(os.path.join(DATA_PATH, archive_name), 'r') as archive:
        archive.extractall(DATA_PATH)
print('Done.')

# Delete the archives only after every one of them has been extracted.
for archive_name in ARCHIVE_NAMES:
    os.remove(os.path.join(DATA_PATH, archive_name))


## With PyTorch

In [0]:
import os

import numpy as np
import pandas as pd
from scipy.io import wavfile
from scipy import signal

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset
from torchvision.transforms import Compose

! pip install tensorboardX
import tensorboardX
from tqdm import tqdm

import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore') # scipy throws future warnings on fft (known bug)



### Read wav files and calculate spectrograms

In [0]:
class IdentificationDataset(Dataset):
    """Items of the identification split as (label, 2-channel spectrogram) pairs.

    Args:
        path: directory holding ``iden_split.txt`` and a ``wav/`` folder.
        train: when true, rows whose phase is 1 or 2 are used and every clip is
            cropped to a random 3-second segment; otherwise rows with phase 3
            are used and the whole clip is kept.
        transform: optional callable applied to the (Freq, Time) magnitude
            spectrogram. It may return a 2-D array or an array/tensor that
            already has a leading channel axis.

    ``__getitem__`` returns ``(label, features)`` where ``features`` is an
    ndarray of shape (2, Freq, Time): channel 0 is the (transformed) magnitude,
    channel 1 is the phase differenced along the frequency axis.
    """

    def __init__(self, path, train, transform=None):
        iden_split_path = os.path.join(path, 'iden_split.txt')
        split = pd.read_table(iden_split_path, sep=' ', header=None, names=['phase', 'path'])

        phases = [1, 2] if train else [3]

        mask = split['phase'].isin(phases)
        self.dataset = split['path'][mask].reset_index(drop=True)
        self.path = path
        self.train = train
        self.transform = transform

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        # path
        track_path = self.dataset[idx]
        audio_path = os.path.join(self.path, 'wav', track_path)

        # read .wav
        rate, samples = wavfile.read(audio_path)
        # extract label from path like id10003/L9_sh8msGV59/00001.txt
        # subtracting 1 because PyTorch assumes that C_i in [0, 1251-1]
        label = int(track_path.split('/')[0].replace('id1', '')) - 1

        ## parameters
        window = 'hamming'
        # window width and step size
        Tw = 25 # ms
        Ts = 10 # ms
        # frame length (samples)
        Nw = int(rate * Tw * 1e-3)
        # overlap between consecutive frames (samples); frames advance by Ts
        Ns = int(rate * (Tw - Ts) * 1e-3)
        # FFT size: next power of two >= Nw
        nfft = 2 ** (Nw - 1).bit_length()
        pre_emphasis = 0.97

        # preemphasis filter
        samples = np.append(samples[0], samples[1:] - pre_emphasis * samples[:-1])

        # removes DC component of the signal and add a small dither
        samples = signal.lfilter([1, -1], [1, -0.99], samples)
        dither = np.random.uniform(-1, 1, samples.shape)
        spow = np.std(samples)
        samples = samples + 1e-6 * spow * dither

        if self.train:
            # segment selection
            segment_len = 3 # sec
            segment_samples = segment_len * rate
            upper_bound = len(samples) - segment_samples
            if upper_bound < 0:
                raise ValueError(
                    'clip {} has {} samples, fewer than the {} needed for a {} s segment'.format(
                        audio_path, len(samples), segment_samples, segment_len))
            # randint(0, 0) raises, so a clip of exactly segment length is used whole
            start = np.random.randint(0, upper_bound) if upper_bound > 0 else 0
            samples = samples[start:start + segment_samples]

        # spectrogram (magnitude)
        _, _, spec = signal.spectrogram(samples, rate, window, Nw, Ns, nfft,
                                        mode='magnitude', return_onesided=False)

        # just multiplying it by 1600 makes spectrograms in the paper and here "the same"
        spec *= rate / 10

        if self.transform:
            spec = self.transform(spec)

        # Make sure the magnitude has a channel axis. Without this, transform=None
        # (or a transform that keeps the array 2-D) breaks the concatenation below.
        spec = np.asarray(spec)
        if spec.ndim == 2:
            spec = spec[np.newaxis, :, :].astype(np.float32)

        # phase channel: row 0 keeps the raw phase, rows 1.. hold the difference
        # between neighbouring rows along axis 0
        _, _, spec_phase = signal.spectrogram(samples, rate, window, Nw, Ns, nfft,
                                              mode='phase', return_onesided=False)
        spec_phase[1:, :] = np.diff(spec_phase, axis=0)
        spec_phase = spec_phase[np.newaxis, :, :].astype(np.float32)

        spec_ = np.concatenate((spec, spec_phase), axis=0)

        return label, spec_

### Mean and variance normalization

In [0]:
class Normalize(object):
    """Normalizes a voice spectrogram (mean-variance), one frequency row at a time."""

    def __call__(self, spec):
        """Return ``spec`` with every row shifted to zero mean and scaled to unit std.

        Args:
            spec: 2-D ndarray laid out as (Freq, Time); any number of rows is accepted.

        Returns:
            ndarray of the same shape. Rows with zero variance come back as all
            zeros instead of NaN/inf.
        """
        # mean-variance normalization for every spectrogram (not batch-wise);
        # keepdims keeps a trailing axis of size 1 so the stats broadcast over time
        mu = spec.mean(axis=1, keepdims=True)
        sigma = spec.std(axis=1, keepdims=True)
        # a constant row has sigma == 0; divide by 1 there to avoid NaN/inf
        sigma = np.where(sigma == 0, 1.0, sigma)

        return (spec - mu) / sigma

class ToTensor(object):
    """Turn a 2-D (Freq, Time) spectrogram array into a float32 tensor with a channel axis."""

    def __call__(self, spec):
        n_freq, n_time = spec.shape

        # the network expects 3-D input, so prepend a channel axis of size 1
        with_channel = spec.reshape(1, n_freq, n_time)

        # cast to float32 before handing the data to torch
        return torch.from_numpy(with_channel.astype(np.float32))

### Create the model class

In [0]:
class VoiceNet(nn.Module):
    """CNN classifier over 2-channel spectrograms.

    Five conv layers with batch norm and ReLU, a conv-implemented fc6 that
    spans the remaining frequency axis, an average over the time axis, then
    two linear layers.

    Args:
        num_classes: size of the output layer.

    ``forward`` returns raw scores in training mode and softmax probabilities
    (over dim 1) in eval mode.
    """

    def __init__(self, num_classes=2):
        super(VoiceNet, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=2, out_channels=96, kernel_size=7, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=2, padding=1)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1)

        self.bn1 = nn.BatchNorm2d(num_features=96)
        self.bn2 = nn.BatchNorm2d(num_features=256)
        self.bn3 = nn.BatchNorm2d(num_features=256)
        self.bn4 = nn.BatchNorm2d(num_features=256)
        self.bn5 = nn.BatchNorm2d(num_features=256)
        self.bn6 = nn.BatchNorm2d(num_features=4096)
        self.bn7 = nn.BatchNorm1d(num_features=1024)

        self.relu = nn.ReLU()
        # explicit dim: scores are (batch, classes); the implicit-dim form is deprecated
        self.softmax = nn.Softmax(dim=1)

        self.mpool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.mpool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.mpool5 = nn.MaxPool2d(kernel_size=(5, 3), stride=(3, 2))

        # Conv2d with weights of size (H, 1) is identical to FC with H weights
        self.fc6 = nn.Conv2d(in_channels=256, out_channels=4096, kernel_size=(9, 1))
        self.fc7 = nn.Linear(in_features=4096, out_features=1024)
        self.fc8 = nn.Linear(in_features=1024, out_features=num_classes)

    def forward(self, x):
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.mpool1(x)
        x = self.relu(self.bn2(self.conv2(x)))
        x = self.mpool2(x)
        x = self.relu(self.bn3(self.conv3(x)))
        x = self.relu(self.bn4(self.conv4(x)))
        x = self.relu(self.bn5(self.conv5(x)))
        x = self.mpool5(x)
        x = self.relu(self.bn6(self.fc6(x)))

        # Average over whatever width is left. The functional form avoids
        # registering a new submodule on every forward pass.
        x = F.avg_pool2d(x, kernel_size=(1, x.size(3)))

        x = x.view(x.size(0), -1)
        x = self.relu(self.bn7(self.fc7(x)))
        x = self.fc8(x)

        # during training, there's no need for SoftMax because CELoss calculates it
        if self.training:
            return x

        return self.softmax(x)

### Set hyper-parameters

In [0]:
LOG_PATH = 'logs/VoxCeleb/rm_dc_n_dither'
os.makedirs(LOG_PATH, exist_ok=True)
# Root of the extracted VoxCeleb1 data: must contain iden_split.txt and the wav/ folder.
# The dataset-creation cell reads this name; it was previously never defined.
DATASET_PATH = 'voxceleb1/'
EPOCH_NUM = 30

# in shared code B = 100 but PyTorch throws CUDA out of memory at B = 97 
# though B=96 takes only 90.6% of the GPU Mem (bug?):
# https://discuss.pytorch.org/t/lesser-memory-consumption-with-a-larger-batch-in-multi-gpu-setup/29087
# B = 96
# but when 
torch.backends.cudnn.deterministic = True
# I can set B = 100
B = 100

WEIGHT_DECAY = 5e-4
LR_INIT = 1e-2
LR_LAST = 1e-4
# lr scheduler parameter: per-epoch decay factor such that
# LR_INIT * gamma ** (EPOCH_NUM - 1) == LR_LAST
gamma = 10 ** (np.log10(LR_LAST / LR_INIT) / (EPOCH_NUM - 1))
MOMENTUM = 0.9
# fall back to the CPU when no GPU is visible so the notebook still runs
DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'
NUM_WORKERS = 4
TBoard = tensorboardX.SummaryWriter(log_dir=LOG_PATH)

### Create the model and data loaders

In [0]:
# Network with 1251 output classes, moved to the training device.
net = VoiceNet(num_classes=1251)
net.to(DEVICE)

# Per-item preprocessing of the magnitude spectrogram: normalize, then convert to a tensor.
transforms = Compose([Normalize(), ToTensor()])

# Training split: shuffled mini-batches of size B.
trainset = IdentificationDataset(DATASET_PATH, train=True, transform=transforms)
trainsetloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=B,
    shuffle=True,
    num_workers=NUM_WORKERS,
)

# Test split: the dataset does not crop test clips, so they are fed one at a time.
testset = IdentificationDataset(DATASET_PATH, train=False, transform=transforms)
testsetloader = torch.utils.data.DataLoader(
    testset,
    batch_size=1,
    num_workers=NUM_WORKERS * 2,
)

# Loss, SGD with momentum and weight decay, and a per-epoch multiplicative LR decay.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    net.parameters(),
    lr=LR_INIT,
    momentum=MOMENTUM,
    weight_decay=WEIGHT_DECAY,
)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=gamma)

### Model info

In [0]:
# Print a per-layer summary of `net` for a single input of size (2, 512, 300):
# 2 input channels (matching conv1), 512 rows, 300 columns.
from torchsummary import summary
summary(net, input_size=(2, 512, 300))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 96, 254, 148]           9,504
       BatchNorm2d-2         [-1, 96, 254, 148]             192
              ReLU-3         [-1, 96, 254, 148]               0
         MaxPool2d-4          [-1, 96, 126, 73]               0
            Conv2d-5          [-1, 256, 62, 36]         614,656
       BatchNorm2d-6          [-1, 256, 62, 36]             512
              ReLU-7          [-1, 256, 62, 36]               0
         MaxPool2d-8          [-1, 256, 30, 17]               0
            Conv2d-9          [-1, 256, 30, 17]         590,080
      BatchNorm2d-10          [-1, 256, 30, 17]             512
             ReLU-11          [-1, 256, 30, 17]               0
           Conv2d-12          [-1, 256, 30, 17]         590,080
      BatchNorm2d-13          [-1, 256, 30, 17]             512
             ReLU-14          [-1, 256,

### Train and calculate accuracy on test data

In [0]:
# One pass per epoch: train on every batch, decay the learning rate, then measure
# top-1 / top-5 accuracy on the test split and log everything to TensorBoard.
for epoch_num in range(EPOCH_NUM):

    # train
    net.train()

    # wrapping the loader (not the enumerate) lets tqdm show the total and an ETA
    for iter_num, (labels, specs) in enumerate(tqdm(trainsetloader)):
        optimizer.zero_grad()
        labels, specs = labels.to(DEVICE), specs.to(DEVICE)
        scores = net(specs)
        loss = criterion(scores, labels)
        loss.backward()
        optimizer.step()

        # TBoard
        step_num = epoch_num * len(trainsetloader) + iter_num
        TBoard.add_scalar('Metrics/train_loss', loss.item(), step_num)
        # read the LR actually used by the optimizer for this step
        TBoard.add_scalar('Metrics/lr', optimizer.param_groups[0]['lr'], step_num)

#         TBoard.add_scalar('weights/conv1', net.conv1.weight.mean(), step_num)
#         TBoard.add_scalar('weights/conv5', net.conv5.weight.mean(), step_num)
#         TBoard.add_scalar('weights/fc6', net.fc6.weight.mean(), step_num)
#         TBoard.add_scalar('weights/fc7', net.fc7.weight.mean(), step_num)
#         TBoard.add_scalar('weights/fc8', net.fc8.weight.mean(), step_num)
#         TBoard.add_scalar('grads/conv1', net.conv1.weight.grad.mean(), step_num)
#         TBoard.add_scalar('grads/conv5', net.conv5.weight.grad.mean(), step_num)
#         TBoard.add_scalar('grads/fc6', net.fc6.weight.grad.mean(), step_num)
#         TBoard.add_scalar('grads/fc7', net.fc7.weight.grad.mean(), step_num)
#         TBoard.add_scalar('grads/fc8', net.fc8.weight.grad.mean(), step_num)

    # Decay the LR only after this epoch's optimizer steps. Stepping the scheduler
    # first skips LR_INIT, so the run would end below LR_LAST.
    lr_scheduler.step()

    # test
    net.eval()

    top5_hits = 0
    top1_hits = 0
    num_tested = 0

    # no gradients are needed for evaluation; this saves memory and time
    with torch.no_grad():
        for label, spec in tqdm(testsetloader):
            label, spec = label.to(DEVICE), spec.to(DEVICE)
            probs = net(spec)

            # calculate Top-5 and Top-1 accuracy (works for any batch size)
            pred_top5 = probs.topk(5, dim=1)[1]
            matches = pred_top5 == label.view(-1, 1)
            top5_hits += matches.any(dim=1).sum().item()
            top1_hits += matches[:, 0].sum().item()
            num_tested += label.size(0)

    top5_accuracy = top5_hits / max(num_tested, 1)
    top1_accuracy = top1_hits / max(num_tested, 1)

    TBoard.add_scalar('Metrics/test_top5', top5_accuracy, epoch_num)
    TBoard.add_scalar('Metrics/test_top1', top1_accuracy, epoch_num)

    print('\ntest_top5 =', round(100 * top5_accuracy, 2), '%')
    print('test_top1 =', round(100 * top1_accuracy, 2), '%')

# when the training is finished save the model
torch.save(net.state_dict(), os.path.join(LOG_PATH, 'model_snapshot.txt'))
TBoard.close()
print('top 1 accuracy @ the end: {}'.format(round(top1_accuracy, 3)))
print('top 5 accuracy @ the end: {}'.format(round(top5_accuracy, 3)))
print('loss @ the end: {}'.format(round(loss.item(), 3)))

1453it [1:13:42,  1.81s/it]
8251it [10:21, 13.27it/s]


test_top5 = 3.56 %
test_top1 = 3.56 %



526it [26:55,  3.18s/it]

# Pre-Trained Model as Feature Extractor

## With Keras

In [4]:
# Mount Google Drive at /content/drive (requires the google.colab package, i.e. a Colab runtime).
# A later cell copies fa.tar.gz from the mounted drive.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
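# One-time setup, kept for reference: download the `fa` corpus archive and cache it
# on Google Drive so that later sessions can copy it from there instead.
# ! wget https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-3/fa.tar.gz
# ! cp fa.tar.gz drive/My\ Drive/datasets/fa.tar.gz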

In [5]:
! cp drive/My\ Drive/datasets/fa.tar.gz fa.tar.gz
! mkdir common_voice
! tar -C common_voice -xf fa.tar.gz

mkdir: cannot create directory ‘common_voice’: File exists


In [20]:
# Fetch the project that provides scoring.py and the cfg/ and res/ folders used by the cells below.
! git clone https://github.com/mohsenoon/iust-dl97-project.git
# System audio packages, then the pyaudio Python package
# (presumably the former are build prerequisites of the latter — TODO confirm).
! apt install libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0
! pip install pyaudio

Cloning into 'iust-dl97-project'...
remote: Enumerating objects: 39, done.[K
remote: Counting objects: 100% (39/39), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 39 (delta 4), reused 30 (delta 3), pack-reused 0[K
Unpacking objects: 100% (39/39), done.
Reading package lists... Done
Building dependency tree       
Reading state information... Done
libportaudio2 is already the newest version (19.6.0-1).
libportaudiocpp0 is already the newest version (19.6.0-1).
portaudio19-dev is already the newest version (19.6.0-1).
libasound2-dev is already the newest version (1.1.3-5ubuntu0.2).
The following package was automatically installed and is no longer required:
  libnvidia-common-410
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 4 not upgraded.


### For 1194 speakers

In [0]:
import os
import collections

# Where the converted .wav files live and where the source .mp3 clips are.
# NOTE(review): this cell used to join against DATA_PATH, which at this point still
# holds the VoxCeleb folder; 'common_voice/wav' is the folder the conversion cell
# creates — confirm this is the intended location.
WAV_DIR = 'common_voice/wav'
CLIPS_DIR = 'common_voice/clips'

# Minimum number of validated clips a speaker needs, and how many are kept per speaker.
sample_per_speaker = 2

with open('common_voice/validated.tsv', 'r', encoding='utf-8') as val:
  lines = val.readlines()

# The first two whitespace-separated fields of every row are the client id and the clip name.
clients_id = []
files_name = []
mp3_name = []
clips_by_client = collections.OrderedDict()  # client id -> clip names, in file order
for x in lines[1:]:
  fields = x.split()
  if len(fields) < 2:
    continue  # skip blank or truncated rows
  client, clip = fields[0], fields[1]
  clients_id.append(client)
  files_name.append(clip.replace('mp3','wav'))
  mp3_name.append(clip)
  clips_by_client.setdefault(client, []).append(clip)

spk_id = [client for client, clips in clips_by_client.items() if len(clips) >= sample_per_speaker]

# Keep the first `sample_per_speaker` clips of each selected speaker. Grouping by client id
# (rather than slicing from the first occurrence) stays correct even when a speaker's rows
# are not contiguous in the file.
spk_index = []
file_index = []
mp3_index = []
for i, sid in enumerate(spk_id):
  for clip in clips_by_client[sid][:sample_per_speaker]:
    spk_index.append(i)
    file_index.append(os.path.join(WAV_DIR, clip.replace('mp3','wav')))
    mp3_index.append(os.path.join(CLIPS_DIR, clip))

In [0]:
import os
import subprocess

# mp3 to wav (resampled to 16 kHz)
os.makedirs('common_voice/wav', exist_ok=True)

failed = []
for mp3_path, wav_path in zip(mp3_index, file_index):
  if os.path.exists(wav_path):
    continue  # already converted in an earlier run
  # argument list instead of a shell string: paths are passed verbatim, no quoting issues
  result = subprocess.run(['ffmpeg', '-i', mp3_path, '-ar', '16000', wav_path])
  if result.returncode != 0:
    failed.append(mp3_path)

# conversion stays best-effort, but failures are no longer silent
if failed:
  print('ffmpeg failed for {} file(s), first one: {}'.format(len(failed), failed[0]))

In [0]:
import csv

# Consecutive entries of file_index alternate between the two lists:
# even positions go to the enrollment list, odd positions to the test list.
LIST_FILES = [
    ('iust-dl97-project/cfg/enroll_list.csv', 0),
    ('iust-dl97-project/cfg/test_list.csv', 1),
]
fieldnames = ['filename', 'speaker']

for list_path, first_row in LIST_FILES:
    # newline='' is what the csv module expects for files it writes
    with open(list_path, mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for i in range(first_row, len(file_index), 2):
            # paths are written relative to the project directory, hence the '../'
            writer.writerow({'filename': '../'+file_index[i], 'speaker': spk_index[i]})


In [48]:
% cd iust-dl97-project
! python scoring.py
% cd ..

/content/iust-dl97-project
Using TensorFlow backend.
Loading model weights from [model/weights.h5]....
W0721 14:41:17.353217 140413200598912 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0721 14:41:17.370271 140413200598912 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0721 14:41:17.397982 140413200598912 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:245: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0721 14:41:17.398170 140413200598912 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_sessi

#### Accuracy Calculation

In [49]:
import pandas as pd

# Load the results file from the project's res/ folder.
df = pd.read_csv('iust-dl97-project/res/results.csv')

# Accuracy is the sum of the `correct` column divided by its number of rows.
correct_flags = df['correct']
accuracy = sum(correct_flags) / len(correct_flags)

print('Accuracy =', accuracy * 100, '%')

Accuracy = 30.569514237855945 %


### For 196 speakers

In [30]:
import os
import collections

# Where the converted .wav files live and where the source .mp3 clips are.
# NOTE(review): this cell used to join against DATA_PATH, which at this point still
# holds the VoxCeleb folder; 'common_voice/wav' is the folder the conversion cell
# creates — confirm this is the intended location.
WAV_DIR = 'common_voice/wav'
CLIPS_DIR = 'common_voice/clips'

# Minimum number of validated clips a speaker needs, and how many are kept per speaker.
sample_per_speaker = 40

with open('common_voice/validated.tsv', 'r', encoding='utf-8') as val:
  lines = val.readlines()

# The first two whitespace-separated fields of every row are the client id and the clip name.
clients_id = []
files_name = []
mp3_name = []
clips_by_client = collections.OrderedDict()  # client id -> clip names, in file order
for x in lines[1:]:
  fields = x.split()
  if len(fields) < 2:
    continue  # skip blank or truncated rows
  client, clip = fields[0], fields[1]
  clients_id.append(client)
  files_name.append(clip.replace('mp3','wav'))
  mp3_name.append(clip)
  clips_by_client.setdefault(client, []).append(clip)

spk_id = [client for client, clips in clips_by_client.items() if len(clips) >= sample_per_speaker]

print('number of speakers =', len(spk_id))

# Keep the first `sample_per_speaker` clips of each selected speaker. Grouping by client id
# (rather than slicing from the first occurrence) stays correct even when a speaker's rows
# are not contiguous in the file.
spk_index = []
file_index = []
mp3_index = []
for i, sid in enumerate(spk_id):
  for clip in clips_by_client[sid][:sample_per_speaker]:
    spk_index.append(i)
    file_index.append(os.path.join(WAV_DIR, clip.replace('mp3','wav')))
    mp3_index.append(os.path.join(CLIPS_DIR, clip))

number of speakers = 196


In [0]:
import csv

# Consecutive entries of file_index alternate between the two lists:
# even positions go to the enrollment list, odd positions to the test list.
LIST_FILES = [
    ('iust-dl97-project/cfg/enroll_list.csv', 0),
    ('iust-dl97-project/cfg/test_list.csv', 1),
]
fieldnames = ['filename', 'speaker']

for list_path, first_row in LIST_FILES:
    # newline='' is what the csv module expects for files it writes
    with open(list_path, mode='w', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        for i in range(first_row, len(file_index), 2):
            # paths are written relative to the project directory, hence the '../'
            writer.writerow({'filename': '../'+file_index[i], 'speaker': spk_index[i]})

In [33]:
% cd iust-dl97-project
! python scoring.py
% cd ..

/content/iust-dl97-project
Using TensorFlow backend.
Loading model weights from [model/weights.h5]....
W0721 14:33:04.827537 140481328191360 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0721 14:33:04.844577 140481328191360 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0721 14:33:04.872748 140481328191360 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:245: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0721 14:33:04.872946 140481328191360 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_sessi

#### Accuracy Calculation

In [45]:
import pandas as pd

# Load the results file from the project's res/ folder.
df = pd.read_csv('iust-dl97-project/res/results.csv')

# Accuracy is the sum of the `correct` column divided by its number of rows.
correct_flags = df['correct']
accuracy = sum(correct_flags) / len(correct_flags)

print('Accuracy =', accuracy * 100, '%')

Accuracy = 50.0 %
