# Preliminaries

In [1]:
# Mount Google Drive at /gdrive; the paths configured in this notebook live under that mount point.
# This import is only available inside Google Colab.
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [86]:
# Folder on the mounted drive that holds the sb-outputs arrays and the pair/index pickles.
_SHARED_DIR = '/gdrive/MyDrive/shared_space/commonvoice-farsi'

# Single place for every path and hyper-parameter used by the notebook.
config = dict(
    # the six sb-outputs-<n>.npy parts, n = 1..6, in order
    outputs=[f'{_SHARED_DIR}/sb-outputs-{part}.npy' for part in range(1, 7)],
    positive_pairs=f'{_SHARED_DIR}/positive-pairs.pkl',
    negative_pairs=f'{_SHARED_DIR}/negative-pairs.pkl',
    path_to_part=f'{_SHARED_DIR}/path-to-part.pkl',
    path_to_index=f'{_SHARED_DIR}/path-to-index.pkl',
    # directory the trained weights are written to
    models_path='/gdrive/MyDrive/arman/verification-models/',
    num_epochs=200,
    learning_rate=1e-1,
    batch_size=32,
    # percentage of the pairs used for training; the rest is held out
    train_percentage=90,
)

In [24]:
import numpy as np
import pickle as pkl
import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import os
from sklearn.model_selection import train_test_split

# Loading the Data

In [12]:
# Load every array part listed in config['outputs'] (same files, same order as before) so the
# config stays the single source of truth for file locations instead of re-typing the paths here.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; keep it only if the
# .npy files are trusted and actually need it.
outputs = [np.load(path, allow_pickle=True) for path in config['outputs']]

In [5]:
outputs[0].shape

(50000, 192)

In [13]:
outputs[3].shape

(50000, 192)

In [14]:
def _load_pickle(path):
  """Return the object stored in the pickle file at `path`.

  NOTE(review): unpickling can execute arbitrary code, so only use this on
  files from a trusted source.
  """
  with open(path, 'rb') as file:
    return pkl.load(file)

# Pair lists plus the two lookup tables keyed by audio file name
# (file name -> part number, file name -> row index inside that part).
pos_pairs = _load_pickle(config['positive_pairs'])
neg_pairs = _load_pickle(config['negative_pairs'])
path_to_part = _load_pickle(config['path_to_part'])
path_to_index = _load_pickle(config['path_to_index'])

In [10]:
pos_pairs[0]

['common_voice_fa_20871593.wav', 'common_voice_fa_20871629.wav']

In [15]:
def pair_to_array(pair):
  """Return the two stored vectors for the file names `pair[0]` and `pair[1]`.

  Each name is resolved through `path_to_part` (1-based part number, converted
  with int()) and `path_to_index` (row inside that part of `outputs`).
  """
  def lookup(path):
    part_position = int(path_to_part[path]) - 1  # parts are numbered from 1
    return outputs[part_position][path_to_index[path]]

  return lookup(pair[0]), lookup(pair[1])

In [8]:
pair_to_array(pos_pairs[0])[0].shape

(192,)

In [19]:
# One entry per pair, each holding the pair's two vectors.
pos_arrays = np.array(list(map(pair_to_array, pos_pairs)))
neg_arrays = np.array(list(map(pair_to_array, neg_pairs)))

In [20]:
pos_arrays.shape

(10230, 2, 192)

In [21]:
neg_arrays.shape

(11101, 2, 192)

In [29]:
# Positives first, negatives after, stacked along the first axis (the default axis).
data = np.concatenate([pos_arrays, neg_arrays])

In [32]:
data.shape

(21331, 2, 192)

In [30]:
# 1 for the leading rows of `data` (the positive pairs), 0 for every row after them.
labels = (np.arange(data.shape[0]) < pos_arrays.shape[0]).astype(int)

In [31]:
labels.shape

(21331,)

# Splitting the data into training, testing and validation (development) parts

In [35]:
# Hold out the non-training fraction first, then cut that hold-out in half:
# the first half becomes the test part, the second half the dev part.
_holdout_fraction = 1-config['train_percentage']/100
f_train, f_rem, l_train, l_rem = train_test_split(
    data, labels, test_size=_holdout_fraction, random_state=50)
f_test, f_dev, l_test, l_dev = train_test_split(
    f_rem, l_rem, test_size=0.5, random_state=50)

In [40]:
print(f'train features: {f_train.shape}, dev features: {f_dev.shape}, test features: {f_test.shape}')

train features: (19197, 2, 192), dev features: (1067, 2, 192), test features: (1067, 2, 192)


In [41]:
print(f'train labels: {len(l_train)}, dev labels: {len(l_dev)}, test labels: {len(l_test)}')

train labels: 19197, dev labels: 1067, test labels: 1067


# Converting Everything to Tensors

In [45]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [62]:
class MyDataset(Dataset):
    """Dataset over in-memory features and labels, both moved to `device` up front.

    `features` must be a NumPy array; `labels` may be any sequence NumPy can
    convert and is stored as a column tensor of shape (n, 1).
    """

    def __init__(self, features, labels, device):
        self.features = torch.from_numpy(features).to(device)
        # np.array copies the labels; reshape turns them into one label per row
        label_column = np.array(labels).reshape(-1, 1)
        self.labels = torch.from_numpy(label_column).to(device)

    def __len__(self):
        """Number of samples, i.e. the size of the first feature dimension."""
        return self.features.size(0)

    def __getitem__(self, idx):
        """Return the (features, labels) entries selected by `idx`."""
        sample, target = self.features[idx], self.labels[idx]
        return sample, target

In [63]:
# One dataset per split, all placed on the selected device.
train_dataset = MyDataset(features=f_train, labels=l_train, device=device)
test_dataset = MyDataset(features=f_test, labels=l_test, device=device)
dev_dataset = MyDataset(features=f_dev, labels=l_dev, device=device)

In [64]:
# Only the training loader is shuffled. Shuffling gives nothing for the evaluation splits,
# and a fixed batch order keeps dev/test evaluation deterministic.
train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False)
dev_dataloader = DataLoader(dev_dataset, batch_size=config['batch_size'], shuffle=False)

In [68]:
train_dataset[0][0].shape

torch.Size([2, 192])

# Neural Network Architecture

In [165]:
class my_neural_net(torch.nn.Module):
    """Small feed-forward classifier over a pair of equally sized vectors.

    The input batch is flattened to (batch, 2 * embedding_dim), passed through
    Linear -> ReLU -> Linear and squashed by a sigmoid, giving one value in
    (0, 1) per sample.

    Args:
        embedding_dim: size of each of the two vectors in a pair. The default
            of 192 reproduces the original fixed architecture.
    """

    def __init__(self, embedding_dim=192):
        super().__init__()
        self.flatten = torch.nn.Flatten()
        # Attribute name and layer order are unchanged so that previously saved
        # state_dict keys (first_layer.0.*, first_layer.2.*) still load.
        self.first_layer = torch.nn.Sequential(
            nn.Linear(2 * embedding_dim, embedding_dim),
            nn.ReLU(),
            nn.Linear(embedding_dim, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the sigmoid output, shape (batch, 1), for the pair batch `x`."""
        return self.first_layer(self.flatten(x))

In [166]:
# Build the network and move its parameters to the selected device in one step.
my_nn = my_neural_net().to(device)

In [168]:
train_dataset[:2][0].shape

torch.Size([2, 2, 192])

In [167]:
my_nn(train_dataset[:2][0]).shape

torch.Size([2, 1])

In [153]:
my_nn(train_dataset[:2][0]).shape

torch.Size([2, 1])

# Training

In [154]:
# Binary cross-entropy on the network's sigmoid outputs, optimised with plain SGD
# at the learning rate set in the config.
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(my_nn.parameters(), lr=config['learning_rate'])

In [155]:
def output_to_label(out):
    """Map a scalar model output to the nearer of the labels 0 and 1.

    Ties (an output exactly halfway between the two) resolve to 0.
    """
    gap_from_zero, gap_from_one = abs(out), abs(out - 1)
    return 0 if gap_from_zero <= gap_from_one else 1

In [156]:
def train_loop(dataloader, model, loss_fn, optimizer, epoch_num):
    """Run one optimisation pass over `dataloader`, printing the loss every 100 batches.

    Args:
        dataloader: iterable of (features, labels) batches exposing `.dataset`
            (its length is shown as the total in the progress line).
        model: network being trained; switched to training mode when it is a
            torch.nn.Module.
        loss_fn: callable `loss_fn(pred, labels)` returning a scalar tensor.
        optimizer: optimizer holding the model's parameters.
        epoch_num: epoch number shown in the progress line.
    """
    num_points = len(dataloader.dataset)

    # Make sure layers such as dropout/batch-norm behave in training mode, even if an
    # earlier evaluation left the model in eval mode.
    if isinstance(model, torch.nn.Module):
        model.train()

    for batch, (features, labels) in enumerate(dataloader):
        # Compute prediction and loss (the loss expects float targets)
        labels = labels.to(torch.float32)
        pred = model(features).to(torch.float32)
        loss = loss_fn(pred, labels)

        # Backpropagation
        optimizer.zero_grad()  # clear gradients left from the previous step
        loss.backward()        # compute fresh gradients
        optimizer.step()       # update the parameters

        if batch % 100 == 0:
            # Approximate number of samples seen so far (batch index * current batch size).
            current = batch * len(features)
            print(f"\r Epoch {epoch_num} - loss: {loss.item():>7f}  [{current:>5d}/{num_points:>5d}]", end=" ")


def test_loop(dataloader, model, loss_fn, epoch_num, name):
    num_points = len(dataloader.dataset)
    sum_test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (features, labels) in enumerate(dataloader):
            labels = labels.to(torch.float32)
            pred = model(features).to(torch.float32)
            sum_test_loss += loss_fn(pred, labels).item() # add the current loss to the sum of the losses
            # convert the outputs of the model on the current batch to a numpy array
            pred_lst = list(pred.cpu().numpy().squeeze())
            pred_lst = [output_to_label(item) for item in pred_lst]
            # convert the original labels corresponding to the current batch to a numpy array
            output_lst = list(labels.cpu().numpy().squeeze()) 
            # determine the points for which the model is correctly predicting the label (add a 1 for each)
            match_lst = [1 if p==o else 0 for (p, o) in zip(pred_lst, output_lst)] 
            # count how many points are labeled correctly in this batch and add the number to the overall count of the correct labeled points
            correct += sum(match_lst) 
            
    sum_test_loss /= num_points
    correct /= num_points
    print(f"\r Epoch {epoch_num} - {name} Error: Accuracy: {(100*correct):>0.1f}%, Avg loss: {sum_test_loss:>8f}", end=" ")

In [None]:
# Train for config['num_epochs'] epochs (numbered from 1), evaluating on the dev split after
# each one. `epoch_num` stays bound after the loop and is reused by the evaluation cells below.
for epoch_num in range(1, config['num_epochs']+1):
    train_loop(train_dataloader, my_nn, loss_fn, optimizer, epoch_num)
    test_loop(dev_dataloader, my_nn, loss_fn, epoch_num, 'Development/Validation')

In [None]:
test_loop(train_dataloader, my_nn, loss_fn, epoch_num, 'Training')

In [None]:
test_loop(test_dataloader, my_nn, loss_fn, epoch_num, 'Test')

In [None]:
my_nn

In [161]:
# Save only the learned parameters (state_dict). The target directory is created if it is
# missing, and the path is joined with os.path.join so it does not depend on
# config['models_path'] ending with a slash.
os.makedirs(config['models_path'], exist_ok=True)
torch.save(my_nn.state_dict(), os.path.join(config['models_path'], "neural_net3.pth"))