In [1]:
# %load_ext autoreload
%autoreload 2
import torch
import numpy as np
import pandas as pd

#### Checks that the KNN ranking of datapoints is correct, i.e. dist(anchor, neighbour i) < dist(anchor, neighbour j) for i < j

In [2]:
def dist(x, y):
    """Return the squared Euclidean distance between two tensors.

    The element-wise difference ``x - y`` is squared and summed over every
    element, so the result is a 0-dimensional tensor (no square root is taken).
    """
    delta = x - y
    squared = delta.pow(2)
    return squared.sum()

In [3]:
from datasets import TripletAudio

# Load the feature vectors and the precomputed nearest-neighbour table.
# Row `i` of the table is treated below as the neighbour indices of
# datapoint `i`, ordered from nearest to farthest.
train_data = torch.from_numpy(np.loadtxt('data/trainData.txt', dtype=np.float32))
train_KNN = pd.read_csv('data/trainKNN.csv', index_col=0)

# check num elems are same in DF and dataset
num_rows = train_KNN.shape[0]
assert train_data.shape[0] == num_rows, (
    f"trainData has {train_data.shape[0]} rows but trainKNN has {num_rows}"
)

# Randomly pick up to 10 rows to check. Capping the sample at the table size
# avoids the ValueError np.random.choice raises when asked for more unique
# samples than exist (replace=False).
selected_indicies = np.random.choice(num_rows, size=min(10, num_rows), replace=False)

# ensure that the ordering of neighbours is correct for that row
for index in selected_indicies:
    row = train_KNN.iloc[index]
    anchor = train_data[index]
    # Walk consecutive neighbour pairs (k-th, (k+1)-th); .iloc makes the
    # positional slice explicit regardless of the column labels.
    for prev, cur in zip(row, row.iloc[1:]):
        prev_dist = dist(anchor, train_data[prev])
        cur_dist = dist(anchor, train_data[cur])
        assert prev_dist < cur_dist, (
            f"row {index}: neighbour {prev} (dist {prev_dist}) is not closer "
            f"than neighbour {cur} (dist {cur_dist})"
        )
print("All Good!")

All Good!


#### Generates triplets and ensures that the anchor-pos distance < anchor-neg distance

In [4]:
from datasets import TripletAudio

# NOTE(review): the meaning of the positional arguments (False, 5, 5, 5) is
# defined by datasets.TripletAudio and is not visible here — confirm there.
dataset = TripletAudio(False, 5, 5, 5)

# Every generated triplet must place the positive closer to the anchor than
# the negative (compared on squared Euclidean distance).
for triplet in dataset:
    anchor, pos, neg = (x.reshape(-1, 1) for x in triplet)
    pos_dist = ((anchor - pos) ** 2).sum()
    neg_dist = ((anchor - neg) ** 2).sum()
    assert pos_dist - neg_dist < 0
print("All Good!")

All Good!


#### Runs two checks for the embedding process: one with batch size 1 and one with batch size 2

In [268]:
from networks import AnchorNet

# NOTE: this cell rebinds the notebook-level names `train_data` and `anchor`
# to small hand-made fixtures; re-run the data-loading cell before re-running
# any check that needs the real training data.
#
# The expected values below equal sqrt((x1 - 1)^2 + (x2 - 1)^2) - 1 for each
# sample [x1, x2].
# NOTE(review): presumably the final `True` argument puts AnchorNet in a
# deterministic test configuration — confirm in networks.AnchorNet.
#
# Outputs are compared with a shape check plus torch.allclose rather than
# torch.equal: the network computes the square root in float32, so bit-exact
# equality with a value computed in Python floats is not guaranteed.

#test batch size 1
train_data = torch.tensor([[[2.],[3.]]])
anchor = AnchorNet([], 2, 1, True)
expected_output = torch.tensor([[5**(1/2) - 1]])
output = anchor.forward(train_data)
assert output.shape == expected_output.shape, (
    f"expected shape {tuple(expected_output.shape)}, got {tuple(output.shape)}"
)
assert torch.allclose(output, expected_output), (
    f"expected {expected_output}, got {output}"
)

# test batch size 2
train_data = torch.tensor([[[12.],[8.]],[[6.],[9.]]])
anchor = AnchorNet(train_data, 2, 1, True)
expected_output = torch.tensor([[170**(1/2) - 1], [89**(1/2) - 1]])
output = anchor.forward(train_data)
assert output.shape == expected_output.shape, (
    f"expected shape {tuple(expected_output.shape)}, got {tuple(output.shape)}"
)
assert torch.allclose(output, expected_output), (
    f"expected {expected_output}, got {output}"
)
print("All Good!")

All Good!


#### Checks similarity generator 

In [309]:
from utils import similarity_calculator 
# Build the similarity structure from the KNN table loaded earlier in the
# notebook (`train_KNN`).
# NOTE(review): the second argument (5) is presumably the number of nearest
# neighbours treated as "similar" — the check in the following cell compares
# against the first 5 KNN columns; confirm against utils.similarity_calculator.
similarity = similarity_calculator(train_KNN, 5)

In [328]:
# Spot-check up to 100 random rows of the similarity structure. Capping the
# sample at the table size avoids the ValueError np.random.choice raises when
# asked for more unique samples than exist (replace=False).
num_rows = train_KNN.shape[0]
selected_indicies = np.random.choice(num_rows, size=min(100, num_rows), replace=False)
for index in selected_indicies:
    # indices at which this row of the similarity structure is truthy
    similarity_indicies = np.where(similarity[index])[0]
    # the first 5 neighbours from the KNN table, plus the row's own index
    expected_indicies = np.append(train_KNN.iloc[index, :5], index)
    np.testing.assert_array_equal(
        np.sort(similarity_indicies),
        np.sort(expected_indicies),
        err_msg=f"similarity row {index} does not match its first 5 KNN entries plus itself",
    )
print("All Good!")

All Good!


#### Checks the negative triplet selector

In [590]:
# Mock KNN table used by the triplet-selector check: row label = datapoint
# index, the 5 columns = that datapoint's neighbour indices.
mock_df = pd.DataFrame(columns=list(range(0,5)))

# Hand-written rows that define the neighbour relationships under test.
hand_written_rows = {
    0: [1, 2, 5, 6, 7],       # reciprocal neighbours with 1 and 2; one-way with 3
    1: [0, 6, 7, 8, 9],       # reciprocal neighbours with 0
    2: [0, 10, 11, 12, 13],   # reciprocal neighbours with 0
    3: [0, 15, 16, 17, 18],   # one-way: 3 lists 0, but 0 does not list 3
    4: [18, 19, 20, 21, 22],  # no neighbour relationship with rows 0-3
}
for row_label, neighbours in hand_written_rows.items():
    mock_df.loc[row_label] = neighbours

# Rows 5..22 are referenced as neighbours above, so they need entries too
# (needed for the similarity calculation); they all share one filler row.
filler_neighbours = [18, 19, 20, 21, 22]
for i in range(5, 23):
    mock_df.loc[i] = list(filler_neighbours)

In [618]:
from utils import SemihardNegativeTripletSelector

# NOTE(review): 2.5 is presumably the triplet-loss margin (it is consistent
# with the "loss values" printed by get_triplets below) — confirm against
# utils.SemihardNegativeTripletSelector.
triplet_selector = SemihardNegativeTripletSelector(2.5, mock_df)

# One 2-D embedding per mock datapoint 0..4, in index order.
embeddings = torch.tensor([
    [5., 5.],
    [5., 5.],
    [5., 3.],
    [7., 6.],
    [4., 2.5],
])
# Dataset indices corresponding to the rows of `embeddings`.
indicies = np.arange(5)

generating similarity matrix
done


In [619]:
# Only one (anchor, positive, negative) triplet is expected from the mock
# data: anchor 0, positive 2, negative 3.
expected_output = np.array([[0,2,3]])
output = triplet_selector.get_triplets(embeddings, indicies)
np.testing.assert_array_equal(output, expected_output)

pair [0 1] neg indicies [3, 4]
loss values tensor([-2.5000, -4.7500])
neg selected None
--------
pair [0 2] neg indicies [3, 4]
loss values tensor([ 1.5000, -0.7500])
neg selected 0
--------
pair [3 0] neg indicies [4]
loss values tensor([-13.7500])
neg selected None
--------


In [620]:
# Second invocation with identical inputs; the same single triplet is expected.
# NOTE(review): this repeats the preceding check verbatim — presumably to
# confirm that get_triplets gives the same result when called again; if that
# is not the intent, this cell is redundant.
expected_output = np.array([[0,2,3]])
output = triplet_selector.get_triplets(embeddings, indicies)
np.testing.assert_array_equal(output, expected_output)

pair [0 1] neg indicies [3, 4]
loss values tensor([-2.5000, -4.7500])
neg selected None
--------
pair [0 2] neg indicies [3, 4]
loss values tensor([ 1.5000, -0.7500])
neg selected 0
--------
pair [3 0] neg indicies [4]
loss values tensor([-13.7500])
neg selected None
--------
