In [1]:
import trax
from trax import layers as tl
import trax.fastmath.numpy as np
import numpy

# Setting random seeds
# Both calls use the same seed value (10).
# NOTE(review): presumably this seeds the random number generators trax uses — confirm against the trax docs.
trax.supervised.trainer_lib.init_random_number_generators(10)
# Seeds NumPy's global RNG, which the numpy.random draws later in this notebook consume in order.
numpy.random.seed(10)



In [2]:
def normalize(x):
    """Rescale `x` so that each vector along its last axis has unit L2 norm.

    Args:
        x: array whose last axis holds the vector components.

    Returns:
        An array with the same shape as `x`, divided by the L2 norm taken
        over the last axis. There is no guard for all-zero vectors: their
        norm is 0, so the division is by zero.
    """
    squared_length = np.sum(x * x, axis=-1, keepdims=True)
    length = np.sqrt(squared_length)  # keepdims=True keeps it broadcastable against x
    return x / length

# Denominator can be replaced by np.linalg.norm(x, axis=-1, keepdims=True) for the same result

In [3]:
# Draw a 2x5 array from NumPy's global RNG (seeded in the first cell) and show its type and contents.
tensor = numpy.random.random((2,5))
print(f'The tensor is of type: {type(tensor)}\n and looks like this: {tensor}')

The tensor is of type: <class 'numpy.ndarray'>
 and looks like this: [[0.77132064 0.02075195 0.63364823 0.74880388 0.49850701]
 [0.22479665 0.19806286 0.76053071 0.16911084 0.08833981]]


In [10]:
# Siamese model layers: a Parallel combination of two branches, each a Serial stack of
# Embedding
# LSTM
# Mean (over axis 1)
# Fn (applies normalize() to the averaged output)

vocab_size = 500
model_dimension = 128

LSTM = tl.Serial(
    tl.Embedding(vocab_size=vocab_size, d_feature=model_dimension),
    tl.LSTM(model_dimension),
    tl.Mean(axis=1),
    # NOTE(review): the layer label 'Noarmlize' looks like a typo for 'Normalize'.
    # It is only a display name, but it appears in the printed layer structure,
    # so renaming it requires re-running the notebook to refresh the outputs.
    tl.Fn('Noarmlize', lambda x: normalize(x))
)

# The same LSTM object is used for both branches.
# NOTE(review): presumably this is what makes the two branches share weights — confirm against the trax docs.
Siamese = tl.Parallel(LSTM, LSTM)

In [11]:
# Inspect the structure of the Parallel layer: one Serial stack per branch.
print(Siamese.sublayers)

[Serial[
  Embedding_500_128
  LSTM_128
  Mean
  Noarmlize
], Serial[
  Embedding_500_128
  LSTM_128
  Mean
  Noarmlize
]]


# Modified Triplet Loss

In [18]:
# In this notebook you'll see how to calculate the full triplet loss, step by step
# NOTE: this import rebinds `np` from trax.fastmath.numpy (imported in the first cell)
# to plain NumPy for every cell executed after this one, including any later call
# to normalize(), which looks up the global `np` at call time.
import numpy as np

# Similarity scores
# Two example 3-component vectors for the cosine-similarity demo.
v1 = np.array([1, 2, 3], dtype=float)
v2 = np.array([-1, -1, 3.5])

def cosine_similarity(v1, v2):
    """Cosine similarity of two vectors: dot(v1, v2) / (|v1| * |v2|).

    Args:
        v1: first vector (the notebook passes 1-D arrays).
        v2: second vector, same length as `v1`.

    Returns:
        The dot product of the inputs divided by the product of their L2
        norms. Zero-length vectors are not special-cased.
    """
    length_v1 = np.sqrt(np.dot(v1, v1))
    length_v2 = np.sqrt(np.dot(v2, v2))
    return np.dot(v1, v2) / (length_v1 * length_v2)

# Cosine similarity of the two example vectors defined above.
print(cosine_similarity(v1, v2))

0.5309942440535915


In [20]:
# 2 batches of vectors
# Input data
print("-- Inputs --")
# First batch: four 3-component vectors stacked into a 4x3 array.
v1_1 = np.array([1, 2, 3])
v1_2 = np.array([9, 8, 7])
v1_3 = np.array([-1, -4, -2])
v1_4 = np.array([1, -7, 2])
v1 = np.vstack([v1_1, v1_2, v1_3, v1_4])
print("v1 :")
print(v1, "\n")
# Second batch: each v1 row plus Gaussian noise (mean 0, std 2).
# The draws come from the global NumPy RNG, so their order determines the values.
v2_1 = v1_1 + np.random.normal(0, 2, 3)  # add some noise to create approximate duplicate
v2_2 = v1_2 + np.random.normal(0, 2, 3)
v2_3 = v1_3 + np.random.normal(0, 2, 3)
v2_4 = v1_4 + np.random.normal(0, 2, 3)
v2 = np.vstack([v2_1, v2_2, v2_3, v2_4])
print("v2 :")
print(v2, "\n")

# Batch size; the check below confirms both batches have the same number of rows.
b = len(v1)
print(b==len(v2))

print('outputs')
# Option 1: fill the b x b similarity matrix one entry at a time;
# entry [row, col] compares row `row` of v1 with row `col` of v2.
sim_1 = np.zeros([b,b])
for row in range(0, sim_1.shape[0]):
    for col in range(0, sim_1.shape[1]):
        sim_1[row, col] = cosine_similarity(v1[row], v2[col])

print('option 1: loop')
print(sim_1)

def norm(x):
    """Scale each row of a 2-D array to unit L2 length.

    Args:
        x: 2-D array; the norm is taken along axis 1 (per row).

    Returns:
        Array of the same shape as `x` with every row divided by its L2 norm.
        All-zero rows are not special-cased.
    """
    row_lengths = np.sqrt(np.sum(x * x, axis=1, keepdims=True))
    return x / row_lengths

# Option 2: vectorized. Normalize the rows of both batches, then a single
# matrix product yields every pairwise cosine similarity at once.
sim_2 = np.dot(norm(v1), norm(v2).T)

print(sim_2)
# Check that the loop and the vectorized version agree up to floating-point tolerance.
print(np.allclose(sim_1, sim_2))

-- Inputs --
v1 :
[[ 1  2  3]
 [ 9  8  7]
 [-1 -4 -2]
 [ 1 -7  2]] 

v2 :
[[ 3.969074   -0.15960977 -0.95545656]
 [ 5.51325541  8.53214033 11.76993466]
 [ 1.24738251 -0.65475557 -1.80170157]
 [ 3.79599275 -7.54249598  3.22640837]] 

True
outputs
option 1: loop
[[ 0.0512523   0.9950823  -0.63888057 -0.04759565]
 [ 0.48776666  0.9247955  -0.20792352 -0.02852344]
 [-0.07582989 -0.88679041  0.47468256  0.48091866]
 [ 0.10576754 -0.26846188  0.13252315  0.94913706]]
[[ 0.0512523   0.9950823  -0.63888057 -0.04759565]
 [ 0.48776666  0.9247955  -0.20792352 -0.02852344]
 [-0.07582989 -0.88679041  0.47468256  0.48091866]
 [ 0.10576754 -0.26846188  0.13252315  0.94913706]]
True


In [28]:
# Fixed similarity scores so the walkthrough is deterministic.
# Diagonal entries are treated as positive-pair similarities, off-diagonal entries as negatives.
sim_hardcoded = np.array([
    [0.9, -0.8, 0.3, -0.5],
    [-0.4, 0.5, 0.1, -0.1],
    [0.3, 0.1, -0.4, -0.8],
    [-0.5, -0.2, -0.7, 0.5],
])

sim = sim_hardcoded
b = len(sim)  # batch size = number of rows

print('sim', sim.shape)
print(sim)

# Positives: the diagonal, kept in three layouts used below.
sim_ap = np.diag(sim)
positives_diag = np.diag(sim_ap)      # b x b matrix, zeros off the diagonal
positives_col = sim_ap.reshape(b, 1)  # column vector, broadcasts across each row
print('sim_ap :')
print(sim_ap)
print(positives_diag)

# Negatives: same matrix with the positives zeroed out.
sim_an = sim - positives_diag
print('sim_an:')
print(sim_an)

print('-- outputs --')
# Mean over the b-1 negatives of each row (the zeroed diagonal adds nothing to the sum).
mean_neg = sim_an.sum(axis=1, keepdims=True) / (b - 1)
print('mean_neg :')
print(mean_neg)

# mask_1 hides the diagonal; mask_2 hides negatives scoring above the row's positive.
mask_1 = np.eye(b, dtype=bool)
print(positives_col)
mask_2 = sim_an > positives_col
print('mask_1')
print(mask_1)
print('mask_2')
print(mask_2)
mask = np.logical_or(mask_1, mask_2)
print('mask_overall')
print(mask)

# Masked entries become -2, below any cosine similarity, so the row-wise max never picks them.
# np.where builds a new array, leaving sim_an untouched.
sim_an_masked = np.where(mask, -2.0, sim_an)
print('sim_an_masked')
print(sim_an_masked)

# Closest negative: the largest remaining similarity in each row.
closest_neg = sim_an_masked.max(axis=1, keepdims=True)
print("closest_neg :")
print(closest_neg, "\n")

sim (4, 4)
[[ 0.9 -0.8  0.3 -0.5]
 [-0.4  0.5  0.1 -0.1]
 [ 0.3  0.1 -0.4 -0.8]
 [-0.5 -0.2 -0.7  0.5]]
sim_ap :
[ 0.9  0.5 -0.4  0.5]
[[ 0.9  0.   0.   0. ]
 [ 0.   0.5  0.   0. ]
 [ 0.   0.  -0.4  0. ]
 [ 0.   0.   0.   0.5]]
sim_an:
[[ 0.  -0.8  0.3 -0.5]
 [-0.4  0.   0.1 -0.1]
 [ 0.3  0.1  0.  -0.8]
 [-0.5 -0.2 -0.7  0. ]]
-- outputs --
mean_neg :
[[-0.33333333]
 [-0.13333333]
 [-0.13333333]
 [-0.46666667]]
[[ 0.9]
 [ 0.5]
 [-0.4]
 [ 0.5]]
mask_1
[[ True False False False]
 [False  True False False]
 [False False  True False]
 [False False False  True]]
mask_2
[[False False False False]
 [False False False False]
 [ True  True  True False]
 [False False False False]]
mask_overall
[[ True False False False]
 [False  True False False]
 [ True  True  True False]
 [False False False  True]]
sim_an_masked
[[-2.  -0.8  0.3 -0.5]
 [-0.4 -2.   0.1 -0.1]
 [-2.  -2.  -2.  -0.8]
 [-0.5 -0.2 -0.7 -2. ]]
closest_neg :
[[ 0.3]
 [ 0.1]
 [-0.8]
 [-0.2]] 



In [29]:
# Loss functions
# Each term is max(negative_similarity - positive_similarity + alpha, 0), one value per row.
alpha = 0.25
positive_scores = sim_ap.reshape(b, 1)  # column vector so it lines up with mean_neg / closest_neg

l_1 = np.maximum(mean_neg - positive_scores + alpha, 0)     # term using the mean negative
l_2 = np.maximum(closest_neg - positive_scores + alpha, 0)  # term using the closest negative
l_full = l_1 + l_2

# Total cost: sum of the per-row losses.
cost = np.sum(l_full)

print('loss full')
print(l_full)
print(cost)

loss full
[[0.        ]
 [0.        ]
 [0.51666667]
 [0.        ]]
0.5166666666666667


# Evaluate a Siamese Model