In [1]:
import numpy as np
import tensorflow as tf
import pprint

In [2]:
v1 = np.array([1, 2, 3], dtype=float)
v2 = np.array([1, 2, 3.5], dtype=float)

In [3]:
def cosine_similarity(v1, v2):
    numerator = tf.math.reduce_sum(v1*v2)
    denominator = tf.math.sqrt(tf.math.reduce_sum(v1*v1) * tf.math.reduce_sum(v2*v2))
    return numerator / denominator

In [4]:
print("cosine similarity :", cosine_similarity(v1, v2).numpy())

cosine similarity : 0.9974086507360697


In [5]:
# Define the original vectors
v1 = np.array([[1.0, 2.0, 3.0],
               [9.0, 8.0, 7.0],
               [-1.0, -4.0, -2.0],
               [1.0, -7.0, 2.0]])

v2 = np.vstack([v + np.random.normal(0, 2, 3) for v in v1])

In [6]:
b = len(v1)
print(f"Batch sizes match : {b == len(v2)}")

Batch sizes match : True


In [7]:
sim_1 = np.zeros([b, b])
for row in range(0, sim_1.shape[0]):
    for col in range(0, sim_1.shape[1]):
        sim_1[row, col] = cosine_similarity(v2[row], v1[col]).numpy()
sim_1

array([[ 0.67526083,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129,  0.98493566, -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314,  0.81000621,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824,  0.88953395]])

In [8]:
def norm(x):
    return tf.math.l2_normalize(x, axis=1)

In [9]:
# faster computation with vectorization
sim_2 = tf.linalg.matmul(norm(v2), norm(v1), transpose_b=True).numpy()
sim_2

array([[ 0.67526083,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129,  0.98493566, -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314,  0.81000621,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824,  0.88953395]])

### Hard Negative Mining

In [10]:
sim = sim_2
b = sim.shape[0]

In [11]:
sim_ap = np.diag(sim)
np.diag(sim_ap)

array([[0.67526083, 0.        , 0.        , 0.        ],
       [0.        , 0.98493566, 0.        , 0.        ],
       [0.        , 0.        , 0.81000621, 0.        ],
       [0.        , 0.        , 0.        , 0.88953395]])

In [12]:
sim_an = sim - np.diag(sim_ap)
sim_an

array([[ 0.        ,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129,  0.        , -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314,  0.        ,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824,  0.        ]])

In [13]:
mean_neg = np.sum(sim_an, axis=1, keepdims=True) / (b - 1)
mean_neg

array([[ 0.04124018],
       [-0.0275014 ],
       [-0.5363405 ],
       [ 0.16887277]])

In [14]:
mask_1 = np.identity(b) == 1
mask_2 = sim_an > sim_ap.reshape(b, 1)
mask = mask_1 | mask_2

sim_an_masked = np.copy(sim_an)
sim_an_masked[mask] = -np.inf

In [15]:
sim_an_masked

array([[       -inf,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129,        -inf, -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314,        -inf,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824,        -inf]])

In [16]:
closest_neg = np.max(sim_an_masked, axis=1, keepdims=True)
closest_neg

array([[0.26899885],
       [0.84512129],
       [0.24476757],
       [0.38876824]])

Same implementation but in Tensorflow

In [17]:
sim_ap = tf.linalg.diag_part(sim)
tf.linalg.diag(sim_ap)

<tf.Tensor: shape=(4, 4), dtype=float64, numpy=
array([[0.67526083, 0.        , 0.        , 0.        ],
       [0.        , 0.98493566, 0.        , 0.        ],
       [0.        , 0.        , 0.81000621, 0.        ],
       [0.        , 0.        , 0.        , 0.88953395]])>

In [18]:
sim_an = sim - tf.linalg.diag(sim_ap)
sim_an

<tf.Tensor: shape=(4, 4), dtype=float64, numpy=
array([[ 0.        ,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129,  0.        , -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314,  0.        ,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824,  0.        ]])>

In [19]:
mean_neg = tf.math.reduce_sum(sim_an, axis=1) / (b - 1)
tf.expand_dims(mean_neg,1)

<tf.Tensor: shape=(4, 1), dtype=float64, numpy=
array([[ 0.04124018],
       [-0.0275014 ],
       [-0.5363405 ],
       [ 0.16887277]])>

In [20]:
mask_1 = tf.eye(b)==1
mask_2 = sim_an > tf.expand_dims(sim_ap,1)
mask = tf.cast((mask_1 | mask_2), tf.float64)
sim_an_masked = sim_an - 2.0*mask

In [21]:
sim_an_masked

<tf.Tensor: shape=(4, 4), dtype=float64, numpy=
array([[-2.        ,  0.26899885, -0.34917056,  0.20389224],
       [ 0.84512129, -2.        , -0.76265658, -0.16496892],
       [-0.85824594, -0.99554314, -2.        ,  0.24476757],
       [ 0.01974567,  0.09810441,  0.38876824, -2.        ]])>

In [23]:
closest_neg = tf.math.reduce_max(sim_an_masked, axis=1)
tf.expand_dims(closest_neg,1)

<tf.Tensor: shape=(4, 1), dtype=float64, numpy=
array([[0.26899885],
       [0.84512129],
       [0.24476757],
       [0.38876824]])>

### Triplet Loss

In [27]:
alpha = 0.5

l_1 = tf.maximum(mean_neg - sim_ap + alpha, 0)
l_2 = tf.maximum(closest_neg - sim_ap + alpha, 0)
l_full = l_1 + l_2
cost = tf.math.reduce_sum(l_full)
cost

<tf.Tensor: shape=(), dtype=float64, numpy=0.4539236582210886>