In [15]:
import numpy as np
import torch
from torchmetrics.functional import pairwise_cosine_similarity
from scipy.spatial.distance import cdist
from sklearn.metrics.pairwise import cosine_similarity
# Toy inputs: five 3-d row vectors in `a`, three in `b`.
a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [208, 21, 8], [35, 36, 37]])
b = np.array([[3, 2, 1], [4, 5, 6], [5, 50, 500]])

# Use the GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# float32 copies of the integer arrays (the dtype the legacy
# torch.Tensor(ndarray) constructor produced), created directly on `device`.
a_t = torch.tensor(a, dtype=torch.float32, device=device)
b_t = torch.tensor(b, dtype=torch.float32, device=device)
a.shape, b.shape

((5, 3), (3, 3))

In [8]:
# Cosine similarity of every row of `a` against every row of `b`
# (cdist yields the cosine *distance*, hence the `1 -`).
sim = 1 - cdist(a, b, 'cosine')
print(sim)

# Best match in `b` for each row of `a`.
max_sim = sim.max(axis=1)
print(max_sim)

# Row indices of `a`, ordered from the weakest best-match to the strongest.
bottom_N = np.argsort(max_sim)
bottom_N[:3]

[[0.71428571 0.97463185 0.85360869]
 [0.85280287 1.         0.74156667]
 [0.88265899 0.99819089 0.70507303]
 [0.86101867 0.53654673 0.0579271 ]
 [0.91701186 0.99032815 0.65327633]]
[0.97463185 1.         0.99819089 0.86101867 0.99032815]


array([3, 0, 4])

In [12]:
def sim_matrix(a, b, eps=1e-8):
    """
    Pairwise cosine similarity between the rows of two 2-D tensors.

    Each row is divided by its L2 norm, with the norm floored at ``eps`` so
    that all-zero rows do not cause a division by zero; the normalised
    matrices are then multiplied.

    Args:
        a: tensor of shape (n, d).
        b: tensor of shape (m, d).
        eps: lower bound applied to every row norm before dividing.

    Returns:
        Tensor of shape (n, m); entry [i, j] is the similarity between
        ``a[i]`` and ``b[j]``.
    """
    def _unit_rows(x):
        # Column vector of row norms, floored element-wise against an
        # eps-filled tensor of the same shape.
        row_norms = x.norm(dim=1).unsqueeze(1)
        floor = eps * torch.ones_like(row_norms)
        return x / torch.max(row_norms, floor)

    return torch.mm(_unit_rows(a), _unit_rows(b).t())

def sim_matrix_2(a, b, eps=1e-8):
    """
    Pairwise cosine similarity between the rows of two 2-D tensors.

    Same computation as a row-normalise-then-matmul cosine similarity, with
    the row norms clamped from below at ``eps`` to avoid dividing by zero.

    Args:
        a: tensor of shape (n, d).
        b: tensor of shape (m, d).
        eps: minimum value allowed for a row norm.

    Returns:
        Tensor of shape (n, m); entry [i, j] is the similarity between
        ``a[i]`` and ``b[j]``.
    """
    # Row norms as column vectors so the division broadcasts across columns.
    a_len = a.norm(dim=1, keepdim=True).clamp(min=eps)
    b_len = b.norm(dim=1, keepdim=True).clamp(min=eps)
    unit_a = a / a_len
    unit_b = b / b_len
    return torch.mm(unit_a, unit_b.t())

In [32]:
# Build a (n, 3) float32 tensor from a list of rows.
# The tensor is created in one call instead of growing it with torch.cat
# inside the loop, which re-copies every earlier row on each iteration.
elements = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
for el in elements:
    print(el)
# reshape(-1, 3) keeps the (0, 3) shape of the original seed tensor when
# `elements` is empty.
arr_t = torch.tensor(elements, dtype=torch.float32).reshape(-1, 3)
arr_t

[1, 2, 3]
[4, 5, 6]
[7, 8, 9]


tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [37]:
# Row-wise softmax of `a_t`, once through the functional API and once through
# the module API. `dim` is passed explicitly to nn.Softmax: leaving it out
# relies on a deprecated implicit choice of dimension and emits a warning.
torch.nn.functional.softmax(a_t, dim=-1), torch.nn.Softmax(dim=-1)(a_t)

  torch.nn.functional.softmax(a_t, -1), torch.nn.Softmax()(a_t)


(tensor([[0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652],
         [1.0000, 0.0000, 0.0000],
         [0.0900, 0.2447, 0.6652]], device='cuda:0'),
 tensor([[0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652],
         [0.0900, 0.2447, 0.6652],
         [1.0000, 0.0000, 0.0000],
         [0.0900, 0.2447, 0.6652]], device='cuda:0'))

In [33]:
# Dimensions of the tensor assembled from `elements`.
arr_t.size()

torch.Size([3, 3])

In [20]:
# Benchmark inputs: 5000 and 100 random 256-d row vectors, drawn uniformly
# from [0, 1).
c, d = np.random.rand(5000, 256), np.random.rand(100, 256)

# float32 tensor copies on the GPU when available, otherwise on the CPU so
# the cell does not crash on machines without CUDA.
_bench_device = 'cuda' if torch.cuda.is_available() else 'cpu'
c_t = torch.tensor(c, dtype=torch.float32, device=_bench_device)
d_t = torch.tensor(d, dtype=torch.float32, device=_bench_device)

In [21]:
%timeit sim_matrix_2(c_t, d_t)
%timeit pairwise_cosine_similarity(c_t, d_t)
%timeit 1- cdist(c, d, metric = 'cosine')
%timeit sim_matrix(c_t, d_t)

153 µs ± 2.98 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
150 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
50.4 ms ± 97.4 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
161 µs ± 4.97 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [10]:
# Entropy-based uncertainty of each row of `a` with respect to the rows of `b`.
sim = 1 - cdist(a, b, metric='cosine')
print(sim)

# Turn each row of similarities into a distribution that sums to 1.
sim = sim / sim.sum(axis=1, keepdims=True)
print(sim)

# Element-wise p * log(p) terms. This replaces a Python double loop and a
# dead `np.inner` assignment that was overwritten on the next line.
# Caveat: a similarity of exactly 0 (or a negative one) makes log() return
# -inf / nan here, exactly as the loop version did.
entropy = sim * np.log(sim)
print(entropy)

# Shannon entropy per row: -sum_j p_ij * log(p_ij).
uncertainty = -entropy.sum(axis=1)
# Row indices ordered from most to least uncertain.
score = np.flip(uncertainty.argsort())
print(uncertainty)
print(score)

[[0.71428571 0.97463185 0.85360869]
 [0.85280287 1.         0.74156667]
 [0.88265899 0.99819089 0.70507303]
 [0.86101867 0.53654673 0.0579271 ]
 [0.91701186 0.99032815 0.65327633]]
[[0.28093543 0.38333207 0.3357325 ]
 [0.32871295 0.3854501  0.28583695]
 [0.34133229 0.38600953 0.27265818]
 [0.59156517 0.36863586 0.03979897]
 [0.35812154 0.38675382 0.25512464]]
[[-0.35668417 -0.36755935 -0.36643207]
 [-0.3657163  -0.36746636 -0.35796325]
 [-0.36689768 -0.36743985 -0.35432922]
 [-0.31056191 -0.36787866 -0.12830846]
 [-0.36774887 -0.36740333 -0.34850104]]
[1.09067559 1.09114591 1.08866675 0.80674904 1.08365324]
[1 0 2 4 3]


In [11]:
# Element-wise square of `b`.
b * b


array([[     9,      4,      1],
       [    16,     25,     36],
       [    25,   2500, 250000]])

In [14]:
from sklearn.utils.extmath import softmax

# Cosine similarity of every row of `a` against every row of `b`.
sim = 1 - cdist(a, b, 'cosine')
print(sim)

# Row-wise softmax instead of plain sum-normalisation; every row of the
# result sums to 1, which the next print checks.
sim = softmax(sim)
print(sim)
print(sim.sum(axis=1))

# Element-wise p * log(p) terms and the per-row entropy built from them.
entropy = sim * np.log(sim)
print(entropy)
uncertainty = (-1 * entropy).sum(axis=1)

# Row indices ordered from most to least uncertain.
score = uncertainty.argsort()[::-1]
print(uncertainty)
print(score)

[[0.71428571 0.97463185 0.85360869]
 [0.85280287 1.         0.74156667]
 [0.88265899 0.99819089 0.70507303]
 [0.86101867 0.53654673 0.0579271 ]
 [0.91701186 0.99032815 0.65327633]]
[[0.29011791 0.37639291 0.33348918]
 [0.32751345 0.3794513  0.29303525]
 [0.33786531 0.37924378 0.28289091]
 [0.46064885 0.3330072  0.20634396]
 [0.35158684 0.37833235 0.27008081]]
[1. 1. 1. 1. 1.]
[[-0.35901159 -0.36778168 -0.36621943]
 [-0.36557908 -0.36769932 -0.35968974]
 [-0.36662034 -0.36770569 -0.35720464]
 [-0.35705779 -0.36617178 -0.32565426]
 [-0.36751321 -0.36773232 -0.35354498]]
[1.0930127  1.09296814 1.09153066 1.04888383 1.08879052]
[0 1 2 4 3]


In [33]:
# Element-wise natural logarithm of whatever `sim` is currently bound to.
# NOTE(review): the recorded output does not equal the log of any `sim`
# matrix printed in this notebook, so it presumably comes from an earlier
# kernel state -- re-run top to bottom to confirm.
np.log(sim)

array([[-1.0688469 , -1.0497363 , -1.18232322],
       [-1.10197927, -0.95857095, -1.25756116],
       [-1.11195687, -0.93321612, -1.28085927],
       [-2.14197228, -0.2274323 , -2.45340064],
       [-1.12617567, -0.89857379, -1.31460994]])

In [31]:
# Display the current value of `entropy`.
# NOTE(review): the recorded output is 5x5, whereas the entropy arrays
# printed in this notebook are 5x3; it presumably dates from an earlier
# kernel state -- re-run top to bottom to confirm.
entropy

array([[-1.09694556, -1.09947812, -1.10117192, -1.56729902, -1.10427566],
       [-1.09378573, -1.09122991, -1.09144727, -1.49641382, -1.09248456],
       [-1.0928545 , -1.08879908, -1.08858132, -1.47552327, -1.08900961],
       [-1.06338288, -1.00112374, -0.98410193, -0.64367806, -0.96107886],
       [-1.09154338, -1.08537662, -1.08454623, -1.4461106 , -1.08411708]])

In [30]:
# Print every p * log(p) term of `sim`, one per line, in row-major order.
for row in sim:
    for p in row:
        print(p * np.log(p))

-0.3670465879397818
-0.36743923899431324
-0.3624597350766718
-0.36609172181705896
-0.367554876614156
-0.35758331231468216
-0.3657389465382279
-0.36702159700652437
-0.3558207764644702
-0.25151686148919083
-0.18116719329996847
-0.21099400433766677
-0.3651861190797651
-0.3658542531182911
-0.3530767084026482


In [5]:
# Prompt templates: each entry maps a class name to a caption string.
templates = [lambda c: f"an aerial photograph of {c}."]
classnames = ['a', 'b', 'c']

# One caption per (class, template) pair, grouped by class.
texts = []
for classname in classnames:
    texts.extend(make_caption(classname) for make_caption in templates)
texts

['an aerial photograph of a.',
 'an aerial photograph of b.',
 'an aerial photograph of c.']

In [22]:
# Rows of `a` in reverse order (the columns are left untouched).
a[::-1]

array([[ 35,  36,  37],
       [208,  21,   8],
       [  7,   8,   9],
       [  4,   5,   6],
       [  1,   2,   3]])

In [17]:
# Self-similarity of the rows of `a`; the diagonal is zeroed so that a row
# never counts as its own best match.
sim = 1 - cdist(a, a, 'cosine')
np.fill_diagonal(sim, 0)

# Keep only the similarities to rows 0 and 2 of `a`.
s = sim[:, [0, 2]]
print(s)

# Best match among those two reference rows, then the row indices sorted
# from the weakest best-match to the strongest.
max_sim = s.max(axis=1)
print(max_sim)
bottom_N = np.argsort(max_sim)
print(bottom_N)

[[0.         0.95941195]
 [0.97463185 0.99819089]
 [0.95941195 0.        ]
 [0.35002836 0.58202504]
 [0.93415227 0.99687849]]
[0.95941195 0.99819089 0.95941195 0.58202504 0.99687849]
[3 0 2 4 1]


In [6]:
# Two sets of five 3-d row vectors.
a1 = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [208, 21, 8],
    [35, 36, 37],
])
a2 = np.array([
    [1, 2, 4],
    [40, 15, 6],
    [7, 88, 9],
    [2, 2, 8],
    [3, 3, 3],
])

# Full 5x5 cosine-similarity matrix between the two sets.
sim = 1 - cdist(a1, a2, 'cosine')
print(sim)

# Diagonal = similarity of each a1[i] with the a2[i] at the same index.
np.diagonal(sim)

[[0.99146013 0.54518697 0.63249569 0.94491118 0.9258201 ]
 [0.94499299 0.71589733 0.67038986 0.88640526 0.98692754]
 [0.92436076 0.75558234 0.6747882  0.86304424 0.99483201]
 [0.2941414  0.96208469 0.1818534  0.29404984 0.65404015]
 [0.89218667 0.80354037 0.67685442 0.82762397 0.9997429 ]]


array([0.99146013, 0.71589733, 0.6747882 , 0.29404984, 0.9997429 ])

In [26]:
# Stack the rows of `a2` underneath the rows of `a1`.
a3 = np.append(arr=a1, values=a2, axis=0)
a3

array([[  1,   2,   3],
       [  4,   5,   6],
       [  7,   8,   9],
       [208,  21,   8],
       [ 35,  36,  37],
       [  1,   2,   4],
       [ 40,  15,   6],
       [  7,  88,   9],
       [  2,   2,   8],
       [  3,   3,   3]])

In [2]:
# Self-similarity of 20 random 5-d vectors.
arr1 = np.random.rand(20, 5)
arr = 1 - cdist(arr1, arr1, metric='cosine')
# print(arr)
# NOTE(review): `mask` is not read anywhere in this cell; drop it if no
# other cell relies on it.
mask = np.zeros(arr.shape, dtype=bool)

# Zero the diagonal so a vector is never reported as its own nearest match.
np.fill_diagonal(arr, 0)

# Highest similarity to any *other* vector, per column. Named `col_max`
# rather than `max` so the builtin max() is not shadowed for the rest of the
# kernel session.
col_max = np.max(arr, axis=0)
print(col_max)

# Indices that sort those maxima in ascending order, then the sorted values.
m = col_max.argsort()
col_max[m]

[0.96987555 0.93117361 0.99175944 0.94328947 0.94435571 0.95077862
 0.98731626 0.95820886 0.98253808 0.95508419 0.95820886 0.97900068
 0.97631643 0.9662245  0.9409062  0.93504933 0.95508419 0.97900068
 0.99175944 0.98731626]


array([0.93117361, 0.93504933, 0.9409062 , 0.94328947, 0.94435571,
       0.95077862, 0.95508419, 0.95508419, 0.95820886, 0.95820886,
       0.9662245 , 0.96987555, 0.97631643, 0.97900068, 0.97900068,
       0.98253808, 0.98731626, 0.98731626, 0.99175944, 0.99175944])

In [3]:
# Small benchmark inputs: 300 and 100 random 3-d row vectors.
# Note: this rebinds the names `c` and `d` that the larger benchmark uses.
c, d = np.random.rand(300, 3), np.random.rand(100, 3)

In [4]:
# Time two ways of getting the pairwise cosine similarity of `c` vs `d`:
# scikit-learn's cosine_similarity, and 1 minus SciPy's cosine distance.
%timeit cosine_similarity(c,d)
%timeit 1- cdist(c,d, metric = 'cosine')

655 µs ± 27 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
245 µs ± 1.4 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
