In [2]:
!pip install usearch

Collecting usearch
  Obtaining dependency information for usearch from https://files.pythonhosted.org/packages/59/72/7daf76b374f6a53deaa1536d69333c1a1280dbe88892d68fb07df21a4879/usearch-2.8.15-cp311-cp311-manylinux_2_28_x86_64.whl.metadata
  Downloading usearch-2.8.15-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (26 kB)
Collecting ucall (from usearch)
  Obtaining dependency information for ucall from https://files.pythonhosted.org/packages/7d/02/d532582bf4732645c298b0ddc8fd873359394919e21460fffad02625be8b/ucall-0.5.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata
  Downloading ucall-0.5.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (12 kB)
Downloading usearch-2.8.15-cp311-cp311-manylinux_2_28_x86_64.whl (1.5 MB)
[2K   [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m0:01[0m
[?25hDownloading ucall-0.5.1-cp311-cp311-manylinux_2_27_x86_64.manyl

In [1]:
import numpy as np
from scipy.spatial.distance import cdist
from usearch.index import search, MetricKind, Matches, BatchMatches

# Example with USearch library

Reference: https://github.com/unum-cloud/usearch

In [2]:
# Generate 10'000 random vectors with 1024 dimensions
vectors = np.random.rand(10000, 1024).astype(np.float32)
vector = np.random.rand(1024).astype(np.float32)

one_in_many: Matches = search(vectors, vector, vectors.shape[0], MetricKind.L2sq, exact=True)

many_in_many: BatchMatches = search(vectors, vectors, vectors.shape[0], MetricKind.L2sq, exact=True)



In [10]:
pairwise_distances1 = cdist(vectors, [vector], metric='euclidean')
pairwise_distances2 = cdist(vectors, vectors, metric='euclidean')

array([[0.9489768 , 0.22184363, 0.02418065, ..., 0.8075211 , 0.5554761 ,
        0.6378045 ],
       [0.82954603, 0.81962883, 0.9386333 , ..., 0.14193797, 0.47092098,
        0.6287613 ],
       [0.3138164 , 0.11954167, 0.17589971, ..., 0.9803737 , 0.55535626,
        0.2518607 ],
       ...,
       [0.5860322 , 0.63348436, 0.98931044, ..., 0.9566165 , 0.8487363 ,
        0.9923459 ],
       [0.13952401, 0.57835484, 0.27675444, ..., 0.82693315, 0.19215311,
        0.18101464],
       [0.9568119 , 0.6794955 , 0.30310842, ..., 0.429211  , 0.96527183,
        0.62788767]], dtype=float32)

In [None]:
vectors.shape

In [4]:
one_in_many.distances
many_in_many.distances

array([[  0.     , 151.80284, 152.1035 , ..., 190.18558, 190.81108,
        190.96123],
       [  0.     , 147.04694, 148.16756, ..., 188.85797, 189.96431,
        190.12527],
       [  0.     , 149.94037, 150.03108, ..., 191.68993, 192.018  ,
        193.61697],
       ...,
       [  0.     , 150.86911, 151.01382, ..., 192.04836, 192.694  ,
        192.7545 ],
       [  0.     , 149.73387, 150.4768 , ..., 194.8426 , 194.92531,
        196.29407],
       [  0.     , 149.84862, 150.16122, ..., 191.06216, 191.29985,
        193.41571]], dtype=float32)

In [11]:
one_in_many.to_list()

[(6398, 148.28492736816406),
 (143, 149.36392211914062),
 (5364, 149.3961181640625),
 (2408, 150.33773803710938),
 (4788, 150.7617645263672),
 (4569, 151.26873779296875),
 (8272, 151.555419921875),
 (3095, 151.58168029785156),
 (7043, 151.61358642578125),
 (5500, 151.66888427734375),
 (5613, 151.77276611328125),
 (6301, 151.88189697265625),
 (5890, 152.1690673828125),
 (540, 152.28231811523438),
 (9951, 152.41409301757812),
 (171, 152.41915893554688),
 (7865, 152.45040893554688),
 (9505, 152.47854614257812),
 (2065, 152.56646728515625),
 (762, 152.61732482910156),
 (2186, 152.81068420410156),
 (8400, 152.9129180908203),
 (649, 152.93020629882812),
 (4983, 152.9842071533203),
 (7042, 153.01051330566406),
 (5245, 153.03829956054688),
 (2278, 153.0926513671875),
 (609, 153.14703369140625),
 (3126, 153.23403930664062),
 (6393, 153.30552673339844),
 (950, 153.60391235351562),
 (1871, 153.6868133544922),
 (8731, 153.98638916015625),
 (1245, 154.013427734375),
 (8291, 154.26998901367188),
 (7

# Comparison with Scipy Spatial Distance

In [15]:
# Another example to compare with scipy spatial.distance
# Example vectors
data = np.random.uniform(2,3,size=[10,5])
vector =np.array([np.random.uniform(2,3,size=5)])


In [16]:
result: BatchMatches = search(data, vector, 10, MetricKind.L2sq, exact=True)

In [17]:
# get the indeces for ordering the pairwise distances
ind = np.array(result.to_list())[:,0].astype('int64')

In [20]:
np.sort(ind)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [21]:
d = result.distances

In [22]:
np.sqrt(d)[np.argsort(ind)]

array([0.98668766, 1.1879346 , 0.92372465, 0.92765003, 0.7264447 ,
       1.1178962 , 1.0378267 , 0.57018626, 0.54452884, 0.49036872],
      dtype=float32)

In [25]:
# Compute pairwise distances using Euclidean distance metric
pairwise_distances = cdist(data, vector, metric='euclidean')

print("Pairwise distances:")
print(np.column_stack([pairwise_distances,np.sqrt(d)[np.argsort(ind)]]))

Pairwise distances:
[[0.98668764 0.98668766]
 [1.18793465 1.18793464]
 [0.92372463 0.92372465]
 [0.92765006 0.92765003]
 [0.72644471 0.72644472]
 [1.11789616 1.1178962 ]
 [1.03782662 1.03782666]
 [0.57018625 0.57018626]
 [0.54452885 0.54452884]
 [0.49036873 0.49036872]]


In [26]:
pairwise_distances.shape

(10, 1)

# FAISS is dangerous

...and slower than USearch.

In [27]:
import faiss

In [28]:
# Concatenate the vectors into a single array
all_vectors = np.concatenate((data, vector.reshape(1,-1)), axis=0)

# Initialize Faiss index
d = all_vectors.shape[1]  # Dimension of vectors
index = faiss.IndexFlatL2(d)  # L2 distance (Euclidean distance)

# Add vectors to the index
index.add(all_vectors)

# Query for the nearest neighbors (in this case, excluding the vector itself)
k = all_vectors.shape[0] - 1  # Return distances to all other vectors
D, I = index.search(all_vectors, k)

# The first row of D corresponds to distances from vector1 to all vectors,
# and the second row corresponds to distances from vector2 to all vectors
pairwise_distances = D[1]

print("Pairwise distances:")
print(pairwise_distances)

Pairwise distances:
[0.         0.52158684 0.5947299  0.71165293 0.7919805  1.0101306
 1.0621352  1.3381842  1.3558803  1.4111887 ]


In [52]:
# Concatenate the vectors into a single array
all_vectors = np.concatenate((vector1, vector2), axis=0)

# Initialize Faiss index
d = all_vectors.shape[1]  # Dimension of vectors
index = faiss.IndexFlatL2(d)  # L2 distance (Euclidean distance)

# Add vectors to the index
index.add(all_vectors)

# Query for the nearest neighbors (in this case, excluding the vector itself)
k = all_vectors.shape[0] - 1  # Return distances to all other vectors
D, I = index.search(all_vectors, k)

# The first row of D corresponds to distances from vector1 to all vectors,
# and the second row corresponds to distances from vector2 to all vectors
pairwise_distances = D[1]

print("Pairwise distances:")
print(pairwise_distances)


Pairwise distances:
[0.         0.29514208 0.30921572 0.32049134 0.32110634 0.45318595
 0.69156986 0.7355278  0.94303757 1.1888701 ]


# Excuse for FAISS performance

https://medium.com/mlearning-ai/why-you-should-be-careful-using-faiss-c44996eda9ee