In [2]:
import hnswlib
import numpy as np

# Configuration
SEED = 42  # Fixed seed so the generated vectors are the same on every run
data_size = 10000  # Capacity of the index (passed as max_elements); may exceed the number of inserted vectors
dim = 128  # Dimension of the vectors
num_elements = 1000  # Number of vectors actually generated and inserted
k = 5  # Number of nearest neighbors retrieved per query
num_to_print = 10  # How many query results to print

# The index cannot hold more items than its declared capacity.
assert num_elements <= data_size, "num_elements exceeds index capacity (data_size)"

# Generate random data from a seeded generator (float32 is what the index is fed)
rng = np.random.default_rng(SEED)
data = rng.standard_normal((num_elements, dim)).astype(np.float32)

# Initialize the HNSWlib index.
# NOTE: hnswlib's 'l2' space reports *squared* L2 distance, sum((a_i - b_i)^2),
# not the Euclidean norm itself; take np.sqrt(distances) for true Euclidean values.
p = hnswlib.Index(space='l2', dim=dim)

# Allocate the index for up to `data_size` elements (elements are added below)
p.init_index(max_elements=data_size, ef_construction=200, M=16)

# Add elements to the index; labels default to the row positions 0..num_elements-1
p.add_items(data)

# Set ef parameter (controls the recall / query-time trade-off)
p.set_ef(50)  # ef must be >= k (the number of neighbors requested)

# Query the index with the same vectors that were inserted, so each query's
# nearest neighbor is expected to be itself at distance 0.
labels, distances = p.knn_query(data, k=k)

# Print the first few results (bounded by the number of queries available)
for i in range(min(num_to_print, len(labels))):
    print(f"Query {i}:")
    print(f"Neighbors: {labels[i]}")
    print(f"Distances: {distances[i]}\n")


Query 0:
Neighbors: [  0 830  82 290 115]
Distances: [  0.      179.98962 180.39163 181.67729 189.05643]

Query 1:
Neighbors: [  1  82  10 672 296]
Distances: [  0.      165.26791 180.867   183.32487 183.71414]

Query 2:
Neighbors: [  2 406 195 448 616]
Distances: [  0.      182.16898 183.13275 185.14429 187.04024]

Query 3:
Neighbors: [  3 207 106 150 448]
Distances: [  0.      198.76974 204.71278 205.54095 206.0872 ]

Query 4:
Neighbors: [  4 706 804 595 641]
Distances: [  0.      212.95074 217.5586  219.46474 220.11826]

Query 5:
Neighbors: [  5 745 854 170 654]
Distances: [  0.      182.33856 182.35709 183.69017 183.77151]

Query 6:
Neighbors: [  6 145 533 553 406]
Distances: [  0.      192.09126 194.00081 197.98654 200.80621]

Query 7:
Neighbors: [  7 911 745 988 106]
Distances: [  0.      162.74696 166.1969  167.26115 176.02084]

Query 8:
Neighbors: [  8 399 670 335 349]
Distances: [  0.      166.45467 172.41602 174.5196  174.58582]

Query 9:
Neighbors: [  9 339 657 586 585]
Dist

In [3]:
# Display the neighbor-label array returned by p.knn_query(data, k=5):
# one row per query vector, five neighbor ids per row.
labels

array([[  0, 830,  82, 290, 115],
       [  1,  82,  10, 672, 296],
       [  2, 406, 195, 448, 616],
       ...,
       [997, 437, 399, 279, 335],
       [998, 821, 988, 349, 521],
       [999, 739, 927, 230, 264]], dtype=uint64)

In [4]:
# Display the distance array returned by p.knn_query(data, k=5); rows align with `labels`.
# NOTE(review): values look like squared L2 (hnswlib 'l2' space) — confirm before interpreting.
distances

array([[  0.     , 179.98962, 180.39163, 181.67729, 189.05643],
       [  0.     , 165.26791, 180.867  , 183.32487, 183.71414],
       [  0.     , 182.16898, 183.13275, 185.14429, 187.04024],
       ...,
       [  0.     , 187.62189, 188.10855, 189.41367, 189.92006],
       [  0.     , 181.98383, 190.10242, 190.18205, 193.98013],
       [  0.     , 149.94748, 162.15645, 169.93254, 170.14034]],
      dtype=float32)