## Define a Latent Space and create a search index from it

In [None]:
from latentis.space.search import SearchMetric
from latentis.utils import seed_everything
from latentis.space import LatentSpace

import torch
import numpy as np

In [None]:
# Number of random vectors generated to populate the index
N_SAMPLES = 100
# Dimensionality of every vector in the latent space
SPACE_DIM = 256
# Metric passed to the search index when it is built
SEARCH_METRIC = SearchMetric.COSINE

In [None]:
# Fix the random seed so the random vectors drawn in this notebook are reproducible across runs
seed_everything(42)

In [None]:
# Random Gaussian vectors that will populate the index: one row per sample
vectors = torch.randn(N_SAMPLES, SPACE_DIM)
# One string key per vector: the decimal form of its row position ("0", "1", ...)
keys = list(map(str, range(N_SAMPLES)))

In [None]:
# Wrap the raw vectors and their keys into a LatentSpace
space = LatentSpace(vector_source=(vectors, keys))

# Turn the LatentSpace into a SearchIndex that uses the configured metric
index = space.to_index(metric_fn=SEARCH_METRIC)

Equivalent code, without resorting to the `LatentSpace` class, would be:

```python
from latentis.space.search import SearchIndex


index = SearchIndex.create(
    num_dimensions=SPACE_DIM,
    metric_fn=SEARCH_METRIC,
    name="demo_space",
)
index.add_vectors(vectors=vectors, keys=keys)
```

## Search kNN

By single vector

In [None]:
# Use the vector stored at index offset 0 as the query
query_vectors = index.get_vector(query_offset=0)

# Retrieve the 5 nearest neighbors of the query together with their distances
neighbor_ids, distances = index.search_knn(query_vectors=query_vectors, k=5)

# Last expression of the cell: shown as the cell output
f"Neighbors: {neighbor_ids.tolist()} | Distances {[f'{d:.3f}' for d in distances.tolist()]}"

By multiple vectors already present in the index, identified by their index offsets:

In [None]:
# Query with the vectors already stored at index offsets 0 and 1
neighbor_ids, distances = index.search_knn(query_offsets=[0, 1], k=5)
# The result is a tuple of two numpy arrays: one with the neighbor ids, one with their distances.
# First dimension: number of queries (index offsets); second dimension: number of neighbors (k).
neighbor_ids.shape, distances.shape

By multiple vectors, not already present in the index:

In [None]:
# Query with two freshly drawn random vectors that are not stored in the index
neighbor_ids, distances = index.search_knn(query_vectors=torch.randn(2, SPACE_DIM), k=7)
# The result is a tuple of two numpy arrays: one with the neighbor ids, one with their distances.
# First dimension: number of queries (query vectors); second dimension: number of neighbors (k).
neighbor_ids.shape, distances.shape

By a vector not already present in the index

In [None]:
# Draw a single random query vector that is not stored in the index.
# Its size comes from SPACE_DIM (rather than a hard-coded 256) so the cell keeps
# working when the dimensionality constant at the top of the notebook is changed.
new = torch.randn(SPACE_DIM)

# Retrieve the 5 nearest neighbors of the new vector together with their distances
neighbor_ids, distances = index.search_knn(query_vectors=new, k=5)
# Last expression of the cell: shown as the cell output
f"Neighbors: {neighbor_ids.tolist()} | Distances {[f'{d:.3f}' for d in distances.tolist()]}"

Add the vector to the index with a custom key and retrieve the most similar ones

In [None]:
# Store the vector under the key "custom"; the returned value is kept as its index offset
new_offset = index.add_vector(vector=new, key="custom")

print(f"Index offset for the custom vector: {new_offset}")

Now there are two ways to retrieve the custom vector (up to the index transformations/precision/approximations):

In [None]:
# Look the vector up by the key it was stored under...
custom_vector1 = index.get_vector(query_key="custom")
# ...and by the offset returned when it was added
custom_vector2 = index.get_vector(query_offset=new_offset)

# Both lookups are expected to yield the same values (compared up to a numerical tolerance)
np.allclose(custom_vector1, custom_vector2)

## Add multiple vectors

In [None]:
# Another batch of random vectors with the same dimensionality as the indexed ones
new_vectors = torch.randn(N_SAMPLES, SPACE_DIM)

# Add the whole batch in one call, without explicit keys.
# NOTE(review): presumably returns the offsets assigned to the new vectors — confirm against the SearchIndex API
new_vector_offsets = index.add_vectors(vectors=new_vectors)

## Search by range

In [None]:
# Use the vector stored at index offset 10 as the query
query_vectors = index.get_vector(query_offset=10)

# Range search around the query with radius 0.99 on the cosine-metric index; the result is shown as the cell output
index.search_range(query_vectors=query_vectors, radius=0.99)

Change the metric and search by range

In [None]:
# Build a second index over the same space, this time with the Euclidean metric
eu_index = space.to_index(metric_fn=SearchMetric.EUCLIDEAN, keys=[str(i) for i in range(N_SAMPLES)])
# Use the vector stored at offset 10 of the Euclidean index as the query
query_vectors = eu_index.get_vector(query_offset=10)

# Search the Euclidean index itself: querying the cosine `index` built earlier
# would ignore the metric change this cell is meant to demonstrate.
# NOTE(review): the radius is presumably expressed in the index's metric, so 0.99
# may be very tight for Euclidean distances between random vectors — confirm.
eu_index.search_range(query_vectors=query_vectors, radius=0.99)