In [30]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
import numpy as np
import random
import pandas as pd
import time


import utils

In [16]:
# Client for the Qdrant instance listening on localhost:6333.
qdrantClient = QdrantClient(host='localhost', port=6333)

# Base and query vectors are both stored as .fvecs files and go through the
# same reader; they are loaded in that order.
base_vectors, query_vectors = [
    utils.read_fvecs(fvecs_path)
    for fvecs_path in (
        "../../dataset/siftsmall/siftsmall_base.fvecs",
        "../../dataset/siftsmall/siftsmall_query.fvecs",
    )
]

# Ground-truth neighbours shipped with the dataset (.ivecs file).
knn_groundtruth = utils.read_ivecs(
    "../../dataset/siftsmall/siftsmall_groundtruth.ivecs"
)

Loading file: siftsmall_base.fvecs
    The dimension of the vectors in the file is: 128
    The final shape of the loaded dataset siftsmall_base.fvecs is (10000, 128).
Loading file: siftsmall_query.fvecs
    The dimension of the vectors in the file is: 128
    The final shape of the loaded dataset siftsmall_query.fvecs is (100, 128).
 Loading file: siftsmall_groundtruth.ivecs
    The dimension of the vectors in the file is: 100
    The final shape of the loaded dataset is (100, 100).


In [17]:
# Dimensionality of the vectors stored in the collection (the loader output
# for the siftsmall files reports 128) and the collection's name.
vector_size = 128
collection_name = "ann"

# recreate_collection() already removes an existing collection with this name
# before creating it, so a separate delete_collection() call beforehand is a
# redundant round-trip to the server. The collection uses Euclidean distance.
qdrantClient.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=vector_size, distance=Distance.EUCLID),
)

True

In [18]:
# Wrap each vector set in a single-column DataFrame (column 'vector', one
# list of floats per row), which is the form the following cells index into.
#
# The names are rebound in place, so without the isinstance guards a second
# execution of this cell would call .tolist() on a DataFrame and raise.
# The guards make the cell safe to re-run.
if not isinstance(query_vectors, pd.DataFrame):
    query_vectors = pd.DataFrame({'vector': query_vectors.tolist()})
if not isinstance(base_vectors, pd.DataFrame):
    base_vectors = pd.DataFrame({'vector': base_vectors.tolist()})


In [19]:
# One PointStruct per base vector; the DataFrame index label becomes the
# point id and the 'vector' cell becomes the stored vector.
batch_points = [
    PointStruct(id=point_id, vector=vector)
    for point_id, vector in zip(base_vectors.index, base_vectors["vector"])
]

# Send all points in a single upsert request with wait=True and show the
# operation status returned by the client.
operation_info = qdrantClient.upsert(
    collection_name=collection_name,
    points=batch_points,
    wait=True,
)
print(operation_info)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


In [20]:
# Reference neighbours for the query set, computed by the project helper with
# k=100, the 'euclidean' function and filtering disabled.
# NOTE(review): the helper's return layout is not visible here; the recall
# cell indexes it as truth[i] for the i-th query - confirm against utils.
truth = utils.top_k_neighbors(
    query_vectors,
    base_vectors,
    k=100,
    function='euclidean',
    filtering=False,
)


In [37]:
# Run one Qdrant search per query vector and time the whole batch.
#
# result_ids[i] holds the ids of the (up to) 100 points returned for the i-th
# query; time_span is the elapsed time in seconds and is read by the metrics
# cell to compute QPS.
print('Search function starting')
result_ids = []

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()

# Iterate over the column directly rather than iterrows(), so that building a
# Series per row is not counted as search time.
for query_vec in query_vectors["vector"]:
    hits = qdrantClient.search(
        collection_name=collection_name,
        query_vector=query_vec,
        limit=100,
    )
    result_ids.append([hit.id for hit in hits])

end_time = time.perf_counter()
time_span = end_time - start_time
print(f'Search function took {time_span} seconds')


Search function starting
Search function took 0.4060990810394287 seconds
QPS = 246.24532452534868


In [42]:
# Aggregate recall over all queries, plus queries-per-second for the timed
# search loop (time_span comes from the search cell).
#
# Recall is (relevant ids retrieved) / (relevant ids), so the denominator is
# the size of the ground-truth set for each query. Dividing by the number of
# *retrieved* ids instead would measure precision and would overstate recall
# whenever the search returns fewer hits than there are true neighbours.
true_positives = 0
n_relevant = 0
for i, retrieved_ids in enumerate(result_ids):
    # intersect1d returns the unique ids present in both sequences.
    true_positives += len(np.intersect1d(truth[i], retrieved_ids))
    n_relevant += np.asarray(truth[i]).size

# With no queries (or empty ground truth) recall is undefined; report NaN
# instead of raising ZeroDivisionError.
average_recall = true_positives / n_relevant if n_relevant else float('nan')

print(f'QPS = {(len(query_vectors) / time_span):.4f}')
print(f'Average recall: {average_recall}')

QPS = 246.2453
Average recall: 1.0
