In [1]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
from qdrant_client.http.models import Distance, VectorParams
from qdrant_client.http.models import PointStruct
import numpy as np
import random
import pandas as pd
import utils

In [2]:
# Client for the Qdrant server at localhost:6333.
qdrantClient = QdrantClient(host='localhost', port=6333)
# Load the siftsmall dataset through the project's utils readers:
# base vectors to index, query vectors, and the k-NN ground truth ids.
base_vectors = utils.read_fvecs("../../dataset/siftsmall/siftsmall_base.fvecs")
query_vectors = utils.read_fvecs("../../dataset/siftsmall/siftsmall_query.fvecs")
knn_groundtruth = utils.read_ivecs("../../dataset/siftsmall/siftsmall_groundtruth.ivecs")

Loading file: siftsmall_base.fvecs
    The dimension of the vectors in the file is: 128
    The final shape of the loaded dataset siftsmall_base.fvecs is (10000, 128).
Loading file: siftsmall_query.fvecs
    The dimension of the vectors in the file is: 128
    The final shape of the loaded dataset siftsmall_query.fvecs is (100, 128).
 Loading file: siftsmall_groundtruth.ivecs
    The dimension of the vectors in the file is: 100
    The final shape of the loaded dataset is (100, 100).


In [3]:
# Dimensionality of the vectors stored in the collection
# (the loader output above reports 128 for the siftsmall files).
vector_size = 128
collection_name = "ann_collection"

# recreate_collection drops any existing collection with this name before
# creating it, so a separate delete_collection call is not needed.
# The collection compares vectors with Euclidean distance.
qdrantClient.recreate_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(size=vector_size, distance=Distance.EUCLID),
)

True

In [4]:
# Wrap the raw base vectors in a one-column DataFrame; later cells read
# base_vectors['vector']. The guard keeps this cell re-runnable: on a second
# run base_vectors is already a DataFrame, which has no .tolist().
if not isinstance(base_vectors, pd.DataFrame):
    base_vectors = pd.DataFrame({'vector': base_vectors.tolist()})

# One point per base vector, using the DataFrame index label as the point id.
# Iterating the column directly avoids building a Series per row as
# iterrows() does.
batch_points = [
    PointStruct(id=point_id, vector=vector)
    for point_id, vector in base_vectors['vector'].items()
]

# wait=True blocks until the server has applied the upsert.
operation_info = qdrantClient.upsert(
    collection_name=collection_name,
    wait=True,
    points=batch_points
)
print(operation_info)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


In [5]:
# Wrap the raw query vectors in a one-column DataFrame; later cells read
# query_vectors['vector']. The guard keeps this cell re-runnable: on a second
# run query_vectors is already a DataFrame, which has no .tolist().
if not isinstance(query_vectors, pd.DataFrame):
    query_vectors = pd.DataFrame({'vector': query_vectors.tolist()})

In [6]:
# result_ids[k] collects the ids of the points Qdrant returns for the k-th
# query. Previously this list was created but never filled, so only the hits
# of the last query survived (in search_result).
result_ids = []
for vec in query_vectors["vector"]:
    search_result = qdrantClient.search(
            collection_name=collection_name,
            query_vector=vec,
            # Score cut-off for the range query. The collection uses
            # Distance.EUCLID, where the score is a distance.
            score_threshold=500,
            # Equal to the number of base vectors loaded (10000), so the
            # limit itself does not truncate the result.
            limit=10000
            )
    result_ids.append([hit.id for hit in search_result])
# search_result is left bound to the hits of the last query, as before.
    

In [7]:
# Number of hits for the LAST query only: search_result is reassigned on
# every iteration of the search loop above.
len(search_result)

3177

In [8]:
# NOTE(review): threshold=600 here differs from the 500 used as
# score_threshold in the Qdrant search and as `threshold` in the brute-force
# cell further down, which also reassigns `ids` — confirm which value is intended.
ids = utils.range_truth(query_vectors, base_vectors, threshold=600)

In [24]:
import pandas as pd

def calculate_distance(vector1, vector2, similarity_function='euclidean'):
    """Compare two vectors with the selected metric.

    Args:
        vector1: First vector (sequence or array of numbers).
        vector2: Second vector, same length as ``vector1``.
        similarity_function: ``'euclidean'`` returns the L2 distance
            (smaller means closer). ``'cosine'`` returns the cosine
            *similarity* (larger means closer), not a distance.

    Returns:
        The metric value as a NumPy float.

    Raises:
        ValueError: If ``similarity_function`` is not one of the supported names.
    """
    if similarity_function not in ('euclidean', 'cosine'):
        raise ValueError("Unsupported similarity function")

    # Promote to float64 before any arithmetic: with unsigned integer inputs
    # (e.g. uint8 descriptors) the subtraction would otherwise wrap around
    # and produce a wrong distance.
    first = np.asarray(vector1, dtype=np.float64)
    second = np.asarray(vector2, dtype=np.float64)

    if similarity_function == 'euclidean':
        return np.linalg.norm(first - second, ord=2)
    return np.dot(first, second) / (np.linalg.norm(first) * np.linalg.norm(second))

# Brute-force range-search ground truth: for every query, the positions of
# all base vectors whose Euclidean distance is below `threshold`.
# def range_truth(query_vectors, base_vectors, threshold = 300, function='euclidean'):
threshold = 500

# Stack the per-row vector lists into dense float matrices once, so each
# query needs a single vectorised norm instead of one Python call per pair.
query_matrix = np.asarray(query_vectors['vector'].tolist(), dtype=np.float64)
base_matrix = np.asarray(base_vectors['vector'].tolist(), dtype=np.float64)

# Fill one row per query; this avoids materialising the full
# (queries x base x dim) difference tensor in memory.
distance_matrix = np.empty((len(query_matrix), len(base_matrix)), dtype=np.float64)
for i, query in enumerate(query_matrix):
    distance_matrix[i] = np.linalg.norm(base_matrix - query, axis=1)

# Rows are labelled by query index, columns by base-vector index.
distances_df = pd.DataFrame(
    distance_matrix,
    index=query_vectors.index,
    columns=base_vectors.index,
)

mask = distances_df < threshold

# ids[i] is the array of column positions within range of query i.
ids = np.empty((len(query_vectors),), dtype=object)
for i, within_range in enumerate(mask.to_numpy()):
    ids[i] = np.flatnonzero(within_range)

# print(range_truth(query_vectors, base_vectors))

In [29]:
# One row per query: column 0 holds the array of in-range ids,
# 'size' holds how many ids that array contains.
df = pd.DataFrame(ids)
df['size'] = df[0].map(len)
df

Unnamed: 0,0,size
0,"[22, 24, 25, 26, 37, 41, 42, 43, 45, 47, 49, 6...",3899
1,"[22, 24, 25, 26, 35, 36, 37, 40, 41, 42, 43, 4...",4294
2,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14,...",4533
3,"[22, 24, 25, 26, 36, 37, 40, 42, 43, 44, 45, 4...",4181
4,"[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15...",4213
...,...,...
95,"[22, 24, 26, 34, 36, 40, 43, 44, 46, 48, 67, 7...",2729
96,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14,...",3862
97,"[0, 1, 3, 5, 6, 7, 8, 9, 10, 11, 13, 14, 17, 1...",3535
98,"[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 14, 15...",3423


In [17]:
# Display the query-by-base distance matrix (rows: queries, columns: base vectors).
distances_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,9990,9991,9992,9993,9994,9995,9996,9997,9998,9999
0,615.588336,632.036391,640.9922,645.517622,661.588241,655.126705,598.906504,630.780469,601.458228,607.010708,...,504.561195,494.535135,616.36434,601.262838,499.944997,417.714017,635.899363,646.642869,472.942914,425.373953
1,610.535011,606.677839,624.342054,626.532521,626.92663,622.500602,601.693444,588.870105,589.647352,585.72007,...,424.028301,460.405256,635.192097,592.323391,405.002469,446.254412,632.045884,598.121225,387.908494,372.831061
2,368.975609,393.133565,354.997183,413.22028,316.235672,475.872882,371.630193,384.523081,338.939523,407.91911,...,612.011438,652.990812,383.826784,564.677784,637.428427,626.57721,328.112785,473.071876,601.294437,644.699155
3,655.442599,652.196289,674.282582,668.457179,679.575603,669.987313,639.219055,639.237045,643.880424,630.391942,...,431.979166,434.67114,650.091532,642.007009,456.680413,416.30878,660.536146,668.675557,470.28821,383.315797
4,361.78723,441.64805,381.625471,459.551956,418.829321,514.416174,385.61639,436.409212,375.996011,388.179082,...,646.465003,679.018409,364.540807,561.225445,608.425838,596.241562,434.868946,510.374372,622.862746,661.681192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,613.087269,629.719779,626.56524,630.880337,635.307799,609.747489,595.068063,606.503916,599.115181,582.926239,...,394.962023,473.075047,644.291859,601.986711,403.824219,436.742487,625.091193,593.600876,462.873633,443.156857
96,452.076321,396.570296,440.578029,423.569357,436.851233,478.13701,462.215318,393.946697,370.469972,445.162892,...,564.961946,644.990698,422.562421,541.358476,586.125413,607.846198,424.61512,483.045547,569.979824,615.609454
97,492.067069,439.011389,504.735574,449.417401,508.629531,399.452125,481.194347,446.867989,466.505091,397.495912,...,621.648615,616.647387,592.035472,395.757502,641.76008,535.083171,591.701783,389.625461,543.080105,577.842539
98,409.576611,449.797732,397.651858,446.254412,371.39063,543.300101,403.572794,426.412945,365.781355,465.151588,...,621.26162,681.400763,357.050417,592.446622,626.668174,648.497494,247.741801,558.627783,612.095581,670.640738
