In [1]:
import numpy as np
from tqdm import tqdm

from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)



In [2]:
# Read the precomputed embedding matrix from disk (presumably ALS item
# factors, judging by the file name -- confirm against the data pipeline).
vecs = np.load(file="../data/als_vecs.npy")

# Rows are individual vectors, columns are embedding dimensions; the unpack
# below requires the array to be exactly two-dimensional.
n_vecs, dim = vecs.shape

In [3]:
# Register the "default" connection alias against the Milvus server on localhost.
connections.connect(
    alias="default",
    host="localhost",
    port="19530",
)

In [4]:
# Start from a clean slate: remove any existing "tracks" collection so that
# re-running the notebook does not insert the same ids a second time.
# NOTE: this is destructive -- all data in an existing collection is dropped.
if utility.has_collection(collection_name="tracks"):
    utility.drop_collection(collection_name="tracks")

In [5]:
# Two-field schema: a caller-supplied INT64 primary key (auto_id is off)
# and a float vector whose width matches the loaded embedding matrix.
fields = [
    FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=False,
    ),
    FieldSchema(
        name="embeddings",
        dtype=DataType.FLOAT_VECTOR,
        dim=dim,
    ),
]
schema = CollectionSchema(fields=fields, description="")

# Bind the `tracks` handle to the collection named "tracks" with this schema.
tracks = Collection(name="tracks", schema=schema)

In [6]:
# Display the collection's repr to confirm the name and schema it was created with.
tracks

<Collection>:
-------------
<name>: tracks
<description>: 
<schema>: {'auto_id': False, 'description': '', 'fields': [{'name': 'id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'embeddings', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 128}}]}

In [7]:
batch_size = 1000

# Insert in fixed-size chunks; each vector's id is its row position in `vecs`.
for start in tqdm(range(0, n_vecs, batch_size)):
    # The final chunk may be shorter than batch_size.
    stop = min(start + batch_size, n_vecs)
    ids = list(range(start, stop))
    rows = vecs[start:stop].tolist()
    # Column order follows the schema: primary keys first, then vectors.
    entities = [ids, rows]
    insert_result = tracks.insert(entities)

# Flush once, after every batch has been sent.
tracks.flush()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 170/170 [00:03<00:00, 47.50it/s]


In [8]:
# Sanity check: the "tracks" collection should exist after the inserts above.
utility.has_collection("tracks")

True

In [9]:
# Compare the number of vectors loaded from disk with the entity count
# reported by the collection; the two numbers should match.
n_vecs, tracks.num_entities

(169542, 169542)

In [10]:
# IVF_FLAT index over the vector field using L2 distance; nlist is the
# index's build-time parameter (128 here).
index = dict(
    index_type="IVF_FLAT",
    metric_type="L2",
    params=dict(nlist=128),
)
tracks.create_index(field_name="embeddings", index_params=index)

# Load the collection now, ahead of the search cells that follow.
tracks.load()

In [11]:
def similar(id, limit=10, nprobe=10):
    """Return the ids of the vectors closest to the vector stored at row ``id``.

    The query vector is read from the in-memory ``vecs`` matrix and searched
    against the ``embeddings`` field of the ``tracks`` collection using L2
    distance (the same metric the index is built with).

    Args:
        id: Row position in ``vecs`` of the query vector. Because ids were
            inserted as row positions, this is also the entity's primary key.
            (The name shadows the ``id`` builtin; it is kept so existing
            keyword callers keep working.)
        limit: Maximum number of hits to return. Defaults to 10.
        nprobe: Value passed as the ``nprobe`` search parameter.
            Defaults to 10.

    Returns:
        A list of primary keys in the order the search returned them. In the
        notebook's runs the query id itself comes back first.
    """
    query_vectors = [vecs[id].tolist()]
    search_params = {
        "metric_type": "L2",
        "params": {"nprobe": nprobe},
    }
    # Only the primary key is consumed below, so do not ask the server to
    # ship every hit's full embedding back as well.
    resp = tracks.search(
        data=query_vectors,
        anns_field="embeddings",
        param=search_params,
        limit=limit,
        output_fields=["id"],
    )
    # One query vector was sent, so only the first result set is relevant.
    return [hit.entity.get("id") for hit in resp[0]]

In [12]:
# Nearest neighbours of id 0; the query id itself is expected to come back first.
similar(0)

[0, 34574, 6761, 14766, 74274, 46600, 19795, 3331, 30335, 36532]

In [13]:
# Nearest neighbours of id 1.
similar(1)

[1, 21776, 18594, 9976, 17919, 8285, 12400, 6736, 18560, 13668]

In [14]:
# Nearest neighbours of id 2.
similar(2)

[2, 1189, 1488, 20638, 10329, 51061, 16045, 18576, 1116, 52169]