In [1]:
import numpy as np

In [2]:
from pymilvus import connections
connections.connect(
  alias="default", 
  host='localhost', 
  port='19530'
)

In [3]:
from pymilvus import utility
utility.list_collections()

['test']

In [None]:
# utility.drop_collection("reverse_image_search")

In [4]:
from pymilvus import CollectionSchema, FieldSchema, DataType
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility

def create_milvus_collection(collection_name, dim):
    if utility.has_collection(collection_name):
        utility.drop_collection(collection_name)
    
    fields = [
        FieldSchema(name='name', dtype=DataType.VARCHAR, descrition='image name', max_length=500, 
                    is_primary=True, auto_id=False),
        FieldSchema(name='id', dtype=DataType.INT64, descrition='image id'),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, descrition='image embedding vectors', dim=dim)
    ]
    schema = CollectionSchema(fields=fields, description='reverse image search')
    collection = Collection(name=collection_name, schema=schema)

    index_params = {
        'metric_type':'L2',
        'index_type':"IVF_FLAT",
        'params':{"nlist":2048}
    }
    collection.create_index(field_name="embedding", index_params=index_params)
    return collection

collection = create_milvus_collection('test', 768)

In [5]:
n = 2508110
features = np.ones((n, 768))
features = features * np.arange(n).reshape(-1, 1)

ids = [x for x in range(n)]

image_names = [f"image{n - i}" for i in range(n)]

data = [
    image_names, ids, features
]
mr = collection.insert(data)

: 

: 

In [8]:
print('Total number of inserted data is {}.'.format(collection.num_entities))

Total number of inserted data is 0.


In [7]:
collection.load()

In [46]:
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
query = np.ones((1, 768)) * 7

results = collection.search(
	data=query, 
	anns_field="embedding", 
	param=search_params, 
	limit=10, 
	expr= "id in [1,2]",
	consistency_level="Strong"
)

In [47]:
results[0].ids

['image8', 'image9']

In [51]:
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
query = np.ones((1, 768)) * 7

results = collection.search(
	data=query, 
	anns_field="embedding", 
	param=search_params, 
	limit=10, 
	expr= 'name in ["image1","image2","image3"]',
	consistency_level="Strong"
)

In [52]:
results[0].ids

['image3', 'image2', 'image1']