In [1]:
from pymilvus import Collection, FieldSchema, CollectionSchema, DataType, connections, utility

# Register the 'default' connection to the Milvus server on localhost.
connections.connect(alias='default', host='localhost', port='19530')

# Get a handle on the 'Album1' collection and request a compaction of it.
collection = Collection('Album1')
collection.compact()

# Last expression of the cell: display the names of the available collections.
utility.list_collections()

['Album1']

In [34]:
# Load the collection into memory (one replica) so it can be searched/queried.
collection.load(replica_number=1)

In [18]:
## Vector similarity search on the "song_vec" field

# A single query vector; search() takes a list of vectors.
query_vectors = [[0.1, 0.2]]

# Search parameters; the inner "params" keys depend on the index type.
ann_search_params = {"metric_type": "L2", "params": {"search_k": 64}}

results = collection.search(
    data=query_vectors,
    anns_field="song_vec",      # vector field to search in
    param=ann_search_params,
    limit=10,                   # cap on the number of hits returned
    expr=None,                  # no scalar filter: pure vector search
    output_fields=['song_id', 'song_name'],
)

# Only one query vector was sent, so its hits are in results[0].
for result in results[0]:
    print(result)


id: 664, distance: 0.00013574730837717652, entity: {'song_id': 664, 'song_name': 'LSQAWGD'}
id: 2450, distance: 0.0002820241206791252, entity: {'song_id': 2450, 'song_name': 'RTQOPDA'}
id: 3227, distance: 0.0002841656096279621, entity: {'song_id': 3227, 'song_name': 'PSUOXHO'}
id: 1328, distance: 0.00041068007703870535, entity: {'song_id': 1328, 'song_name': 'DXRMATA'}
id: 4507, distance: 0.0004612235352396965, entity: {'song_id': 4507, 'song_name': 'AMBOZIR'}
id: 4918, distance: 0.00048470887122675776, entity: {'song_id': 4918, 'song_name': 'YBWTVWC'}
id: 2216, distance: 0.0005587755586020648, entity: {'song_id': 2216, 'song_name': 'YAQEEIM'}
id: 2658, distance: 0.0006584832444787025, entity: {'song_id': 2658, 'song_name': 'AAAVNPA'}
id: 1495, distance: 0.000728147104382515, entity: {'song_id': 1495, 'song_name': 'JAUCQLX'}
id: 1661, distance: 0.0007731809164397418, entity: {'song_id': 1661, 'song_name': 'PNEOOOL'}


In [None]:
## The distance above is the Euclidean (L2) distance between the input vector and the returned vector (Milvus reports L2 as the squared distance, i.e. without the final square root)

In [45]:
# Scalar query: filter entities on scalar fields only (no vector involved).

# Names starting with "AZ" whose song_id lies strictly between 2000 and 5000.
scalar_filter = 'song_name like "AZ%" && 2000 < song_id < 5000'

query_res = collection.query(
    expr=scalar_filter,
    limit=10,
    output_fields=['song_id', 'song_name', 'listen_count'],
)

# Print each matching entity on its own line.
for result in query_res:
    print(result)

{'song_id': 2635, 'song_name': 'AZLMLPJ', 'listen_count': 19562}
{'song_id': 4672, 'song_name': 'AZUUOIZ', 'listen_count': 19713}


In [31]:
## Hybrid search: vector similarity combined with a scalar filter

# A single query vector; search() takes a list of vectors.
hybrid_query_vectors = [[0.1, 0.2]]

# Search parameters; the inner "params" keys depend on the index type.
hybrid_search_params = {"metric_type": "L2", "params": {"search_k": 64}}

hybrid_res = collection.search(
    data=hybrid_query_vectors,
    anns_field="song_vec",          # vector field to search in
    param=hybrid_search_params,
    limit=10,                       # cap on the number of hits returned
    expr="listen_count <= 500",     # scalar condition that makes this a hybrid search
    output_fields=['song_id', 'song_name', 'listen_count'],
)

# Only one query vector was sent, so its hits are in hybrid_res[0].
for result in hybrid_res[0]:
    print(result)

id: 4636, distance: 0.013402235694229603, entity: {'listen_count': 445, 'song_id': 4636, 'song_name': 'TFFWCEJ'}
id: 1658, distance: 0.014259373769164085, entity: {'listen_count': 404, 'song_id': 1658, 'song_name': 'ZDWWGXL'}
id: 4725, distance: 0.029278799891471863, entity: {'listen_count': 324, 'song_id': 4725, 'song_name': 'COOAZPO'}
id: 4953, distance: 0.04059791937470436, entity: {'listen_count': 476, 'song_id': 4953, 'song_name': 'EUUWLWY'}
id: 2471, distance: 0.050674326717853546, entity: {'listen_count': 472, 'song_id': 2471, 'song_name': 'BMLYPDN'}
id: 814, distance: 0.06299011409282684, entity: {'listen_count': 289, 'song_id': 814, 'song_name': 'JSLUZVC'}
id: 1014, distance: 0.11874350905418396, entity: {'listen_count': 193, 'song_id': 1014, 'song_name': 'KCUYLCA'}
id: 561, distance: 0.12167960405349731, entity: {'listen_count': 118, 'song_id': 561, 'song_name': 'XEFTBPQ'}
id: 4726, distance: 0.13499119877815247, entity: {'listen_count': 348, 'song_id': 4726, 'song_name': 'YN

In [32]:
# Release the collection from memory.
collection.release()