In [None]:
import time

import numpy as np
import string
import random

from pymilvus import MilvusClient, DataType

# Output templates: a banner for section headings and a line for latency reports.
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"

# Number of rows generated for the demo and the length of each embedding vector.
num_entities = 3000
dim = 8

In [None]:
print(fmt.format("start connecting to Milvus"))

# Replace with your Milvus server address
milvus_uri = "http://192.168.1.106:19530"
client = MilvusClient(uri=milvus_uri)

# Check whether the demo collection is already present on the server.
has = client.has_collection(collection_name="hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")

In [None]:
# Describe the collection layout: a caller-supplied string primary key, one
# scalar DOUBLE field and one float-vector field of `dim` dimensions.
# NOTE: the keyword is `enable_dynamic_field` (singular). create_schema accepts
# arbitrary keyword arguments, so a misspelled name is not rejected and the
# dynamic field would silently stay disabled.
schema = client.create_schema(
    auto_id=False,
    enable_dynamic_field=True,
    description="hello_milvus is the simplest demo to introduce the APIs",
)

schema.add_field(field_name="pk", datatype=DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field(field_name="random", datatype=DataType.DOUBLE)
schema.add_field(field_name="embeddings", datatype=DataType.FLOAT_VECTOR, dim=dim)

print(fmt.format("Create collection `hello_milvus`"))
client.create_collection(
    collection_name="hello_milvus",
    schema=schema,
    consistency_level="Strong",
)

In [None]:
# Banner announcing the data-generation and insert step of this cell.
print(fmt.format("Start inserting entities"))

def generate_random_string(length):
    """Return a random string of `length` ASCII letters and digits.

    Characters are drawn one at a time with random.choice, so the result is
    reproducible when the `random` module has been seeded. A non-positive
    `length` yields an empty string.
    """
    alphabet = string.ascii_letters + string.digits
    picked = [random.choice(alphabet) for _ in range(length)]
    return ''.join(picked)

def generate_random_entities(num_entities, dim):
    """Build a list of randomly populated rows ready for insertion.

    Args:
        num_entities: Number of row dicts to produce.
        dim: Length of the float list stored under "embeddings".

    Returns:
        A list of dicts, each with the keys "pk" (random 10-character
        string), "random" (float from random.random()) and "embeddings"
        (list of `dim` floats drawn with np.random.rand).
    """
    return [
        {
            "pk": generate_random_string(10),
            "random": random.random(),
            "embeddings": np.random.rand(dim).tolist(),
        }
        for _ in range(num_entities)
    ]

# Build the demo rows in memory, then hand them to a single insert call.
entities = generate_random_entities(num_entities=num_entities, dim=dim)

insert_result = client.insert(collection_name="hello_milvus", data=entities)

# Report the insert_count value returned in the insert response.
inserted_count = insert_result["insert_count"]
print(f"Number of entities in Milvus: {inserted_count}")

In [None]:
print(fmt.format("Start Creating index IVF_FLAT"))

# Collect every index definition first, then submit them in one create_index call.
index_params = client.prepare_index_params()

# Primary key field: no index type or parameters are specified here.
index_params.add_index(field_name="pk")

# Vector field: IVF_FLAT with the L2 metric and nlist=128.
ivf_flat_settings = {
    "field_name": "embeddings",
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}
index_params.add_index(**ivf_flat_settings)

client.create_index(collection_name="hello_milvus", index_params=index_params)

In [None]:
print(fmt.format("Start loading"))
# Load the collection ahead of the search and query calls that follow.
client.load_collection(collection_name="hello_milvus")

# -----------------------------------------------------------------------------
# search based on vector similarity
print(fmt.format("Start searching based on vector similarity"))
last_entity = entities[-1]  # Get the last entity
vectors_to_search = [last_entity["embeddings"]]  # A single query vector, wrapped in a list
search_params = {
    "metric_type": "L2",
    "params": {"nprobe": 10},
}

# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
result = client.search(
    collection_name="hello_milvus",
    data=vectors_to_search,
    anns_field="embeddings",
    search_params=search_params,
    limit=3,
    output_fields=["random"],
)
end_time = time.perf_counter()

for hits in result:
    for hit in hits:
        # Requested output fields are nested under the hit's "entity" mapping.
        # Reading them there works both when hits are plain dicts and when the
        # client additionally exposes entity fields through hit.get().
        print(f"hit: {hit}, random field: {hit['entity'].get('random')}")
print(search_latency_fmt.format(end_time - start_time))

# -----------------------------------------------------------------------------
# query based on scalar filtering(boolean, int, etc.)
print(fmt.format("Start querying with `random > 0.5`"))

# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
result = client.query(
    collection_name="hello_milvus",
    filter="random > 0.5",
    output_fields=["random", "embeddings"],
)
end_time = time.perf_counter()

# Only the first match is shown. Guard the lookup so an empty result (no row
# satisfies the filter) prints a message instead of raising IndexError.
if result:
    print(f"query result:\n-{result[0]}")
else:
    print("query result:\n-<no entities matched the filter>")
print(search_latency_fmt.format(end_time - start_time))

# -----------------------------------------------------------------------------
# pagination
# The two queries differ only in their offset/limit window, so the common
# arguments are kept in one place.
paged_query_args = {
    "collection_name": "hello_milvus",
    "filter": "random > 0.5",
}
r1 = client.query(limit=4, output_fields=["random"], **paged_query_args)
r2 = client.query(offset=1, limit=3, output_fields=["random"], **paged_query_args)

print(f"query pagination(limit=4):\n\t{r1}")
print(f"query pagination(offset=1, limit=3):\n\t{r2}")


# -----------------------------------------------------------------------------
# filtered search
print(fmt.format("Start filtered searching with `random > 0.5`"))

# perf_counter is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
result = client.search(
    collection_name="hello_milvus",
    data=vectors_to_search,
    anns_field="embeddings",
    search_params=search_params,
    limit=3,
    filter="random > 0.5",
    output_fields=["random"],
)
end_time = time.perf_counter()

for hits in result:
    for hit in hits:
        # Requested output fields are nested under the hit's "entity" mapping.
        # Reading them there works both when hits are plain dicts and when the
        # client additionally exposes entity fields through hit.get().
        print(f"hit: {hit}, random field: {hit['entity'].get('random')}")
print(search_latency_fmt.format(end_time - start_time))