
[Bug]: [Add Field] Add field is stuck when continuously adding fields up to the 64th with a non-stop query (count(*)) #41808

@binbinlv

Description

Is there an existing issue for this?

  • I have searched the existing issues

Environment

- Milvus version: master-20250512-476984c5-amd64
- Deployment mode (standalone or cluster): both
- MQ type (rocksmq, pulsar or kafka): all
- SDK version (e.g. pymilvus v2.0.0rc2): 2.6.0rc121
- OS(Ubuntu or CentOS): 
- CPU/Memory: 
- GPU: 
- Others:

Current Behavior

Add field gets stuck when fields are added continuously up to the 64th while a non-stop count(*) query is running. (Note: the 64th field coincides with Milvus's default proxy.maxFieldNum limit of 64, which may be related.)

2025-05-13 17:13:07.907 | INFO     | __main__:<module>:31 - added field 58 (original 4 fields, and add 59 fields, totally 63 fields now, and creating the 64th field)

then the program gets stuck

Expected Behavior

The field is added successfully.

Steps To Reproduce

Run the following two scripts concurrently.

Script 1, the non-stop count(*) query:

from pymilvus import MilvusClient, DataType
import numpy as np
import random

from loguru import logger
import time

uri = ""
client = MilvusClient(uri=uri)

dim = 128
collection_name = "customized_setup_1"
if client.has_collection(collection_name):
    client.drop_collection(collection_name)
    logger.info("dropped")


schema = client.create_schema()
schema.add_field(field_name="my_id", datatype=DataType.INT64, is_primary=True)
schema.add_field(field_name="my_vector", datatype=DataType.FLOAT_VECTOR, dim=dim)
schema.add_field(field_name="my_varchar", datatype=DataType.VARCHAR, max_length=512)

index_params = client.prepare_index_params()
index_params.add_index(field_name="my_vector", index_type="AUTOINDEX", metric_type="COSINE")
client.create_collection(collection_name=collection_name, schema=schema, index_params=index_params)
logger.info("created collection")


field_schema = client.create_field_schema(name="field_new", data_type=DataType.INT64, max_length=100, nullable=True, is_clustering_key=False)
logger.info(field_schema)
res = client.add_collection_field(collection_name=collection_name, field_schema=field_schema)
logger.info("added field")
logger.info(res)
res = client.describe_collection(collection_name)
logger.info(res)


nb = 50000
vectors = [[random.random() for _ in range(dim)] for _ in range(2 * nb)]  # 2*nb vectors generated; only the first nb are inserted below

data = [{"my_id": i, "my_vector": vectors[i], "my_varchar": "varchar", "field_new": i} for i in range(nb)]
client.insert(collection_name="customized_setup_1", data=data)
logger.info("inserted")
logger.info("start to flush")
client.flush(collection_name="customized_setup_1")
logger.info("flushed")

logger.info("start to compact")
is_clustering = True
compact_id = client.compact(collection_name="customized_setup_1", is_clustering=is_clustering)
cost = 180
start = time.time()
while True:
    time.sleep(1)
    res = client.get_compaction_state(compact_id, is_clustering=is_clustering)
    logger.info(res)
    if res == "Completed":
        break
    if time.time() - start > cost:
        raise Exception(1, f"Compact after index cost more than {cost}s")
logger.info("compacted")

#res = client.delete(collection_name, filter="field_new == 0")
#logger.info(res)

# non-stop count(*) query loop; runs until killed
while True:
    res = client.query(collection_name, output_fields=["count(*)"])
    logger.info(res)
    logger.info("queried")

Script 2, the concurrent add-field loop:

from pymilvus import MilvusClient, DataType
import numpy as np
import random

from loguru import logger
import time

uri = ""
client = MilvusClient(uri=uri)

analyzer_params_built_in = {
    "type": "english"
}

# Verify built-in analyzer configuration
sample_text = "Milvus simplifies text analysis for search."
result = client.run_analyzer(sample_text, analyzer_params_built_in)
logger.info(f"Built-in analyzer output: {result}")

dim = 128
collection_name = "customized_setup_1"
i = 0
while i >= 0:  # intentionally endless: i only ever increases
    # field_schema = client.create_field_schema(name=f"field_new_{i}", data_type=DataType.VARCHAR, max_length=100, nullable=True, is_clustering_key=False, enable_analyzer=True, analyzer_params=analyzer_params_built_in, enable_match=True)
    enable = False
    if i == 0:
        enable = True
    field_schema = client.create_field_schema(name=f"field_new_{i}", data_type=DataType.VARCHAR, max_length=100, nullable=True, mmap_enabled=True, is_clustering_key=enable, enable_analyzer=True)
    logger.info(field_schema)
    res = client.add_collection_field(collection_name=collection_name, field_schema=field_schema)
    logger.info(f"added field {i}")
    res = client.describe_collection(collection_name)
    logger.info(res)
    index_params = client.prepare_index_params()
    index_params.add_index(field_name=f"field_new_{i}", index_type="INVERTED", index_name=f"index_name_{i}", params={"mmap.enabled": True})
    client.create_index(collection_name, index_params)
    logger.info("created indexes")
    index_list = client.list_indexes(collection_name)
    for single in index_list:
        res = client.describe_index(collection_name, single)
        logger.info(res)
    i += 1
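
To pinpoint the exact addition that stalls, the loop above can be bounded and each add_collection_field call given a timeout. A minimal sketch; max_fields, the field_timed_{i} names, and the assumption that add_collection_field accepts the standard pymilvus timeout keyword are mine, not from the original repro:

# Hypothetical bounded/timed variant of the add-field loop (not part of the original repro).
max_fields = 70  # assumption: enough iterations to cross the 64-field mark

for i in range(max_fields):
    field_schema = client.create_field_schema(
        name=f"field_timed_{i}",  # hypothetical names, to avoid clashing with field_new_{i}
        data_type=DataType.VARCHAR,
        max_length=100,
        nullable=True,
    )
    try:
        client.add_collection_field(
            collection_name=collection_name,
            field_schema=field_schema,
            timeout=60,  # seconds; assumed standard pymilvus timeout forwarding
        )
    except Exception as e:
        logger.error(f"add_collection_field stalled or failed at field {i}: {e}")
        break
    logger.info(f"added field_timed_{i}")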

Milvus Log

https://grafana-4am.zilliz.cc/explore?orgId=1&left=%7B%22datasource%22:%22Loki%22,%22queries%22:%5B%7B%22refId%22:%22A%22,%22expr%22:%22%7Bcluster%3D%5C%22devops%5C%22,namespace%3D%5C%22chaos-testing%5C%22,pod%3D~%5C%22add-field-master-luqbp.*%5C%22%7D%22%7D%5D,%22range%22:%7B%22from%22:%22now-1h%22,%22to%22:%22now%22%7D%7D

Anything else?

timeline:

2025-05-13 17:13:07.907 | INFO     | __main__:<module>:31 - added field 58
2025-05-13 17:13:08.615 | INFO     | __main__:<module>:37 - created indexes
2025-05-13 17:13:10.562: show index

then the script gets stuck

Metadata

Labels

kind/bug: Issues or changes related to a bug
triage/accepted: Indicates an issue or PR is ready to be actively worked on.
