Skip to content

Commit

Permalink
enhance: [cherry-pick] Refine index code and support analyze data (#3…
Browse files Browse the repository at this point in the history
…4311)

This PR primarily cherry-picks the support for analyzing data, including
the following commits:
- main functionality: #33651
- refine indexnode code: #33458
- related fixes:
  - #33832
  - #33161

issue: #30633 
master prs: #33651 , #33458 , #33832 , #33161

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Signed-off-by: Patrick Weizhi Xu <weizhi.xu@zilliz.com>
Co-authored-by: chasingegg <chao.gao@zilliz.com>
Co-authored-by: Patrick Weizhi Xu <weizhi.xu@zilliz.com>
  • Loading branch information
3 people committed Jul 2, 2024
1 parent 3c5ad49 commit c924b0b
Show file tree
Hide file tree
Showing 81 changed files with 8,806 additions and 3,673 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,7 @@ generate-mockery-datacoord: getdeps
$(INSTALL_PATH)/mockery --name=ChannelManager --dir=internal/datacoord --filename=mock_channelmanager.go --output=internal/datacoord --structname=MockChannelManager --with-expecter --inpackage
$(INSTALL_PATH)/mockery --name=SubCluster --dir=internal/datacoord --filename=mock_subcluster.go --output=internal/datacoord --structname=MockSubCluster --with-expecter --inpackage
$(INSTALL_PATH)/mockery --name=Broker --dir=internal/datacoord/broker --filename=mock_coordinator_broker.go --output=internal/datacoord/broker --structname=MockBroker --with-expecter --inpackage
$(INSTALL_PATH)/mockery --name=WorkerManager --dir=internal/datacoord --filename=mock_worker_manager.go --output=internal/datacoord --structname=MockWorkerManager --with-expecter --inpackage

generate-mockery-datanode: getdeps
$(INSTALL_PATH)/mockery --name=Allocator --dir=$(PWD)/internal/datanode/allocator --output=$(PWD)/internal/datanode/allocator --filename=mock_allocator.go --with-expecter --structname=MockAllocator --outpkg=allocator --inpackage
Expand Down
29 changes: 29 additions & 0 deletions configs/milvus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,30 @@ dataCoord:
rpcTimeout: 10
maxParallelTaskNum: 10
workerMaxParallelTaskNum: 2
clustering:
enable: true
autoEnable: false
triggerInterval: 600
stateCheckInterval: 10
gcInterval: 600
minInterval: 3600
maxInterval: 259200
newDataRatioThreshold: 0.2
newDataSizeThreshold: 512m
timeout: 7200
dropTolerance: 86400
# Clustering compaction tries its best to distribute data into segments whose size falls in the range [preferSegmentSize, maxSegmentSize].
# Data is clustered by preferSegmentSize; if a cluster is larger than maxSegmentSize, it is split into multiple segments.
# The buffer between (preferSegmentSize, maxSegmentSize) is left for new data in the same cluster (range), to avoid redistributing data globally too often.
preferSegmentSize: 512m
maxSegmentSize: 1024m
maxTrainSizeRatio: 0.8 # max data size ratio in analyze; if the data is larger than this, it will be down-sampled to meet the limit
maxCentroidsNum: 10240
minCentroidsNum: 16
minClusterSizeRatio: 0.01
maxClusterSizeRatio: 10
maxClusterSize: 5g

levelzero:
forceTrigger:
minSize: 8388608 # The minimum size in bytes to force trigger a LevelZero Compaction, defaults to 8MB
Expand Down Expand Up @@ -542,6 +566,9 @@ dataNode:
slot:
slotCap: 2 # The maximum number of tasks(e.g. compaction, importing) allowed to run concurrently on a datanode.

clusteringCompaction:
memoryBufferRatio: 0.1 # The ratio of memory buffer of clustering compaction. Data larger than threshold will be spilled to storage.

# Configures the system log output.
log:
level: info # Only supports debug, info, warn, error, panic, or fatal. Default 'info'.
Expand Down Expand Up @@ -622,6 +649,8 @@ common:
traceLogMode: 0 # trace request info
bloomFilterSize: 100000 # bloom filter initial size
maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter
usePartitionKeyAsClusteringKey: false
useVectorAsClusteringKey: false

# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
Expand Down
6 changes: 6 additions & 0 deletions internal/core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/indexbuilder/
FILES_MATCHING PATTERN "*_c.h"
)

# Install the clustering headers: copy every file matching "*_c.h" found under
# src/clustering/ into <prefix>/include/clustering.
install(
    DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/src/clustering/"
    DESTINATION "include/clustering"
    FILES_MATCHING
        PATTERN "*_c.h"
)

# Install common
install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/src/common/
DESTINATION include/common
Expand Down
1 change: 1 addition & 0 deletions internal/core/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,6 @@ add_subdirectory( index )
add_subdirectory( query )
add_subdirectory( segcore )
add_subdirectory( indexbuilder )
add_subdirectory( clustering )
add_subdirectory( exec )
add_subdirectory( bitset )
24 changes: 24 additions & 0 deletions internal/core/src/clustering/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Copyright (C) 2019-2020 Zilliz. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under the License


# Sources compiled into the milvus_clustering shared library.
set(CLUSTERING_FILES
    analyze_c.cpp
    KmeansClustering.cpp
)

# Register pkg-config metadata for the library (helper defined elsewhere in the
# project), then declare the shared library target itself.
milvus_add_pkg_config("milvus_clustering")
add_library(milvus_clustering SHARED ${CLUSTERING_FILES})

# link order matters
# PUBLIC keeps the transitive link behavior of the legacy keyword-less
# signature while making the dependency's visibility explicit.
target_link_libraries(milvus_clustering PUBLIC milvus_index)

# Install the built library into the configured library directory.
install(TARGETS milvus_clustering DESTINATION "${CMAKE_INSTALL_LIBDIR}")
Loading

0 comments on commit c924b0b

Please sign in to comment.