From 39e341e83a70023f3cc58c78768437a229418c72 Mon Sep 17 00:00:00 2001 From: Buqian Zheng Date: Fri, 7 Jun 2024 18:36:07 +0800 Subject: [PATCH] fix: [2.4] update check for sparse hnsw index (#33714) issue: #29419 pr: #33713 Signed-off-by: Buqian Zheng --- internal/proxy/task_index.go | 12 +++++++++- pkg/common/common.go | 2 ++ pkg/util/indexparamcheck/base_checker.go | 23 +++++++++++++++++-- pkg/util/indexparamcheck/base_checker_test.go | 16 +++++++++++++ 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/internal/proxy/task_index.go b/internal/proxy/task_index.go index b8e0f88af3c9..151051947b14 100644 --- a/internal/proxy/task_index.go +++ b/internal/proxy/task_index.go @@ -51,6 +51,7 @@ const ( AutoIndexName = "AUTOINDEX" DimKey = common.DimKey + IsSparseKey = common.IsSparseKey ) type createIndexTask struct { @@ -350,10 +351,15 @@ func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) erro return fmt.Errorf("invalid index type: %s", indexType) } - if !typeutil.IsSparseFloatVectorType(field.DataType) { + isSparse := typeutil.IsSparseFloatVectorType(field.DataType) + + if !isSparse { if err := fillDimension(field, indexParams); err != nil { return err } + } else { + // used only for checker, should be deleted after checking + indexParams[IsSparseKey] = "true" } if err := checker.CheckValidDataType(field.GetDataType()); err != nil { @@ -366,6 +372,10 @@ func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) erro return err } + if isSparse { + delete(indexParams, IsSparseKey) + } + return nil } diff --git a/pkg/common/common.go b/pkg/common/common.go index 2b9ebc4d8208..a7cd25503635 100644 --- a/pkg/common/common.go +++ b/pkg/common/common.go @@ -110,6 +110,8 @@ const ( MaxCapacityKey = "max_capacity" DropRatioBuildKey = "drop_ratio_build" + + IsSparseKey = "is_sparse" ) // Collection properties key diff --git a/pkg/util/indexparamcheck/base_checker.go b/pkg/util/indexparamcheck/base_checker.go index 50716a9d91cc..c9f8f7bbdaad 100644 --- a/pkg/util/indexparamcheck/base_checker.go +++ b/pkg/util/indexparamcheck/base_checker.go @@ -19,18 +19,37 @@ package indexparamcheck import ( "fmt" "math" + "strings" "github.com/cockroachdb/errors" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" ) type baseChecker struct{} func (c baseChecker) CheckTrain(params map[string]string) error { // vector dimension should be checked on collection creation. this is just some basic check - if !CheckIntByRange(params, DIM, 1, math.MaxInt) { - return fmt.Errorf("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt") + isSparse := false + if val, exist := params[common.IsSparseKey]; exist { + val = strings.ToLower(val) + if val != "true" && val != "false" { + return fmt.Errorf("invalid is_sparse value: %s, must be true or false", val) + } + if val == "true" { + isSparse = true + } + } + if isSparse { + if !CheckStrByValues(params, Metric, SparseMetrics) { + return fmt.Errorf("metric type not found or not supported for sparse float vectors, supported: %v", SparseMetrics) + } + } else { + // we do not check dim for sparse + if !CheckIntByRange(params, DIM, 1, math.MaxInt) { + return fmt.Errorf("failed to check vector dimension, should be larger than 0 and smaller than math.MaxInt") + } } return nil } diff --git a/pkg/util/indexparamcheck/base_checker_test.go b/pkg/util/indexparamcheck/base_checker_test.go index a016d4da8849..45eb4e22cc5b 100644 --- a/pkg/util/indexparamcheck/base_checker_test.go +++ b/pkg/util/indexparamcheck/base_checker_test.go @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" + "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/metric" ) @@ -18,12 +19,27 @@ func Test_baseChecker_CheckTrain(t *testing.T) { paramsWithoutDim := map[string]string{ Metric: metric.L2, } + sparseParamsWithoutDim := map[string]string{ + Metric: metric.IP, + common.IsSparseKey: "tRue", + } + sparseParamsWrongMetric := map[string]string{ + Metric: metric.L2, + common.IsSparseKey: "True", + } + badSparseParams := map[string]string{ + Metric: metric.IP, + common.IsSparseKey: "ds", + } cases := []struct { params map[string]string errIsNil bool }{ {validParams, true}, {paramsWithoutDim, false}, + {sparseParamsWithoutDim, true}, + {sparseParamsWrongMetric, false}, + {badSparseParams, false}, } c := newBaseChecker()