Skip to content

Commit

Permalink
enhance: autoindex for multi data type
Browse files Browse the repository at this point in the history
Signed-off-by: xianliang.li <xianliang.li@zilliz.com>

Signed-off-by: chasingegg <chao.gao@zilliz.com>
  • Loading branch information
foxspy authored and chasingegg committed Jun 14, 2024
1 parent f67b6dc commit 46b09d6
Show file tree
Hide file tree
Showing 16 changed files with 394 additions and 45 deletions.
62 changes: 47 additions & 15 deletions internal/proxy/task_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,19 +177,29 @@ func (cit *createIndexTask) parseIndexParams() error {

metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]

// override params by autoindex
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
// override float vector index params by autoindex
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
// override sparse float vector index params by autoindex
for k, v := range Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
}

if metricTypeExist {
// make the users' metric type first class citizen.
indexParamsMap[common.MetricTypeKey] = metricType
}
} else { // behavior change after 2.2.9, adapt autoindex logic here.
autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap()

useAutoIndex := func() {
useAutoIndex := func(autoIndexConfig map[string]string) {
fields := make([]zap.Field, 0, len(autoIndexConfig))
for k, v := range autoIndexConfig {
indexParamsMap[k] = v
Expand All @@ -198,13 +208,13 @@ func (cit *createIndexTask) parseIndexParams() error {
log.Ctx(cit.ctx).Info("AutoIndex triggered", fields...)
}

handle := func(numberParams int) error {
handle := func(numberParams int, autoIndexConfig map[string]string) error {
// empty case.
if len(indexParamsMap) == numberParams {
// though we already know there must be metric type, how to make this safer to avoid crash?
metricType := autoIndexConfig[common.MetricTypeKey]
cit.newExtraParams = wrapUserIndexParams(metricType)
useAutoIndex()
useAutoIndex(autoIndexConfig)
return nil
}

Expand All @@ -221,20 +231,31 @@ func (cit *createIndexTask) parseIndexParams() error {

// only metric type is passed.
cit.newExtraParams = wrapUserIndexParams(metricType)
useAutoIndex()
useAutoIndex(autoIndexConfig)
// make the users' metric type first class citizen.
indexParamsMap[common.MetricTypeKey] = metricType
}

return nil
}

var config map[string]string
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
// override float vector index params by autoindex
config = Params.AutoIndexConfig.IndexParams.GetAsJSONMap()
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
// override sparse float vector index params by autoindex
config = Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
}
if !exist {
if err := handle(0); err != nil {
if err := handle(0, config); err != nil {
return err
}
} else if specifyIndexType == AutoIndexName {
if err := handle(1); err != nil {
if err := handle(1, config); err != nil {
return err
}
}
Expand All @@ -250,10 +271,21 @@ func (cit *createIndexTask) parseIndexParams() error {
return err
}
}
if indexType == indexparamcheck.IndexSparseInverted || indexType == indexparamcheck.IndexSparseWand {
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
if !metricTypeExist || metricType != metric.IP {
return fmt.Errorf("only IP is the supported metric type for sparse index")
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
if !metricTypeExist {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "metric type not set for vector index")

Check warning on line 276 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L276

Added line #L276 was not covered by tests
}
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.FloatVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "float vector index does not support metric type: "+metricType)

Check warning on line 280 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L280

Added line #L280 was not covered by tests
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
if metricType != metric.IP {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "only IP is the supported metric type for sparse index")
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.BinaryVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "binary vector index does not support metric type: "+metricType)

Check warning on line 288 in internal/proxy/task_index.go

View check run for this annotation

Codecov / codecov/patch

internal/proxy/task_index.go#L288

Added line #L288 was not covered by tests
}
}
}
Expand Down
138 changes: 137 additions & 1 deletion internal/proxy/task_index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -963,22 +963,158 @@ func Test_wrapUserIndexParams(t *testing.T) {
assert.Equal(t, "L2", params[1].Value)
}

// Test_parseIndexParams_AutoIndex_WithType checks that, with autoindex
// enabled, parseIndexParams picks the autoindex build parameters that belong
// to the field's vector data type (float, sparse float, binary) while keeping
// the metric type supplied in the request.
func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
	paramtable.Init()
	mgr := config.NewManager()
	mgr.SetConfig("autoIndex.enable", "true")
	Params.AutoIndexConfig.Enable.Init(mgr)

	// One build config per vector family, each with a distinct index type so
	// the assertions below can tell which config was applied.
	mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW"}`)
	mgr.SetConfig("autoIndex.params.sparse.build", `{"drop_ratio_build": 0.2, "index_type": "SPARSE_INVERTED_INDEX"}`)
	mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`)
	Params.AutoIndexConfig.IndexParams.Init(mgr)
	Params.AutoIndexConfig.SparseIndexParams.Init(mgr)
	Params.AutoIndexConfig.BinaryIndexParams.Init(mgr)

	// vectorField builds a field schema of the given vector type and dimension.
	vectorField := func(dataType schemapb.DataType, dim string) *schemapb.FieldSchema {
		return &schemapb.FieldSchema{
			DataType: dataType,
			TypeParams: []*commonpb.KeyValuePair{
				{Key: common.DimKey, Value: dim},
			},
		}
	}
	// metricOnlyTask builds a create-index task whose request carries nothing
	// but a metric type.
	metricOnlyTask := func(schema *schemapb.FieldSchema, metricType string) *createIndexTask {
		return &createIndexTask{
			fieldSchema: schema,
			req: &milvuspb.CreateIndexRequest{
				ExtraParams: []*commonpb.KeyValuePair{
					{Key: common.MetricTypeKey, Value: metricType},
				},
			},
		}
	}

	floatFieldSchema := vectorField(schemapb.DataType_FloatVector, "128")
	sparseFloatFieldSchema := vectorField(schemapb.DataType_SparseFloatVector, "64")
	binaryFieldSchema := vectorField(schemapb.DataType_BinaryVector, "4096")

	t.Run("case 1, float vector parameters", func(t *testing.T) {
		task := metricOnlyTask(floatFieldSchema, "L2")
		err := task.parseIndexParams()
		assert.NoError(t, err)
		expected := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "HNSW"},
			{Key: common.MetricTypeKey, Value: "L2"},
			{Key: "M", Value: "30"},
			{Key: "efConstruction", Value: "360"},
		}
		assert.ElementsMatch(t, expected, task.newIndexParams)
	})

	t.Run("case 2, sparse vector parameters", func(t *testing.T) {
		// Kept from the original test: the dense config is initialised again
		// before the sparse case runs.
		Params.AutoIndexConfig.IndexParams.Init(mgr)
		task := metricOnlyTask(sparseFloatFieldSchema, "IP")
		err := task.parseIndexParams()
		assert.NoError(t, err)
		expected := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"},
			{Key: common.MetricTypeKey, Value: "IP"},
			{Key: "drop_ratio_build", Value: "0.2"},
		}
		assert.ElementsMatch(t, expected, task.newIndexParams)
	})

	t.Run("case 3, binary vector parameters", func(t *testing.T) {
		task := metricOnlyTask(binaryFieldSchema, "JACCARD")
		err := task.parseIndexParams()
		assert.NoError(t, err)
		expected := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"},
			{Key: common.MetricTypeKey, Value: "JACCARD"},
			{Key: "nlist", Value: "1024"},
		}
		assert.ElementsMatch(t, expected, task.newIndexParams)
	})
}

func Test_parseIndexParams_AutoIndex(t *testing.T) {
paramtable.Init()
mgr := config.NewManager()
mgr.SetConfig("autoIndex.enable", "false")
mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW", "metric_type": "IP"}`)
mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD"}`)
mgr.SetConfig("autoIndex.params.sparse.build", `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`)
Params.AutoIndexConfig.Enable.Init(mgr)
Params.AutoIndexConfig.IndexParams.Init(mgr)
Params.AutoIndexConfig.BinaryIndexParams.Init(mgr)
Params.AutoIndexConfig.SparseIndexParams.Init(mgr)
autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap()
autoIndexConfigBinary := Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
autoIndexConfigSparse := Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
fieldSchema := &schemapb.FieldSchema{
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
},
}

t.Run("case 1, empty parameters", func(t *testing.T) {
fieldSchemaBinary := &schemapb.FieldSchema{
DataType: schemapb.DataType_BinaryVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
},
}

fieldSchemaSparse := &schemapb.FieldSchema{
DataType: schemapb.DataType_SparseFloatVector,
}

t.Run("case 1, empty parameters binary", func(t *testing.T) {
task := &createIndexTask{
fieldSchema: fieldSchemaBinary,
req: &milvuspb.CreateIndexRequest{
ExtraParams: make([]*commonpb.KeyValuePair, 0),
},
}
err := task.parseIndexParams()
assert.NoError(t, err)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigBinary[common.MetricTypeKey]},
}, task.newExtraParams)
})

t.Run("case 1, empty parameters sparse", func(t *testing.T) {
task := &createIndexTask{
fieldSchema: fieldSchemaSparse,
req: &milvuspb.CreateIndexRequest{
ExtraParams: make([]*commonpb.KeyValuePair, 0),
},
}
err := task.parseIndexParams()
assert.NoError(t, err)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigSparse[common.MetricTypeKey]},
}, task.newExtraParams)
})

t.Run("case 1, empty parameters float vector", func(t *testing.T) {
task := &createIndexTask{
fieldSchema: fieldSchema,
req: &milvuspb.CreateIndexRequest{
Expand Down
1 change: 1 addition & 0 deletions internal/querynodev2/optimizers/query_hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ func OptimizeSearchParams(ctx context.Context, req *querypb.SearchRequest, query
common.SearchParamKey: queryInfo.GetSearchParams(),
common.SegmentNumKey: estSegmentNum,
common.WithFilterKey: withFilter,
common.DataTypeKey: plan.GetVectorAnns().GetVectorType(),
common.WithOptimizeKey: paramtable.Get().AutoIndexConfig.EnableOptimize.GetAsBool(),
common.CollectionKey: req.GetReq().GetCollectionID(),
}
Expand Down
1 change: 1 addition & 0 deletions pkg/common/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ const (
SearchParamKey = "search_param"
SegmentNumKey = "segment_num"
WithFilterKey = "with_filter"
DataTypeKey = "data_type"
WithOptimizeKey = "with_optimize"
CollectionKey = "collection"

Expand Down
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func (c baseChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil
}

func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string) {}
func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string, dType schemapb.DataType) {}

Check warning on line 43 in pkg/util/indexparamcheck/base_checker.go

View check run for this annotation

Codecov / codecov/patch

pkg/util/indexparamcheck/base_checker.go#L43

Added line #L43 was not covered by tests

func (c baseChecker) StaticCheck(params map[string]string) error {
return errors.New("unsupported index type")
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/indexparamcheck/binary_vector_base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func (c binaryVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) err
return nil
}

func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
}

Expand Down
6 changes: 4 additions & 2 deletions pkg/util/indexparamcheck/constraints.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,10 @@ const (
SparseDropRatioBuild = "drop_ratio_build"
)

// METRICS is a set of all metrics types supported for float vector.
var METRICS = []string{metric.L2, metric.IP, metric.COSINE} // const
// Metric type whitelists, one per vector family. They are declared as
// variables but marked "const": treat them as read-only.
var (
// FloatVectorMetrics is the set of metric types accepted for float vector indexes.
FloatVectorMetrics = []string{metric.L2, metric.IP, metric.COSINE} // const
// BinaryVectorMetrics is the set of metric types accepted for binary vector indexes.
BinaryVectorMetrics = []string{metric.HAMMING, metric.JACCARD, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE} // const
)

// BinIDMapMetrics is a set of all metric types supported for binary vector.
var (
Expand Down
6 changes: 3 additions & 3 deletions pkg/util/indexparamcheck/float_vector_base_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ type floatVectorBaseChecker struct {
}

func (c floatVectorBaseChecker) staticCheck(params map[string]string) error {
if !CheckStrByValues(params, Metric, METRICS) {
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], METRICS)
if !CheckStrByValues(params, Metric, FloatVectorMetrics) {
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], FloatVectorMetrics)
}

return nil
Expand All @@ -35,7 +35,7 @@ func (c floatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) erro
return nil
}

func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) {
func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
}

Expand Down
13 changes: 12 additions & 1 deletion pkg/util/indexparamcheck/hnsw_checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@ import (
"fmt"

"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil"
)

type hnswChecker struct {
floatVectorBaseChecker
baseChecker
}

func (c hnswChecker) StaticCheck(params map[string]string) error {
Expand Down Expand Up @@ -38,6 +39,16 @@ func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil
}

// SetDefaultMetricTypeIfNotExist applies the default metric type that matches
// dType to params under common.MetricTypeKey, delegating the write to
// setDefaultIfNotExist (which, per its name, is expected to leave an existing
// value alone; confirm against its definition).
//
// Dense float, sparse float and binary vector types each get their own
// default. Any other data type leaves params untouched.
func (c hnswChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
	// Cases are tried top to bottom and only the first match runs.
	switch {
	case typeutil.IsDenseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
	case typeutil.IsSparseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
	case typeutil.IsBinaryVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
	}
}

// newHnswChecker returns a zero-valued HNSW checker as an IndexChecker.
func newHnswChecker() IndexChecker {
	return new(hnswChecker)
}
Loading

0 comments on commit 46b09d6

Please sign in to comment.