Skip to content

Commit

Permalink
Check and repair diskann index params
Browse files Browse the repository at this point in the history
Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
  • Loading branch information
xiaocai2333 committed Nov 10, 2023
1 parent a6d871e commit bfd6409
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 0 deletions.
Empty file added audit_2023_1110_194212.log
Empty file.
Empty file added audit_2023_1110_195156.log
Empty file.
1 change: 1 addition & 0 deletions states/etcd/commands.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func RepairCommand(cli clientv3.KV, basePath string) *cobra.Command {
repair.EmptySegmentCommand(cli, basePath),
// repair miss index metric_type
repair.IndexMetricCommand(cli, basePath),
repair.DiskAnnIndexParamsCommand(cli, basePath),
)

return repairCmd
Expand Down
145 changes: 145 additions & 0 deletions states/etcd/repair/index_parmas.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
package repair

import (
"fmt"

"github.com/spf13/cobra"

commonpbv2 "github.com/milvus-io/birdwatcher/proto/v2.2/commonpb"
indexpbv2 "github.com/milvus-io/birdwatcher/proto/v2.2/indexpb"
clientv3 "go.etcd.io/etcd/client/v3"
)

// DiskAnnIndexParamsCommand return repair segment command.
func DiskAnnIndexParamsCommand(cli clientv3.KV, basePath string) *cobra.Command {
cmd := &cobra.Command{
Use: "diskann_index_params",
Aliases: []string{"diskann_index_params"},
Short: "check index parma and try to repair",
Run: func(cmd *cobra.Command, args []string) {
collID, err := cmd.Flags().GetInt64("collection")
if err != nil {
fmt.Println(err.Error())
return
}
run, err := cmd.Flags().GetBool("run")
if err != nil {
fmt.Println(err.Error())
return
}
indexes, err := listIndexMetaV2(cli, basePath)
if err != nil {
fmt.Println(err.Error())
return
}
DISKANNParamsMap := map[string]struct{}{
"index_type": {},
"max_degree": {},
"search_list_size": {},
"pq_code_budget_gb": {},
"build_dram_budget_gb": {},
"disk_pq_dims": {},
"partition_limit": {},
"accelerate_build": {},
"search_cache_budget_gb": {},
"warm_up": {},
"use_bfs_cache": {},
"beamwidth": {},
"min_k": {},
"max_k": {},
"search_list_and_k_ratio": {},
"filter_threshold": {},
"metric_type": {},
"k": {},
"num_build_thread": {},
"retrieve_friendly": {},
"data_path": {},
"index_prefix": {},
"build_quant_type": {},
"search_quant_type": {},
"radius": {},
"range_filter": {},
"trace_visit": {},
"enable_mmap": {},
"for_tuning": {},
"pq_code_budget_gb_ratio": {},
"num_build_thread_ratio": {},
"search_cache_budget_gb_ratio": {},
"num_load_thread_ratio": {},
"beamwidth_ratio": {},
}
newIndexes := make([]*indexpbv2.FieldIndex, 0)
unnecessaryParamsMap := make(map[int64][]string, 0)
for _, index := range indexes {
if collID != 0 && index.IndexInfo.CollectionID != collID {
continue
}
newIndex := &indexpbv2.FieldIndex{
IndexInfo: &indexpbv2.IndexInfo{
CollectionID: index.GetIndexInfo().GetCollectionID(),
FieldID: index.GetIndexInfo().GetFieldID(),
IndexName: index.GetIndexInfo().GetIndexName(),
IndexID: index.GetIndexInfo().GetIndexID(),
TypeParams: index.GetIndexInfo().GetTypeParams(),
IndexParams: make([]*commonpbv2.KeyValuePair, 0),
IndexedRows: index.GetIndexInfo().GetIndexedRows(),
TotalRows: index.GetIndexInfo().GetTotalRows(),
State: index.GetIndexInfo().GetState(),
IndexStateFailReason: index.GetIndexInfo().GetIndexStateFailReason(),
IsAutoIndex: index.GetIndexInfo().GetIsAutoIndex(),
UserIndexParams: index.GetIndexInfo().GetUserIndexParams(),
},
Deleted: index.GetDeleted(),
CreateTime: index.GetCreateTime(),
}
indexType := ""
for _, pair := range index.IndexInfo.IndexParams {
if pair.Key == "index_type" {
indexType = pair.Value
}
}
if indexType != "DISKANN" {
continue
}
unnecessaryParams := make([]string, 0)
for _, pair := range index.IndexInfo.IndexParams {
if _, ok := DISKANNParamsMap[pair.Key]; !ok {
unnecessaryParams = append(unnecessaryParams, pair.Key)
continue
}
newIndex.IndexInfo.IndexParams = append(newIndex.IndexInfo.IndexParams, pair)
}
unnecessaryParamsMap[newIndex.IndexInfo.IndexID] = unnecessaryParams
newIndexes = append(newIndexes, newIndex)

}
if !run {
fmt.Println("has unnecessary params index:")
fmt.Println(unnecessaryParamsMap)
fmt.Println("after repair index:")
for _, index := range newIndexes {
printIndexV2(*index)
}
return
}
for _, index := range newIndexes {
if err := writeRepairedIndex(cli, basePath, index); err != nil {
fmt.Println(err.Error())
return
}
}
afterRepairIndexes, err := listIndexMetaV2(cli, basePath)
if err != nil {
fmt.Println(err.Error())
return
}
for _, index := range afterRepairIndexes {
printIndexV2(index)
}
},
}

cmd.Flags().Int64("collection", 0, "collection id to filter with")
cmd.Flags().Bool("run", false, "actual do repair")
return cmd
}

0 comments on commit bfd6409

Please sign in to comment.