From 23fea06477e5512edd6c6efeeb697d54516bfa1a Mon Sep 17 00:00:00 2001 From: MrPresent-Han Date: Thu, 11 Jul 2024 05:34:56 -0400 Subject: [PATCH] enhance: optimize search reduce perf(#32507) Signed-off-by: MrPresent-Han --- internal/core/src/segcore/reduce/Reduce.cpp | 34 ++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/internal/core/src/segcore/reduce/Reduce.cpp b/internal/core/src/segcore/reduce/Reduce.cpp index b0086d69011a..a2263c5cec55 100644 --- a/internal/core/src/segcore/reduce/Reduce.cpp +++ b/internal/core/src/segcore/reduce/Reduce.cpp @@ -91,17 +91,21 @@ ReduceHelper::FilterInvalidSearchResult(SearchResult* search_result) { auto& offsets = search_result->seg_offsets_; auto& distances = search_result->distances_; + int segment_row_count = segment->get_row_count(); + //1. for sealed segment, segment_row_count will not change as delete records will take effect as bitset + //2. for growing segment, segment_row_count is the minimum position acknowledged, which will only increase after + //the time at which the search operation is executed, so it's safe here to keep this value inside stack for (auto i = 0; i < nq; ++i) { for (auto j = 0; j < topK; ++j) { auto index = i * topK + j; if (offsets[index] != INVALID_SEG_OFFSET) { AssertInfo(0 <= offsets[index] && - offsets[index] < segment->get_row_count(), + offsets[index] < segment_row_count, fmt::format("invalid offset {}, segment {} with " "rows num {}, data or index corruption", offsets[index], segment->get_segment_id(), - segment->get_row_count())); + segment_row_count)); real_topks[i]++; offsets[valid_index] = offsets[index]; distances[valid_index] = distances[index]; @@ -136,23 +140,22 @@ ReduceHelper::FillPrimaryKey() { search_results_[valid_index++] = search_result; } } - search_results_.resize(valid_index); - num_segments_ = search_results_.size(); + if(valid_index < search_results_.size()) { + search_results_.resize(valid_index); + num_segments_ = search_results_.size(); + } } void ReduceHelper::RefreshSearchResults() { tracer::AutoSpan span( "ReduceHelper::RefreshSearchResults", trace_ctx_, false); + std::vector real_topks(total_nq_, 0); for (int i = 0; i < num_segments_; i++) { - std::vector real_topks(total_nq_, 0); auto search_result = search_results_[i]; if (search_result->result_offsets_.size() != 0) { RefreshSingleSearchResult(search_result, i, real_topks); } - std::partial_sum(real_topks.begin(), - real_topks.end(), - search_result->topk_per_nq_prefix_sum_.begin() + 1); } } @@ -164,23 +167,20 @@ ReduceHelper::RefreshSingleSearchResult(SearchResult* search_result, for (int j = 0; j < total_nq_; j++) { size += final_search_records_[seg_res_idx][j].size(); } - std::vector primary_keys(size); - std::vector distances(size); - std::vector seg_offsets(size); uint32_t index = 0; for (int j = 0; j < total_nq_; j++) { for (auto offset : final_search_records_[seg_res_idx][j]) { - primary_keys[index] = search_result->primary_keys_[offset]; - distances[index] = search_result->distances_[offset]; - seg_offsets[index] = search_result->seg_offsets_[offset]; + search_result->primary_keys_[index] = search_result->primary_keys_[offset]; + search_result->distances_[index] = search_result->distances_[offset]; + search_result->seg_offsets_[index] = search_result->seg_offsets_[offset]; index++; real_topks[j]++; } } - search_result->primary_keys_.swap(primary_keys); - search_result->distances_.swap(distances); - search_result->seg_offsets_.swap(seg_offsets); + search_result->primary_keys_.resize(index); + search_result->distances_.resize(index); + search_result->seg_offsets_.resize(index); } void