Skip to content

Commit

Permalink
fix: Reduce duplicate PKs in segcore (#34267) (#34302)
Browse files Browse the repository at this point in the history
issue: #34247

pr: #34267

Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
  • Loading branch information
bigsheeper committed Jul 1, 2024
1 parent 14a11e3 commit 3c5ad49
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
17 changes: 10 additions & 7 deletions internal/core/src/segcore/InsertRecord.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ class OffsetOrderedMap : public OffsetMap {
bool false_filtered_out) const override {
std::shared_lock<std::shared_mutex> lck(mtx_);

if (limit == Unlimited || limit == NoLimit) {
limit = map_.size();
}

// TODO: we can't retrieve pk by offset very conveniently.
// Selectivity should be done outside.
return find_first_by_index(limit, bitset, false_filtered_out);
Expand All @@ -138,15 +142,15 @@ class OffsetOrderedMap : public OffsetMap {
if (!false_filtered_out) {
cnt = size - bitset.count();
}
if (limit == Unlimited || limit == NoLimit) {
limit = cnt;
}
limit = std::min(limit, cnt);
std::vector<int64_t> seg_offsets;
seg_offsets.reserve(limit);
auto it = map_.begin();
for (; hit_num < limit && it != map_.end(); it++) {
for (auto seg_offset : it->second) {
// Offsets in the growing segment are ordered by timestamp,
// so traverse from back to front to obtain the latest offset.
for (int i = it->second.size() - 1; i >= 0; --i) {
auto seg_offset = it->second[i];
if (seg_offset >= size) {
// Frequently concurrent insert/query will cause this case.
continue;
Expand All @@ -155,9 +159,8 @@ class OffsetOrderedMap : public OffsetMap {
if (!(bitset[seg_offset] ^ false_filtered_out)) {
seg_offsets.push_back(seg_offset);
hit_num++;
if (hit_num >= limit) {
break;
}
// PK hit, no need to continue traversing offsets with the same PK.
break;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion internal/core/unittest/test_c_api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
auto suc = query_result->ParseFromArray(retrieve_result.proto_blob,
retrieve_result.proto_size);
ASSERT_TRUE(suc);
ASSERT_EQ(query_result->ids().int_id().data().size(), 6);
ASSERT_EQ(query_result->ids().int_id().data().size(), 3);
DeleteRetrieveResult(&retrieve_result);

// delete data pks = {1, 2, 3}
Expand Down

0 comments on commit 3c5ad49

Please sign in to comment.