From a1f6142f38fc2130a522cf78e69ebc502f69bf92 Mon Sep 17 00:00:00 2001 From: JiYou Date: Wed, 3 Oct 2018 18:37:38 -0700 Subject: [PATCH] VersionSet: GetOverlappingInputs() fix overflow and optimize. (#4385) Summary: This fix is for `level == 0` in `GetOverlappingInputs()`: - In `GetOverlappingInputs()`, if `level == 0`, there is a potential risk of overflow if `i == 0`. - Optimize the process when `expand_range` is true; the expected complexity can be reduced to O(n). Signed-off-by: JiYou Pull Request resolved: https://github.com/facebook/rocksdb/pull/4385 Differential Revision: D10181001 Pulled By: riversand963 fbshipit-source-id: 46eef8a1d1605c9329c164e6471cd5c5b6de16b5 --- db/version_set.cc | 82 +++++++++++++++++++++++++++++------------------ 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/db/version_set.cc b/db/version_set.cc index cb950cad204..2b3106c009a 100644 --- a/db/version_set.cc +++ b/db/version_set.cc @@ -21,6 +21,7 @@ #include #include #include +#include <list> #include "db/compaction.h" #include "db/internal_stats.h" #include "db/log_reader.h" @@ -2034,13 +2035,6 @@ void VersionStorageInfo::GetOverlappingInputs( } inputs->clear(); - Slice user_begin, user_end; - if (begin != nullptr) { - user_begin = begin->user_key(); - } - if (end != nullptr) { - user_end = end->user_key(); - } if (file_index) { *file_index = -1; } @@ -2051,33 +2045,59 @@ void VersionStorageInfo::GetOverlappingInputs( return; } - for (size_t i = 0; i < level_files_brief_[level].num_files; ) { - FdWithKeyRange* f = &(level_files_brief_[level].files[i++]); - const Slice file_start = ExtractUserKey(f->smallest_key); - const Slice file_limit = ExtractUserKey(f->largest_key); - if (begin != nullptr && user_cmp->Compare(file_limit, user_begin) < 0) { - // "f" is completely before specified range; skip it - } else if (end != nullptr && user_cmp->Compare(file_start, user_end) > 0) { - // "f" is completely after specified range; skip it - } else { - inputs->push_back(files_[level][i-1]); - if 
(level == 0 && expand_range) { - // Level-0 files may overlap each other. So check if the newly - // added file has expanded the range. If so, restart search. - if (begin != nullptr && user_cmp->Compare(file_start, user_begin) < 0) { - user_begin = file_start; - inputs->clear(); - i = 0; - } else if (end != nullptr - && user_cmp->Compare(file_limit, user_end) > 0) { - user_end = file_limit; - inputs->clear(); - i = 0; + Slice user_begin, user_end; + if (begin != nullptr) { + user_begin = begin->user_key(); + } + if (end != nullptr) { + user_end = end->user_key(); + } + + // index stores the file index need to check. + std::list<size_t> index; + for (size_t i = 0; i < level_files_brief_[level].num_files; i++) { + index.emplace_back(i); + } + + while (!index.empty()) { + bool found_overlapping_file = false; + auto iter = index.begin(); + while (iter != index.end()) { + FdWithKeyRange* f = &(level_files_brief_[level].files[*iter]); + const Slice file_start = ExtractUserKey(f->smallest_key); + const Slice file_limit = ExtractUserKey(f->largest_key); + if (begin != nullptr && user_cmp->Compare(file_limit, user_begin) < 0) { + // "f" is completely before specified range; skip it + iter++; + } else if (end != nullptr && user_cmp->Compare(file_start, user_end) > 0) { + // "f" is completely after specified range; skip it + iter++; + } else { + // if overlap + inputs->emplace_back(files_[level][*iter]); + found_overlapping_file = true; + // record the first file index. + if (file_index && *file_index == -1) { + *file_index = static_cast<int>(*iter); + } + // the related file is overlap, erase to avoid checking again. 
+ iter = index.erase(iter); + if (expand_range) { + if (begin != nullptr && + user_cmp->Compare(file_start, user_begin) < 0) { + user_begin = file_start; + } + if (end != nullptr && + user_cmp->Compare(file_limit, user_end) > 0) { + user_end = file_limit; + } } - } else if (file_index) { - *file_index = static_cast<int>(i) - 1; } } + // if all the files left are not overlap, break + if (!found_overlapping_file) { + break; + } } }