diff --git a/CHANGELOG.md b/CHANGELOG.md
index 408cedcb54e..c9242f790ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,7 @@
 * Improved query performance for unindexed integer columns when the query has a chain of OR conditions.
   This will improve performance of "IN" queries generated by SDKs.
   ([PR #2888](https://github.com/realm/realm-sync/issues/2888).
+* Use search index in queries on integer columns (equality only)
 
 ### Fixed
 * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp
index 84fdc29f805..7b64ba67cb5 100644
--- a/src/realm/query_engine.hpp
+++ b/src/realm/query_engine.hpp
@@ -885,6 +885,35 @@ class IntegerNode : public IntegerNodeBase {
         : BaseType(value, column_ndx)
     {
     }
+    /// Releases the result column allocated by init(), if there is one.
+    ~IntegerNode()
+    {
+        if (m_result.is_attached()) {
+            m_result.destroy();
+        }
+    }
+
+    /// Caches the needle count and, when the column has a search index,
+    /// collects the rows matching m_value into m_result up front.
+    void init() override
+    {
+        BaseType::init();
+        m_nb_needles = m_needles.size();
+
+        if (has_search_index()) {
+            // If init() runs again on the same node, free the previous result
+            // column first so that it is not leaked when m_result is re-seated.
+            if (m_result.is_attached()) {
+                m_result.destroy();
+            }
+            ref_type ref = IntegerColumn::create(Allocator::get_default());
+            m_result.init_from_ref(Allocator::get_default(), ref);
+
+            IntegerNodeBase::m_condition_column->find_all(m_result, this->m_value, 0, realm::npos);
+            m_index_get = 0;
+            m_index_end = m_result.size();
+        }
+    }
 
     void consume_condition(IntegerNode* other)
     {
@@ -904,7 +933,26 @@ class IntegerNode : public IntegerNodeBase {
     size_t find_first_local(size_t start, size_t end) override
     {
         REALM_ASSERT(this->m_table);
-        size_t nb_needles = m_needles.size();
+
+        if (has_search_index()) {
+            // m_result holds the row indexes that init() obtained for m_value.
+            // The early break below assumes they are in ascending order, and
+            // m_index_get persists between calls, so skipped entries stay consumed.
+            // NOTE(review): only m_value is matched here - confirm that conditions
+            // merged into m_needles cannot reach a node with a search index.
+            while (m_index_get < m_index_end) {
+                size_t ndx = size_t(m_result.get(m_index_get));
+                if (ndx >= end) {
+                    break;
+                }
+                m_index_get++;
+                if (ndx >= start) {
+                    return ndx;
+                }
+            }
+            return not_found;
+        }
+
 
         while (start < end) {
             // Cache internal leaves
@@ -918,28 +966,8 @@ class IntegerNode : public IntegerNodeBase {
             auto start2 = start - this->m_leaf_start;
             size_t s = realm::npos;
 
-            if (nb_needles) {
-                const auto not_in_set = m_needles.end();
-                bool search = nb_needles < 22;
-                auto cmp_fn = [this, search, not_in_set](const auto& v) {
-                    if (search) {
-                        for (auto it = m_needles.begin(); it != not_in_set; ++it) {
-                            if (*it == v)
-                                return true;
-                        }
-                        return false;
-                    }
-                    else {
-                        return (m_needles.find(v) != not_in_set);
-                    }
-                };
-                for (size_t i = start2; i < end2; ++i) {
-                    auto val = this->m_leaf_ptr->get(i);
-                    if (cmp_fn(val)) {
-                        s = i;
-                        break;
-                    }
-                }
+            if (m_nb_needles) {
+                s = find_first_haystack(start2, end2);
             }
             else if (end2 - start2 == 1) {
                 if (this->m_leaf_ptr->get(start2) == this->m_value) {
@@ -991,12 +1019,43 @@ class IntegerNode : public IntegerNodeBase {
 
 private:
     std::unordered_set m_needles;
+    IntegerColumn m_result;  // row indexes matching m_value, filled by init()
+    size_t m_nb_needles = 0; // m_needles.size() as cached by init()
+    size_t m_index_get = 0;  // next position to read in m_result
+    size_t m_index_end = 0;  // m_result.size() as cached by init()
 
     IntegerNode(const IntegerNode& from, QueryNodeHandoverPatches* patches)
         : BaseType(from, patches)
         , m_needles(from.m_needles)
     {
     }
+    /// Returns the first position in [start, end) of the leaf at m_leaf_ptr whose
+    /// value is contained in m_needles, or realm::npos when there is none.
+    size_t find_first_haystack(size_t start, size_t end)
+    {
+        const auto not_in_set = m_needles.end();
+        // Fewer than 22 needles are scanned linearly; larger sets use find().
+        bool search = m_nb_needles < 22;
+        auto cmp_fn = [this, search, not_in_set](const auto& v) {
+            if (search) {
+                for (auto it = m_needles.begin(); it != not_in_set; ++it) {
+                    if (*it == v)
+                        return true;
+                }
+                return false;
+            }
+            else {
+                return (m_needles.find(v) != not_in_set);
+            }
+        };
+        for (size_t i = start; i < end; ++i) {
+            auto val = this->m_leaf_ptr->get(i);
+            if (cmp_fn(val)) {
+                return i;
+            }
+        }
+        return realm::npos;
+    }
 };
 
 
diff --git a/test/test_query.cpp b/test/test_query.cpp
index 04d3c930328..58c53bc4cf0 100644
--- a/test/test_query.cpp
+++ b/test/test_query.cpp
@@ -11876,7 +11876,7 @@ TEST(Query_IntOrQueryOptimisation)
 TEST(Query_IntOrQueryPerformance)
 {
     using std::chrono::duration_cast;
-    using std::chrono::milliseconds;
+    using std::chrono::microseconds;
 
     Group g;
     TableRef table = g.add_table("table");
@@ -11910,12 +11910,12 @@ TEST(Query_IntOrQueryPerformance)
         auto before = std::chrono::steady_clock().now();
         size_t ints_count = q_ints.count();
         auto after = std::chrono::steady_clock().now();
-        // std::cout << "ints count: " << duration_cast(after - before).count() << " ms" << std::endl;
+        // std::cout << "ints count: " << duration_cast(after - before).count() << " us" << std::endl;
 
         before = std::chrono::steady_clock().now();
         size_t nullable_ints_count = q_nullables.count();
         after = std::chrono::steady_clock().now();
-        // std::cout << "nullable ints count: " << duration_cast(after - before).count() << " ms"
+        // std::cout << "nullable ints count: " << duration_cast(after - before).count() << " us"
         // << std::endl;
 
         size_t expected_nullable_query_count =
@@ -11927,10 +11927,30 @@ TEST(Query_IntOrQueryPerformance)
     run_queries(2);
     run_queries(2048);
 
-    // table->add_search_index(ints_col_ndx);
-    // table->add_search_index(nullable_ints_col_ndx);
-    // run_queries();
+    table->add_search_index(ints_col_ndx);
+    table->add_search_index(nullable_ints_col_ndx);
+
+    run_queries(2);
+    run_queries(1024);
 }
 
+// Equality query on an integer column that has a search index.
+TEST(Query_IntIndexed)
+{
+    Group g;
+    TableRef table = g.add_table("table");
+    auto col_id = table->add_column(type_Int, "id");
+
+    table->add_empty_row(100);
+    for (int i = 0; i < 100; i++) {
+        table->set_int(col_id, i, i % 10);
+    }
+
+    table->add_search_index(col_id);
+    Query q = table->where().equal(col_id, 1);
+    CHECK_EQUAL(q.count(), 10);
+    // Run the same query once more; the count must not change.
+    CHECK_EQUAL(q.count(), 10);
+}
 
 #endif // TEST_QUERY