From 2f5bb0e7ac0ee265dfb1d1447dc6edd9eb35cbef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Edelbo?= Date: Mon, 4 Mar 2019 12:51:03 +0100 Subject: [PATCH] Optimizing a bit First make a sort based on column id. This will allow us to only pass through the array of conditions once. Comparing column indecies before making dynamic casts. Early out if column has search index. --- src/realm/query_engine.cpp | 29 +++++++++++++---------------- src/realm/query_engine.hpp | 37 ++++++++++++++++++++++++++++--------- test/test_parser.cpp | 2 +- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/src/realm/query_engine.cpp b/src/realm/query_engine.cpp index 447a0a17fef..2109dca6ff4 100644 --- a/src/realm/query_engine.cpp +++ b/src/realm/query_engine.cpp @@ -326,7 +326,7 @@ void StringNode::_search_index_init() } } -bool StringNode::try_consume_condition(StringNode* other) +void StringNode::consume_condition(StringNode* other) { // If a search index is present, don't try to combine conditions since index search is most likely faster. // Assuming N elements to search and M conditions to check: @@ -334,22 +334,19 @@ bool StringNode::try_consume_condition(StringNode* other) // 2) no search index, combine conditions: O(N) // 3) no search index, conditions not combined: O(N*M) // In practice N is much larger than M, so if we have a search index, choose 1, otherwise if possible choose 2. - if (!m_condition_column->has_search_index() && m_condition_column == other->m_condition_column) { - REALM_ASSERT(other->m_needles.empty()); - if (m_needles.empty()) { - m_needles.insert(bool(m_value) ? StringData(*m_value) : StringData()); - } - if (bool(other->m_value)) { - m_needle_storage.push_back(StringBuffer()); - m_needle_storage.back().append(*other->m_value); - m_needles.insert(StringData(m_needle_storage.back().data(), m_needle_storage.back().size())); - } - else { - m_needles.insert(StringData()); - } - return true; + REALM_ASSERT(m_condition_column == other->m_condition_column); + REALM_ASSERT(other->m_needles.empty()); + if (m_needles.empty()) { + m_needles.insert(bool(m_value) ? StringData(*m_value) : StringData()); + } + if (bool(other->m_value)) { + m_needle_storage.push_back(StringBuffer()); + m_needle_storage.back().append(*other->m_value); + m_needles.insert(StringData(m_needle_storage.back().data(), m_needle_storage.back().size())); + } + else { + m_needles.insert(StringData()); } - return false; } // Requirements of template types: diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index d7fcf69b7a9..50260465cb1 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -1186,6 +1186,11 @@ class StringNodeBase : public ParentNode { do_verify_column(m_condition_column); } + bool has_search_index() const + { + return m_condition_column->has_search_index(); + } + void init() override { ParentNode::init(); @@ -1549,7 +1554,7 @@ class StringNode : public StringNodeEqualBase { void _search_index_init() override; - bool try_consume_condition(StringNode* other); + void consume_condition(StringNode* other); std::unique_ptr clone(QueryNodeHandoverPatches* patches) const override { @@ -1675,18 +1680,32 @@ class OrNode : public ParentNode { m_dD = 10.0; StringNode* first = nullptr; - StringNode* advance = nullptr; - for (auto it = m_conditions.begin(); it != m_conditions.end(); ++it) { - if (bool(*it) && (first = dynamic_cast*>(it->get()))) { - for (auto next = it + 1; next != m_conditions.end(); ) { - if ((advance = dynamic_cast*>(next->get()))) { - if (first->try_consume_condition(advance)) { + std::sort(m_conditions.begin(), m_conditions.end(), + [](auto& a, auto& b) { return a->m_condition_column_idx < b->m_condition_column_idx; }); + auto it = m_conditions.begin(); + while (it != m_conditions.end()) { + // Only try to optimize on StringNode conditions without search index + if (bool(*it) && (first = dynamic_cast*>(it->get())) && !first->has_search_index()) { + auto col_ndx = first->m_condition_column_idx; + auto next = it + 1; + while (next != m_conditions.end()) { + if ((*next)->m_condition_column_idx == col_ndx) { + if (auto advance = dynamic_cast*>(next->get())) { + first->consume_condition(advance); next = m_conditions.erase(next); - continue; + } + else { + ++next; } } - ++next; + else { + break; + } } + it = next; + } + else { + ++it; } } diff --git a/test/test_parser.cpp b/test/test_parser.cpp index c2433095f35..01d184eb5dd 100644 --- a/test/test_parser.cpp +++ b/test/test_parser.cpp @@ -2770,7 +2770,7 @@ TEST(Parser_ChainedStringEqualQueries) rd.shuffle(populated_data.begin(), populated_data.end()); std::string query; bool first = true; - size_t column_to_query = 0; + char column_to_query = 0; for (auto s : populated_data) { std::string column_name(1, 'a' + column_to_query); query += (first ? "" : " or " ) + column_name + " == '" + s + "'";