Skip to content

Commit

Permalink
Use index in int equal queries
Browse files Browse the repository at this point in the history
  • Loading branch information
jedelbo committed Apr 1, 2019
1 parent a7f93a6 commit 4310b4a
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* Improved query performance for unindexed integer columns when the query has a chain of OR conditions.
This will improve performance of "IN" queries generated by SDKs.
([PR #2888](https://github.com/realm/realm-sync/issues/2888)).
* Use the search index, when one exists, for equality queries on integer columns.

### Fixed
* <How to hit and notice issue? what was the impact?> ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?)
Expand Down
89 changes: 66 additions & 23 deletions src/realm/query_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -885,6 +885,27 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {
: BaseType(value, column_ndx)
{
}
// Releases the result column owned by this node, if one is currently attached.
~IntegerNode()
{
    if (!m_result.is_attached()) {
        return; // init() never populated m_result, nothing to free
    }
    m_result.destroy();
}

// Prepares the node for a query run.
//
// Caches the number of needles and, when the condition column has a search
// index, precomputes every row index matching m_value into m_result so that
// find_first_local() can walk that list instead of scanning leaves.
void init() override
{
    BaseType::init();
    m_nb_needles = m_needles.size();

    if (has_search_index()) {
        // init() can run more than once on the same node (e.g. the query is
        // executed again). Free the column from the previous run before
        // allocating a new one; otherwise its memory is leaked when m_result
        // is re-pointed at the fresh ref below.
        if (m_result.is_attached()) {
            m_result.destroy();
        }

        ref_type ref = IntegerColumn::create(Allocator::get_default());
        m_result.init_from_ref(Allocator::get_default(), ref);

        // Collect all matches for m_value over the whole column.
        IntegerNodeBase<ColType>::m_condition_column->find_all(m_result, this->m_value, 0, realm::npos);

        // Reset the cursor over the precomputed matches.
        m_index_get = 0;
        m_index_end = m_result.size();
    }
}

void consume_condition(IntegerNode<ColType, Equal>* other)
{
Expand All @@ -904,7 +925,21 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {
size_t find_first_local(size_t start, size_t end) override
{
REALM_ASSERT(this->m_table);
size_t nb_needles = m_needles.size();

if (has_search_index()) {
while (m_index_get < m_index_end) {
size_t ndx = size_t(m_result.get(m_index_get));
if (ndx >= end) {
break;
}
m_index_get++;
if (ndx >= start) {
return ndx;
}
}
return not_found;
}


while (start < end) {
// Cache internal leaves
Expand All @@ -918,28 +953,8 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {

auto start2 = start - this->m_leaf_start;
size_t s = realm::npos;
if (nb_needles) {
const auto not_in_set = m_needles.end();
bool search = nb_needles < 22;
auto cmp_fn = [this, search, not_in_set](const auto& v) {
if (search) {
for (auto it = m_needles.begin(); it != not_in_set; ++it) {
if (*it == v)
return true;
}
return false;
}
else {
return (m_needles.find(v) != not_in_set);
}
};
for (size_t i = start2; i < end2; ++i) {
auto val = this->m_leaf_ptr->get(i);
if (cmp_fn(val)) {
s = i;
break;
}
}
if (m_nb_needles) {
s = find_first_haystack(start2, end2);
}
else if (end2 - start2 == 1) {
if (this->m_leaf_ptr->get(start2) == this->m_value) {
Expand Down Expand Up @@ -991,12 +1006,40 @@ class IntegerNode<ColType, Equal> : public IntegerNodeBase<ColType> {

private:
std::unordered_set<TConditionValue> m_needles;
IntegerColumn m_result;
size_t m_nb_needles = 0;
size_t m_index_get = 0;
size_t m_index_end = 0;

// Constructs this node from `from`, forwarding `from` and `patches` to the base class.
// Only the needle set is copied here; m_result is left default-constructed and the
// counters (m_nb_needles, m_index_get, m_index_end) keep their in-class default of 0.
// NOTE(review): presumably used when handing a query over to another context
// (going by the QueryNodeHandoverPatches parameter) - confirm against callers.
IntegerNode(const IntegerNode<ColType, Equal>& from, QueryNodeHandoverPatches* patches)
: BaseType(from, patches)
, m_needles(from.m_needles)
{
}
// Scans the cached leaf over [start, end) and returns the first position whose
// value is contained in m_needles, or realm::npos when no position matches.
//
// Below 22 needles the set is walked linearly; from 22 upwards a hash lookup
// is used instead (presumably because a short linear walk is cheaper than
// hashing - the threshold is taken as-is from the existing code).
size_t find_first_haystack(size_t start, size_t end)
{
    const auto needles_end = m_needles.end();
    const bool linear_scan = m_nb_needles < 22;

    for (size_t pos = start; pos < end; ++pos) {
        const auto candidate = this->m_leaf_ptr->get(pos);

        bool matched = false;
        if (linear_scan) {
            for (auto needle = m_needles.begin(); needle != needles_end; ++needle) {
                if (*needle == candidate) {
                    matched = true;
                    break;
                }
            }
        }
        else {
            matched = (m_needles.find(candidate) != needles_end);
        }

        if (matched) {
            return pos;
        }
    }
    return realm::npos;
}
};


Expand Down
29 changes: 23 additions & 6 deletions test/test_query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11876,7 +11876,7 @@ TEST(Query_IntOrQueryOptimisation)
TEST(Query_IntOrQueryPerformance)
{
using std::chrono::duration_cast;
using std::chrono::milliseconds;
using std::chrono::microseconds;

Group g;
TableRef table = g.add_table("table");
Expand Down Expand Up @@ -11910,12 +11910,12 @@ TEST(Query_IntOrQueryPerformance)
auto before = std::chrono::steady_clock().now();
size_t ints_count = q_ints.count();
auto after = std::chrono::steady_clock().now();
// std::cout << "ints count: " << duration_cast<milliseconds>(after - before).count() << " ms" << std::endl;
// std::cout << "ints count: " << duration_cast<microseconds>(after - before).count() << " us" << std::endl;

before = std::chrono::steady_clock().now();
size_t nullable_ints_count = q_nullables.count();
after = std::chrono::steady_clock().now();
// std::cout << "nullable ints count: " << duration_cast<milliseconds>(after - before).count() << " ms"
// std::cout << "nullable ints count: " << duration_cast<microseconds>(after - before).count() << " us"
// << std::endl;

size_t expected_nullable_query_count =
Expand All @@ -11927,10 +11927,27 @@ TEST(Query_IntOrQueryPerformance)
run_queries(2);
run_queries(2048);

// table->add_search_index(ints_col_ndx);
// table->add_search_index(nullable_ints_col_ndx);
// run_queries();
table->add_search_index(ints_col_ndx);
table->add_search_index(nullable_ints_col_ndx);

run_queries(2);
run_queries(1024);
}

// Checks that an equality query on an indexed integer column returns the
// right number of matches: 100 rows hold the values 0..9 cyclically, so each
// value occurs exactly 10 times.
TEST(Query_IntIndexed)
{
    Group group;
    TableRef tbl = group.add_table("table");
    auto id_col = tbl->add_column(type_Int, "id");

    const int row_count = 100;
    const int distinct_values = 10;

    tbl->add_empty_row(row_count);
    int row = 0;
    while (row < row_count) {
        tbl->set_int(id_col, row, row % distinct_values);
        ++row;
    }

    // Add the index only after the data is in place, then query through it.
    tbl->add_search_index(id_col);
    Query query = tbl->where().equal(id_col, 1);
    CHECK_EQUAL(query.count(), 10);
}

#endif // TEST_QUERY

0 comments on commit 4310b4a

Please sign in to comment.