Skip to content

Commit

Permalink
vinyl: optimize select in case secondary key is updated frequently
Browse files Browse the repository at this point in the history
If a space has secondary keys, an update operation generates a REPLACE
in the primary key and a DELETE + REPLACE in each secondary key. We
don't need a DELETE in the primary key, because a field indexed by the
primary key cannot be updated so a REPLACE is enough to update the tuple
stored in the index. On the contrary, a field indexed by a secondary key
can be updated so we need a DELETE to remove the old tuple from the
index.

As a result, if a field indexed by a secondary key gets updated often
(e.g. the user frequently calls space:update({x}, {{'+', 2, 1}}) on a
space with a secondary index over field #2), a lot of DELETE statements
will be generated. The DELETE statements won't be compacted until major
compaction so a range select over a secondary index may take long,
because it will have to iterate over all those useless DELETEs.

In fact, the REPLACE generated by an update operation can be safely
substituted with an INSERT in a secondary index. INSERT + DELETE are
annihilated on dump/compaction so that would solve the problem.
Unfortunately, we can't substitute REPLACE with INSERT immediately on
update, because statements are shared between primary and secondary
indexes in memory and we can't use an INSERT in a primary index in case
of update (see above). However, it is OK to turn REPLACE generated by an
update in a secondary key to an INSERT on dump/compaction. We just need
a way to identify such REPLACE statements somehow.

In contrast to normal REPLACEs, a REPLACE statement generated by an
update operation usually has a column mask. There's only one exception:
if an update operation updates all secondary keys, the column mask isn't
stored (vy_stmt_column_mask() returns UINT64_MAX). This is done for the
sake of memory usage minimization, but it doesn't seem to make much
sense: first, updates that touch all secondary indexes should be rare;
second, we save only 8 bytes per statement. Let's remove this
optimization and store column mask in REPLACE statements generated by
update operations unconditionally and use this information in the write
iterator to turn REPLACEs into INSERTs.

See #2875
  • Loading branch information
locker authored and kostja committed Dec 1, 2017
1 parent 9fdd066 commit 7f1d1ee
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 53 deletions.
67 changes: 14 additions & 53 deletions src/box/vinyl.c
Expand Up @@ -1797,25 +1797,6 @@ vy_check_update(struct space *space, const struct vy_index *pk,
return 0;
}

/**
* Check if an UPDATE operation with the specified column mask
* changes all indexes. In that case we don't need to store
* column mask in a tuple.
* @param space Space to update.
* @param column_mask Bitmask of update operations.
*/
static inline bool
vy_update_changes_all_indexes(const struct space *space, uint64_t column_mask)
{
for (uint32_t i = 1; i < space->index_count; ++i) {
struct vy_index *index = vy_index(space->index[i]);
if (key_update_can_be_skipped(index->cmp_def->column_mask,
column_mask))
return false;
}
return true;
}

/**
* Execute UPDATE in a vinyl space.
* @param env Vinyl environment.
Expand Down Expand Up @@ -1876,10 +1857,8 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
if (tuple_validate_raw(pk->mem_format, new_tuple))
return -1;

bool update_changes_all =
vy_update_changes_all_indexes(space, column_mask);
struct tuple_format *mask_format = pk->mem_format_with_colmask;
if (space->index_count == 1 || update_changes_all) {
if (space->index_count == 1) {
stmt->new_tuple = vy_stmt_new_replace(pk->mem_format, new_tuple,
new_tuple_end);
if (stmt->new_tuple == NULL)
Expand All @@ -1904,20 +1883,12 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
if (space->index_count == 1)
return 0;

struct tuple *delete = NULL;
if (! update_changes_all) {
delete = vy_stmt_new_surrogate_delete(mask_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
vy_stmt_set_column_mask(delete, column_mask);
} else {
delete = vy_stmt_new_surrogate_delete(pk->mem_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
}
assert(delete != NULL);
struct tuple *delete = vy_stmt_new_surrogate_delete(mask_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
vy_stmt_set_column_mask(delete, column_mask);

for (uint32_t i = 1; i < space->index_count; ++i) {
index = vy_index(space->index[i]);
if (vy_is_committed_one(env, space, index))
Expand Down Expand Up @@ -2162,10 +2133,8 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
if (tuple_validate_raw(pk->mem_format, new_tuple))
return -1;
new_tuple_end = new_tuple + new_size;
bool update_changes_all =
vy_update_changes_all_indexes(space, column_mask);
struct tuple_format *mask_format = pk->mem_format_with_colmask;
if (space->index_count == 1 || update_changes_all) {
if (space->index_count == 1) {
stmt->new_tuple = vy_stmt_new_replace(pk->mem_format, new_tuple,
new_tuple_end);
if (stmt->new_tuple == NULL)
Expand Down Expand Up @@ -2193,20 +2162,12 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,

/* Replace in secondary indexes works as delete insert. */
struct vy_index *index;
struct tuple *delete = NULL;
if (! update_changes_all) {
delete = vy_stmt_new_surrogate_delete(mask_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
vy_stmt_set_column_mask(delete, column_mask);
} else {
delete = vy_stmt_new_surrogate_delete(pk->mem_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
}
assert(delete != NULL);
struct tuple *delete = vy_stmt_new_surrogate_delete(mask_format,
stmt->old_tuple);
if (delete == NULL)
return -1;
vy_stmt_set_column_mask(delete, column_mask);

for (uint32_t i = 1; i < space->index_count; ++i) {
index = vy_index(space->index[i]);
if (vy_is_committed_one(env, space, index))
Expand Down
13 changes: 13 additions & 0 deletions src/box/vy_write_iterator.c
Expand Up @@ -621,6 +621,19 @@ vy_write_iterator_build_history(struct region *region,
while (true) {
*is_first_insert = vy_stmt_type(src->tuple) == IPROTO_INSERT;

if (!stream->is_primary &&
vy_stmt_type(src->tuple) == IPROTO_REPLACE) {
/*
* If a REPLACE stored in a secondary index was
* generated by an update operation, it can be
* turned into an INSERT.
*/
uint64_t stmt_mask = vy_stmt_column_mask(src->tuple);
if (stmt_mask != UINT64_MAX &&
!key_update_can_be_skipped(stmt_mask, key_mask))
*is_first_insert = true;
}

if (vy_stmt_lsn(src->tuple) > current_rv_lsn) {
/*
* Skip statements invisible to the current read
Expand Down
21 changes: 21 additions & 0 deletions test/vinyl/write_iterator.result
Expand Up @@ -780,6 +780,12 @@ box.begin() s:insert{5, 5} s:replace{5, 5, 5} box.commit()
box.begin() s:insert{6, 6} s:update(6, {{'!', 2, 6}}) box.commit()
---
...
_ = s:insert{7, 7}
---
...
_ = s:insert{8, 8}
---
...
box.snapshot()
---
- ok
Expand All @@ -803,6 +809,21 @@ s:delete{5}
s:delete{6}
---
...
-- Check that a REPLACE in a secondary index generated by
-- an update operation is converted into an INSERT on dump
-- and hence gets annihilated by the next DELETE.
_ = s:update(7, {{'=', 2, 77}})
---
...
s:delete{7}
---
...
_ = s:upsert({8, 8}, {{'=', 2, 88}})
---
...
s:delete{8}
---
...
-- Some padding to trigger minor compaction.
for i = 1001, 1000 + PAD2 do s:replace{i, i} end
---
Expand Down
9 changes: 9 additions & 0 deletions test/vinyl/write_iterator.test.lua
Expand Up @@ -331,6 +331,8 @@ _ = s:upsert({3, 3}, {{'!', 1, 1}}) -- upsert, no old tuple
box.begin() s:insert{4, 4} s:delete(4) box.commit()
box.begin() s:insert{5, 5} s:replace{5, 5, 5} box.commit()
box.begin() s:insert{6, 6} s:update(6, {{'!', 2, 6}}) box.commit()
_ = s:insert{7, 7}
_ = s:insert{8, 8}
box.snapshot()
-- Delete the inserted tuples and trigger compaction.
s:delete{1}
Expand All @@ -339,6 +341,13 @@ s:delete{3}
s:delete{4}
s:delete{5}
s:delete{6}
-- Check that a REPLACE in a secondary index generated by
-- an update operation is converted into an INSERT on dump
-- and hence gets annihilated by the next DELETE.
_ = s:update(7, {{'=', 2, 77}})
s:delete{7}
_ = s:upsert({8, 8}, {{'=', 2, 88}})
s:delete{8}
-- Some padding to trigger minor compaction.
for i = 1001, 1000 + PAD2 do s:replace{i, i} end
box.snapshot()
Expand Down

0 comments on commit 7f1d1ee

Please sign in to comment.