vinyl: optimize select in case secondary key is updated frequently

If a space has secondary keys, an update operation generates a REPLACE in the primary key and a DELETE + REPLACE in each secondary key. We don't need a DELETE in the primary key, because a field indexed by the primary key cannot be updated so a REPLACE is enough to update the tuple stored in the index. On the contrary, a field indexed by a secondary key can be updated so we need a DELETE to remove the old tuple from the index. As a result, if a field indexed by a secondary key gets updated often (e.g. the user frequently calls space:update({x}, {{'+', 2, 1}}) on a space with a secondary index over field #2), a lot of DELETE statements will be generated. The DELETE statements won't be compacted until major compaction so a range select over a secondary index may take long, because it will have to iterate over all those useless DELETEs. In fact, the REPLACE generated by an update operation can be safely substituted with an INSERT in a secondary index. INSERT + DELETE are annihilated on dump/compaction so that would solve the problem. Unfortunately, we can't substitute REPLACE with INSERT immediately on update, because statements are shared between primary and secondary indexes in memory and we can't use an INSERT in a primary index in case of update (see above). However, it is OK to turn REPLACE generated by an update in a secondary key to an INSERT on dump/compaction. We just need a way to identify such REPLACE statements somehow. In contrast to normal REPLACEs, a REPLACE statement generated by an update operation usually has a column mask. There's only one exception: if an update operation updates all secondary keys, the column mask isn't stored (vy_stmt_column_mask() returns UINT64_MAX). This is done for the sake of memory usage minimization, but it doesn't seem to make much sense: first, updates that touch all secondary indexes should be rare; second, we save only 8 bytes per statement. Let's remove this optimization and store column mask in REPLACE statements generated by update operations unconditionally and use this information in the write iterator to turn REPLACEs into INSERTs. See #2875
tarantool · Dec 1, 2017 · 7f1d1ee · 7f1d1ee
1 parent 9fdd066
commit 7f1d1ee
Show file tree

Hide file tree

Showing 4 changed files with 57 additions and 53 deletions.
diff --git a/src/box/vinyl.c b/src/box/vinyl.c
@@ -1797,25 +1797,6 @@ vy_check_update(struct space *space, const struct vy_index *pk,
 	return 0;
 }
 
-/**
- * Check if an UPDATE operation with the specified column mask
- * changes all indexes. In that case we don't need to store
- * column mask in a tuple.
- * @param space Space to update.
- * @param column_mask Bitmask of update operations.
- */
-static inline bool
-vy_update_changes_all_indexes(const struct space *space, uint64_t column_mask)
-{
-	for (uint32_t i = 1; i < space->index_count; ++i) {
-		struct vy_index *index = vy_index(space->index[i]);
-		if (key_update_can_be_skipped(index->cmp_def->column_mask,
-					      column_mask))
-			return false;
-	}
-	return true;
-}
-
 /**
  * Execute UPDATE in a vinyl space.
  * @param env     Vinyl environment.
@@ -1876,10 +1857,8 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	if (tuple_validate_raw(pk->mem_format, new_tuple))
 		return -1;
 
-	bool update_changes_all =
-		vy_update_changes_all_indexes(space, column_mask);
 	struct tuple_format *mask_format = pk->mem_format_with_colmask;
-	if (space->index_count == 1 || update_changes_all) {
+	if (space->index_count == 1) {
 		stmt->new_tuple = vy_stmt_new_replace(pk->mem_format, new_tuple,
 						      new_tuple_end);
 		if (stmt->new_tuple == NULL)
@@ -1904,20 +1883,12 @@ vy_update(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	if (space->index_count == 1)
 		return 0;
 
-	struct tuple *delete = NULL;
-	if (! update_changes_all) {
-		delete = vy_stmt_new_surrogate_delete(mask_format,
-						      stmt->old_tuple);
-		if (delete == NULL)
-			return -1;
-		vy_stmt_set_column_mask(delete, column_mask);
-	} else {
-		delete = vy_stmt_new_surrogate_delete(pk->mem_format,
-						      stmt->old_tuple);
-		if (delete == NULL)
-			return -1;
-	}
-	assert(delete != NULL);
+	struct tuple *delete = vy_stmt_new_surrogate_delete(mask_format,
+							    stmt->old_tuple);
+	if (delete == NULL)
+		return -1;
+	vy_stmt_set_column_mask(delete, column_mask);
+
 	for (uint32_t i = 1; i < space->index_count; ++i) {
 		index = vy_index(space->index[i]);
 		if (vy_is_committed_one(env, space, index))
@@ -2162,10 +2133,8 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 	if (tuple_validate_raw(pk->mem_format, new_tuple))
 		return -1;
 	new_tuple_end = new_tuple + new_size;
-	bool update_changes_all =
-		vy_update_changes_all_indexes(space, column_mask);
 	struct tuple_format *mask_format = pk->mem_format_with_colmask;
-	if (space->index_count == 1 || update_changes_all) {
+	if (space->index_count == 1) {
 		stmt->new_tuple = vy_stmt_new_replace(pk->mem_format, new_tuple,
 						      new_tuple_end);
 		if (stmt->new_tuple == NULL)
@@ -2193,20 +2162,12 @@ vy_upsert(struct vy_env *env, struct vy_tx *tx, struct txn_stmt *stmt,
 
 	/* Replace in secondary indexes works as delete insert. */
 	struct vy_index *index;
-	struct tuple *delete = NULL;
-	if (! update_changes_all) {
-		delete = vy_stmt_new_surrogate_delete(mask_format,
-						      stmt->old_tuple);
-		if (delete == NULL)
-			return -1;
-		vy_stmt_set_column_mask(delete, column_mask);
-	} else {
-		delete = vy_stmt_new_surrogate_delete(pk->mem_format,
-						      stmt->old_tuple);
-		if (delete == NULL)
-			return -1;
-	}
-	assert(delete != NULL);
+	struct tuple *delete = vy_stmt_new_surrogate_delete(mask_format,
+							    stmt->old_tuple);
+	if (delete == NULL)
+		return -1;
+	vy_stmt_set_column_mask(delete, column_mask);
+
 	for (uint32_t i = 1; i < space->index_count; ++i) {
 		index = vy_index(space->index[i]);
 		if (vy_is_committed_one(env, space, index))

diff --git a/src/box/vy_write_iterator.c b/src/box/vy_write_iterator.c
@@ -621,6 +621,19 @@ vy_write_iterator_build_history(struct region *region,
 	while (true) {
 		*is_first_insert = vy_stmt_type(src->tuple) == IPROTO_INSERT;
 
+		if (!stream->is_primary &&
+		    vy_stmt_type(src->tuple) == IPROTO_REPLACE) {
+			/*
+			 * If a REPLACE stored in a secondary index was
+			 * generated by an update operation, it can be
+			 * turned into an INSERT.
+			 */
+			uint64_t stmt_mask = vy_stmt_column_mask(src->tuple);
+			if (stmt_mask != UINT64_MAX &&
+			    !key_update_can_be_skipped(stmt_mask, key_mask))
+				*is_first_insert = true;
+		}
+
 		if (vy_stmt_lsn(src->tuple) > current_rv_lsn) {
 			/*
 			 * Skip statements invisible to the current read

diff --git a/test/vinyl/write_iterator.result b/test/vinyl/write_iterator.result
@@ -780,6 +780,12 @@ box.begin() s:insert{5, 5} s:replace{5, 5, 5} box.commit()
 box.begin() s:insert{6, 6} s:update(6, {{'!', 2, 6}}) box.commit()
 ---
 ...
+_ = s:insert{7, 7}
+---
+...
+_ = s:insert{8, 8}
+---
+...
 box.snapshot()
 ---
 - ok
@@ -803,6 +809,21 @@ s:delete{5}
 s:delete{6}
 ---
 ...
+-- Check that a REPLACE in a secondary index generated by
+-- an update operation is converted into an INSERT on dump
+-- and hence gets annihilated by the next DELETE.
+_ = s:update(7, {{'=', 2, 77}})
+---
+...
+s:delete{7}
+---
+...
+_ = s:upsert({8, 8}, {{'=', 2, 88}})
+---
+...
+s:delete{8}
+---
+...
 -- Some padding to trigger minor compaction.
 for i = 1001, 1000 + PAD2 do s:replace{i, i} end
 ---

diff --git a/test/vinyl/write_iterator.test.lua b/test/vinyl/write_iterator.test.lua
@@ -331,6 +331,8 @@ _ = s:upsert({3, 3}, {{'!', 1, 1}}) -- upsert, no old tuple
 box.begin() s:insert{4, 4} s:delete(4) box.commit()
 box.begin() s:insert{5, 5} s:replace{5, 5, 5} box.commit()
 box.begin() s:insert{6, 6} s:update(6, {{'!', 2, 6}}) box.commit()
+_ = s:insert{7, 7}
+_ = s:insert{8, 8}
 box.snapshot()
 -- Delete the inserted tuples and trigger compaction.
 s:delete{1}
@@ -339,6 +341,13 @@ s:delete{3}
 s:delete{4}
 s:delete{5}
 s:delete{6}
+-- Check that a REPLACE in a secondary index generated by
+-- an update operation is converted into an INSERT on dump
+-- and hence gets annihilated by the next DELETE.
+_ = s:update(7, {{'=', 2, 77}})
+s:delete{7}
+_ = s:upsert({8, 8}, {{'=', 2, 88}})
+s:delete{8}
 -- Some padding to trigger minor compaction.
 for i = 1001, 1000 + PAD2 do s:replace{i, i} end
 box.snapshot()