diff --git a/src/box/memtx_tree.cc b/src/box/memtx_tree.cc
index f24f673513ca..ca3d7b8ca6e9 100644
--- a/src/box/memtx_tree.cc
+++ b/src/box/memtx_tree.cc
@@ -793,33 +793,92 @@ prepare_start_prefix_iterator(struct memtx_tree_key_data<USE_HINT> *start_data,
 	return true;
 }
 
-template <bool USE_HINT, bool FAST_OFFSET>
 static int
-tree_iterator_start(struct iterator *iterator, struct tuple **ret)
+memtx_tree_lookup_canonicalize(struct index *base, struct key_def *cmp_def,
+			       enum iterator_type *out_type,
+			       const char **out_key, uint32_t part_count)
 {
-	struct region *region = &fiber()->gc;
-	RegionGuard region_guard(region);
-	*ret = NULL;
-	struct space *space;
-	struct index *index_base;
-	index_weak_ref_get_checked(&iterator->index_ref, &space, &index_base);
-	struct memtx_tree_index<USE_HINT, FAST_OFFSET> *index =
-		(struct memtx_tree_index<USE_HINT, FAST_OFFSET> *)index_base;
-	struct tree_iterator<USE_HINT, FAST_OFFSET> *it =
-		get_tree_iterator<USE_HINT, FAST_OFFSET>(iterator);
-	iterator->next_internal = exhausted_iterator_next;
-	memtx_tree_t<USE_HINT, FAST_OFFSET> *tree = &index->tree;
-	struct txn *txn = in_txn();
-	struct key_def *cmp_def = index->base.def->cmp_def;
-	struct memtx_tree_key_data<USE_HINT> start_data =
-		it->after_data.key != NULL ? it->after_data : it->key_data;
-	enum iterator_type type = it->type;
+	enum iterator_type type = *out_type;
+	const char *key = *out_key;
+
+	assert(part_count == 0 || key != NULL);
+	assert(type >= 0 && type < iterator_type_MAX);
+
+	static_assert(iterator_type_MAX < 32, "Too big for bit logic");
+	const uint32_t supported_mask = ((1u << (ITER_GT + 1)) - 1) |
+					(1u << ITER_NP) | (1u << ITER_PP);
+	if (((1u << type) & supported_mask) == 0) {
+		diag_set(UnsupportedIndexFeature, base->def,
+			 "requested iterator type");
+		return -1;
+	}
+
+	if ((type == ITER_NP || type == ITER_PP) && part_count > 0 &&
+	    cmp_def->parts[part_count - 1].coll != NULL) {
+		diag_set(UnsupportedIndexFeature, base->def,
+			 "requested iterator type along with collation");
+		return -1;
+	}
+
+	if (part_count == 0) {
+		/*
+		 * If no key is specified, downgrade equality
+		 * iterators to a full range.
+		 */
+		type = iterator_type_is_reverse(type) ? ITER_LE : ITER_GE;
+		key = NULL;
+	}
+
+	if (type == ITER_ALL)
+		type = ITER_GE;
+
+	*out_type = type;
+	*out_key = key;
+	return 0;
+}
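The canonicalization helper above concentrates the argument checks that previously lived in memtx_tree_index_create_iterator(): one bit per iterator type, which is why the static_assert guards that the whole enum fits into a 32-bit mask. A minimal standalone sketch of the same membership test, with stand-in enum values instead of the real iterator_type constants:

```cpp
// Standalone sketch of the bitmask membership test; the enum values
// here are stand-ins for the real constants from iterator_type.h.
#include <cassert>
#include <cstdint>

enum iter_type { EQ, REQ, ALL, LT, LE, GE, GT, NP, PP, iter_type_MAX };

static bool
is_supported(enum iter_type t)
{
	static_assert(iter_type_MAX < 32, "Too big for bit logic");
	/* One bit per type: everything from EQ..GT plus NP and PP. */
	const uint32_t supported_mask = ((1u << (GT + 1)) - 1) |
					(1u << NP) | (1u << PP);
	return ((1u << t) & supported_mask) != 0;
}

int
main()
{
	assert(is_supported(GE) && is_supported(PP));
	return 0;
}
```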
+
+enum memtx_tree_lookup_op {
+	NOOP,
+	FIRST,
+	LOWER_BOUND,
+	UPPER_BOUND,
+	INVALIDATE,
+};
+
+struct memtx_tree_lookup_strategy {
+	enum memtx_tree_lookup_op operation;
+	bool step_back;
+};
+
+/**
+ * @warning may modify out_start_data->key (repointing it into @a region).
+ */
+template <bool USE_HINT>
+static int
+memtx_tree_lookup_setup(struct memtx_tree_key_data<USE_HINT> *out_start_data,
+			struct memtx_tree_key_data<USE_HINT> after_data,
+			enum iterator_type *out_type, struct key_def *cmp_def,
+			struct region *region,
+			struct memtx_tree_lookup_strategy *lookup)
+{
+	assert(out_start_data != NULL);
+	assert(out_type != NULL);
+	assert(cmp_def != NULL);
+	assert(region != NULL);
+	assert(lookup != NULL);
+
+	enum iterator_type type = *out_type;
+	struct memtx_tree_key_data<USE_HINT> start_data = *out_start_data;
+
 	if ((type == ITER_NP || type == ITER_PP) &&
-	    it->after_data.key == NULL) {
+	    after_data.key == NULL) {
 		if (!prepare_start_prefix_iterator(&start_data, &type,
-						   cmp_def, region))
+						   cmp_def, region)) {
+			lookup->operation = NOOP;
 			return 0;
+		}
 	}
+
 	/*
 	 * Since iteration with equality iterators returns first found tuple,
 	 * we need a special flag for EQ and REQ if we want to start iteration
@@ -828,33 +887,21 @@ tree_iterator_start(struct iterator *iterator, struct tuple **ret)
 	 * As for range iterators with equality, we can simply change them
 	 * to their equivalents with inequality.
 	 */
-	bool skip_equal_tuple = it->after_data.key != NULL;
+	bool skip_equal_tuple = after_data.key != NULL;
 	if (skip_equal_tuple && type != ITER_EQ && type != ITER_REQ)
 		type = iterator_type_is_reverse(type) ? ITER_LT : ITER_GT;
-	/*
-	 * The key is full - all parts a present. If key if full, EQ and REQ
-	 * queries can return no more than one tuple.
-	 */
-	bool key_is_full = start_data.part_count == cmp_def->part_count;
-	/* The flag will be change to true if found tuple equals to the key. */
-	bool equals = false;
-	assert(it->last.tuple == NULL);
+
+	/* Find the lookup strategy required. */
+	enum memtx_tree_lookup_op op;
 	if (start_data.key == NULL) {
 		assert(type == ITER_GE || type == ITER_LE);
-		if (iterator_type_is_reverse(type))
-			/*
-			 * For all reverse iterators we will step back,
-			 * see the and explanation code below.
-			 * BPS tree iterators have an interesting property:
-			 * a back step from invalid iterator set its
-			 * position to the last element. Let's use that.
-			 */
-			invalidate_tree_iterator(&it->tree_iterator);
-		else
-			it->tree_iterator = memtx_tree_first(tree);
-		/* If there is at least one tuple in the tree, it is
-		 * efficiently equals to the empty key. */
-		equals = memtx_tree_size(tree) != 0;
+		/*
+		 * For all reverse iterators we will step back, see the
+		 * explanation and code below. BPS tree iterators have an
+		 * interesting property: a back step from an invalid iterator
+		 * sets its position to the last element. Let's use that.
+		 */
+		op = iterator_type_is_reverse(type) ? INVALIDATE : FIRST;
 	} else {
 		/*
 		 * We use lower_bound on equality iterators instead of LE
@@ -873,17 +920,90 @@ tree_iterator_start(struct iterator *iterator, struct tuple **ret)
 		 */
 		if (skip_equal_tuple && (type == ITER_EQ || type == ITER_REQ))
 			need_lower_bound = !need_lower_bound;
-		if (need_lower_bound) {
-			it->tree_iterator =
-				memtx_tree_lower_bound(tree, &start_data,
-						       &equals);
-		} else {
-			it->tree_iterator =
-				memtx_tree_upper_bound(tree, &start_data,
-						       &equals);
-		}
+
+		op = need_lower_bound ? LOWER_BOUND : UPPER_BOUND;
 	}
+	/*
+	 * Because of limitations of the tree search API we use lower_bound
+	 * for LT search and upper_bound for LE and REQ searches. In both
+	 * cases we find a position to the right of the target one. Let's
+	 * make a step to the left to reach the target position.
+	 *
+	 * If we found an invalid iterator then all the elements in the tree
+	 * are less (less or equal) to the key, and iterator_prev call will
+	 * convert the iterator to the last position in the tree, which is
+	 * exactly what we need.
+	 */
+	bool need_step_back = iterator_type_is_reverse(type);
+
+	/* Set the output parameters. */
+	*out_type = type;
+	*out_start_data = start_data;
+	lookup->operation = op;
+	lookup->step_back = need_step_back;
+	return 0;
+}
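The strategy computed above hinges on one trick: the tree search API exposes only lower_bound/upper_bound, both of which land at or to the right of the target, so every reverse type (LT, LE, REQ) finds the position to the right and then takes a single step left. A sketch of that mapping on a sorted std::vector, with std::lower_bound/std::upper_bound standing in for the memtx_tree_* primitives:

```cpp
// Sketch of the bound/step-back mapping on a plain sorted vector;
// std::lower_bound/std::upper_bound stand in for the BPS-tree
// memtx_tree_lower_bound()/memtx_tree_upper_bound() calls.
#include <algorithm>
#include <cassert>
#include <vector>

int
main()
{
	std::vector<int> v = {10, 20, 20, 30};

	/* GE 20: lower_bound lands on the first 20 directly. */
	assert(*std::lower_bound(v.begin(), v.end(), 20) == 20);

	/* GT 20: upper_bound lands on 30 directly. */
	assert(*std::upper_bound(v.begin(), v.end(), 20) == 30);

	/* LE 20: upper_bound finds the successor; one step back hits 20. */
	auto le = std::upper_bound(v.begin(), v.end(), 20);
	--le;
	assert(*le == 20);

	/* LT 20: lower_bound finds the first 20; one step back hits 10. */
	auto lt = std::lower_bound(v.begin(), v.end(), 20);
	--lt;
	assert(*lt == 10);
	return 0;
}
```

One BPS-specific nicety does not carry over to the STL model: stepping back from std::vector's begin() is undefined, while a BPS tree iterator may be stepped back even from the invalid position and lands on the last element. That is exactly what the INVALIDATE strategy exploits for key-less reverse scans.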
+
+template <bool USE_HINT, bool FAST_OFFSET>
+static int
+tree_iterator_start(struct iterator *iterator, struct tuple **ret)
+{
+	struct region *region = &fiber()->gc;
+	RegionGuard region_guard(region);
+
+	*ret = NULL;
+	iterator->next_internal = exhausted_iterator_next;
+
+	struct tree_iterator<USE_HINT, FAST_OFFSET> *it =
+		get_tree_iterator<USE_HINT, FAST_OFFSET>(iterator);
+	assert(it->last.tuple == NULL);
+
+	struct space *space;
+	struct index *index_base;
+	index_weak_ref_get_checked(&iterator->index_ref, &space, &index_base);
+	struct memtx_tree_index<USE_HINT, FAST_OFFSET> *index =
+		(struct memtx_tree_index<USE_HINT, FAST_OFFSET> *)index_base;
+	struct key_def *cmp_def = index->base.def->cmp_def;
+	struct memtx_tree_key_data<USE_HINT> start_data =
+		it->after_data.key != NULL ? it->after_data : it->key_data;
+	enum iterator_type type = it->type;
+	struct memtx_tree_lookup_strategy lookup;
+	if (memtx_tree_lookup_setup(&start_data, it->after_data, &type,
+				    cmp_def, region, &lookup) == -1)
+		return -1;
+
+	/* Will be set to true if the found tuple equals the key. */
+	bool equals = false;
+	memtx_tree_t<USE_HINT, FAST_OFFSET> *tree = &index->tree;
+
+	switch (lookup.operation) {
+	case FIRST:
+		it->tree_iterator = memtx_tree_first(tree);
+		break;
+	case LOWER_BOUND:
+		it->tree_iterator = memtx_tree_lower_bound(tree, &start_data,
+							   &equals);
+		break;
+	case UPPER_BOUND:
+		it->tree_iterator = memtx_tree_upper_bound(tree, &start_data,
+							   &equals);
+		break;
+	case INVALIDATE:
+		invalidate_tree_iterator(&it->tree_iterator);
+		break;
+	default:
+		assert(lookup.operation == NOOP);
+		return 0;
+	}
+
+	/*
+	 * If there is at least one tuple in the tree, it is effectively
+	 * equal to the empty key.
+	 */
+	if (start_data.key == NULL)
+		equals = memtx_tree_size(tree) != 0;
+
 	/*
 	 * `it->tree_iterator` could potentially be positioned on successor of
 	 * key: we need to track gap based on it.
@@ -891,23 +1011,13 @@ tree_iterator_start(struct iterator *iterator, struct tuple **ret)
 	struct memtx_tree_data<USE_HINT> *res =
 		memtx_tree_iterator_get_elem(tree, &it->tree_iterator);
 	struct tuple *successor = res == NULL ? NULL : res->tuple;
-	if (iterator_type_is_reverse(type)) {
-		/*
-		 * Because of limitations of tree search API we use
-		 * lower_bound for LT search and upper_bound for LE and
-		 * REQ searches. In both cases we find a position to the
-		 * right of the target one. Let's make a step to the
-		 * left to reach target position.
-		 * If we found an invalid iterator all the elements in
-		 * the tree are less (less or equal) to the key, and
-		 * iterator_prev call will convert the iterator to the
-		 * last position in the tree, that's what we need.
-		 */
+	if (lookup.step_back) {
 		memtx_tree_iterator_prev(tree, &it->tree_iterator);
 		res = memtx_tree_iterator_get_elem(tree, &it->tree_iterator);
 	}
+
 	/* If we skip tuple, flag equals is not actual - need to refresh it. */
-	if (skip_equal_tuple && res != NULL &&
+	if (it->after_data.key != NULL && res != NULL &&
 	    (type == ITER_EQ || type == ITER_REQ)) {
 		equals = tuple_compare_with_key(res->tuple, res->hint,
 						it->key_data.key,
@@ -915,10 +1025,12 @@ tree_iterator_start(struct iterator *iterator, struct tuple **ret)
 						it->key_data.hint,
 						index->base.def->key_def) == 0;
 	}
+
 	/*
 	 * Equality iterators requires exact key match: if the result does not
 	 * equal to the key, iteration ends.
 	 */
+	struct txn *txn = in_txn();
 	bool eq_match = equals || (type != ITER_EQ && type != ITER_REQ);
 	if (res != NULL && eq_match) {
 		tree_iterator_set_last(it, res);
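The equality refresh above is needed because the equals flag produced by the bound search describes the element the search landed on, not the element the iterator rests on after the step back or after skipping the "after" tuple. A toy illustration of the same effect, with plain ints standing in for tuples: a REQ search goes through upper_bound plus one step left, and the element it finally lands on still has to be compared with the key.

```cpp
// Toy illustration: REQ finds its start position via upper_bound plus
// a step back, so equality with the key must be re-checked afterwards.
#include <algorithm>
#include <cassert>
#include <vector>

int
main()
{
	std::vector<int> v = {10, 20, 30};
	int key = 25;

	/* REQ 25: upper_bound, then one step to the left. */
	auto it = std::upper_bound(v.begin(), v.end(), key);
	--it;

	/* We land on 20, which is not equal to 25: REQ finds nothing. */
	bool equals = (*it == key);
	assert(!equals);
	return 0;
}
```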
@@ -933,6 +1045,11 @@ tree_iterator_start(struct iterator *iterator, struct tuple **ret)
 					       index_base, mk_index);
 	}
 /********MVCC TRANSACTION MANAGER STORY GARBAGE COLLECTION BOUND START*********/
+	/*
+	 * If the key is full (all parts are present), EQ and REQ iterators
+	 * can return at most one tuple.
+	 */
+	bool key_is_full = start_data.part_count == cmp_def->part_count;
 	if (key_is_full && !eq_match)
 		memtx_tx_track_point(txn, space, index_base, it->key_data.key);
 	if (!key_is_full ||
@@ -1114,16 +1231,112 @@ memtx_tree_index_random(struct index *base, uint32_t rnd, struct tuple **result)
 	return memtx_prepare_result_tuple(space, result);
 }
 
-template <bool USE_HINT, bool FAST_OFFSET>
+template <bool USE_HINT>
 static ssize_t
-memtx_tree_index_count(struct index *base, enum iterator_type type,
-		       const char *key, uint32_t part_count)
+memtx_tree_index_count_slow(struct index *base, enum iterator_type type,
+			    const char *key, uint32_t part_count)
 {
+	assert(!base->def->opts.fast_offset &&
+	       (base->def->opts.hint == INDEX_HINT_ON) == USE_HINT);
+
 	if (type == ITER_ALL)
-		return memtx_tree_index_size<USE_HINT, FAST_OFFSET>(base);
+		return memtx_tree_index_size<USE_HINT, false>(base);
 	return generic_index_count(base, type, key, part_count);
 }
 
+template <bool USE_HINT>
+static ssize_t
+memtx_tree_index_count_fast(struct index *base, enum iterator_type type,
+			    const char *key, uint32_t part_count)
+{
+	assert(base->def->opts.fast_offset &&
+	       (base->def->opts.hint == INDEX_HINT_ON) == USE_HINT);
+
+	struct region *region = &fiber()->gc;
+	RegionGuard region_guard(region);
+
+	struct memtx_tree_index<USE_HINT, true> *index =
+		(struct memtx_tree_index<USE_HINT, true> *)base;
+	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
+
+	if (memtx_tree_lookup_canonicalize(base, cmp_def, &type,
+					   &key, part_count) == -1)
+		return -1;
+
+	size_t visible_size = memtx_tree_index_size<USE_HINT, true>(base);
+	if (visible_size == 0)
+		return 0; /* One can't select from an empty space. */
+
+	struct memtx_tree_key_data<USE_HINT> start_data;
+	start_data.key = key;
+	start_data.part_count = part_count;
+	struct memtx_tree_key_data<USE_HINT> null_after_data = {};
+	struct memtx_tree_lookup_strategy lookup;
+	if (memtx_tree_lookup_setup(&start_data, null_after_data, &type,
+				    cmp_def, region, &lookup) == -1)
+		return -1;
+
+	memtx_tree_t<USE_HINT, true> *tree = &index->tree;
+	size_t full_size = memtx_tree_size(tree);
+	memtx_tree_iterator_t<USE_HINT, true> begin;
+	memtx_tree_iterator_t<USE_HINT, true> end;
+	size_t begin_offset;
+	size_t end_offset;
+
+	switch (lookup.operation) {
+	case FIRST:
+	case INVALIDATE:
+		/* For an empty key, return the visible tree size. TODO: MVCC. */
+		assert(key == NULL);
+		return visible_size;
+	case LOWER_BOUND:
+		begin = memtx_tree_lower_bound_get_offset(tree, &start_data,
+							  NULL, &begin_offset);
+		break;
+	case UPPER_BOUND:
+		begin = memtx_tree_upper_bound_get_offset(tree, &start_data,
+							  NULL, &begin_offset);
+		break;
+	default:
+		assert(lookup.operation == NOOP);
+		return 0; /* Nothing to be done. */
+	}
+
+	if (iterator_type_is_reverse(type) && begin_offset == 0)
+		return 0; /* No tuples match this key and iterator. TODO: MVCC. */
+
+	if (!iterator_type_is_reverse(type) && begin_offset == full_size)
+		return 0; /* No tuples match this key and iterator. TODO: MVCC. */
+
+	if (lookup.step_back) {
+		memtx_tree_iterator_prev(tree, &begin);
+		begin_offset--;
+	}
+
+	/*
+	 * Now that we have the first tuple and its offset, let's find the
+	 * boundary of the iteration.
+	 */
+	if (type == ITER_EQ) {
+		end = memtx_tree_upper_bound_get_offset(tree, &start_data,
+							NULL, &end_offset);
+	} else if (type == ITER_REQ) {
+		end = memtx_tree_lower_bound_get_offset(tree, &start_data,
+							NULL, &end_offset);
+		memtx_tree_iterator_prev(tree, &end);
+		end_offset--; /* Unsigned overflow possible. */
+	} else {
+		end_offset = iterator_type_is_reverse(type) ?
+			     -1 : full_size;
+		invalidate_tree_iterator(&end);
+	}
+
+	size_t full_count = ((ssize_t)end_offset - begin_offset) *
+			    iterator_direction(type);
+
+	/* TODO: Calculate the visible count (MVCC). */
+	return full_count;
+}
+
 template <bool USE_HINT, bool FAST_OFFSET>
 static int
 memtx_tree_index_get_internal(struct index *base, const char *key,
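memtx_tree_index_count_fast() reduces counting to two offset lookups and one subtraction: full_count = (end_offset - begin_offset) * direction, with -1 serving as the virtual "one before offset 0" boundary for reverse scans. A self-contained sketch of the same arithmetic, where std::lower_bound/std::upper_bound distances stand in for the *_get_offset tree primitives:

```cpp
// Sketch of the offset-based count on a plain sorted vector; iterator
// distances stand in for the logical offsets the BPS tree returns.
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int
main()
{
	std::vector<int> v = {10, 20, 20, 20, 30};

	/* ITER_EQ 20: [lower_bound, upper_bound) -> 3 matches. */
	size_t begin_offset =
		std::lower_bound(v.begin(), v.end(), 20) - v.begin();
	size_t end_offset =
		std::upper_bound(v.begin(), v.end(), 20) - v.begin();
	assert(end_offset - begin_offset == 3);

	/* ITER_GT 20: [upper_bound, size) -> 1 match. */
	begin_offset = std::upper_bound(v.begin(), v.end(), 20) - v.begin();
	assert(v.size() - begin_offset == 1);

	/*
	 * ITER_LT 20: step back from lower_bound, then count down to the
	 * position one before offset 0; the unsigned wrap-around plays
	 * the role of end_offset = -1 in the patch.
	 */
	begin_offset =
		std::lower_bound(v.begin(), v.end(), 20) - v.begin() - 1;
	end_offset = (size_t)-1;
	ptrdiff_t direction = -1;
	assert((ptrdiff_t)(end_offset - begin_offset) * direction == 1);
	return 0;
}
```

The (size_t)-1 wrap-around mirrors the "Unsigned overflow possible" comment in the patch: the subtraction is performed in unsigned arithmetic, and only the signed reinterpretation of the difference matters.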
@@ -1729,33 +1942,9 @@ memtx_tree_index_create_iterator(struct index *base, enum iterator_type type,
 	struct memtx_engine *memtx = (struct memtx_engine *)base->engine;
 	struct key_def *cmp_def = memtx_tree_cmp_def(&index->tree);
 
-	assert(part_count == 0 || key != NULL);
-	assert(type >= 0 && type < iterator_type_MAX);
-	static_assert(iterator_type_MAX < 32, "Too big for bit logic");
-	const uint32_t supported_mask = ((1u << (ITER_GT + 1)) - 1) |
-					(1u << ITER_NP) | (1u << ITER_PP);
-	if (((1u << type) & supported_mask) == 0) {
-		diag_set(UnsupportedIndexFeature, base->def,
-			 "requested iterator type");
-		return NULL;
-	}
-	if ((type == ITER_NP || type == ITER_PP) && part_count > 0 &&
-	    cmp_def->parts[part_count - 1].coll != NULL) {
-		diag_set(UnsupportedIndexFeature, base->def,
-			 "requested iterator type along with collation");
+	if (memtx_tree_lookup_canonicalize(base, cmp_def, &type,
+					   &key, part_count) == -1)
 		return NULL;
-	}
-	if (part_count == 0) {
-		/*
-		 * If no key is specified, downgrade equality
-		 * iterators to a full range.
-		 */
-		type = iterator_type_is_reverse(type) ? ITER_LE : ITER_GE;
-		key = NULL;
-	}
-
-	if (type == ITER_ALL)
-		type = ITER_GE;
 
 	ERROR_INJECT(ERRINJ_INDEX_ITERATOR_NEW, {
 		diag_set(ClientError, ER_INJECTION, "iterator fail");
@@ -2346,7 +2535,9 @@ get_memtx_tree_index_vtab(void)
 		/* .min = */ generic_index_min,
 		/* .max = */ generic_index_max,
 		/* .random = */ memtx_tree_index_random<USE_HINT, FAST_OFFSET>,
-		/* .count = */ memtx_tree_index_count<USE_HINT, FAST_OFFSET>,
+		/* .count = */
+		FAST_OFFSET ? memtx_tree_index_count_fast<USE_HINT> :
+			      memtx_tree_index_count_slow<USE_HINT>,
 		/* .get_internal */ memtx_tree_index_get_internal<USE_HINT, FAST_OFFSET>,
 		/* .get = */ memtx_index_get,
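The vtab change selects the counting strategy once, at template instantiation time, so a fast-offset index never pays a per-call branch. A simplified sketch of this compile-time dispatch pattern, using a hypothetical one-slot vtab:

```cpp
// Sketch of compile-time vtab dispatch: a bool template parameter
// picks one of two implementations when the vtab is instantiated.
// The vtab layout and the count functions here are made up.
#include <cassert>
#include <cstddef>

struct vtab {
	ptrdiff_t (*count)(const void *index);
};

static ptrdiff_t count_slow(const void *) { return 1; }
static ptrdiff_t count_fast(const void *) { return 2; }

template <bool FAST_OFFSET>
static const struct vtab *
get_vtab(void)
{
	static const struct vtab v = {
		/* .count = */
		FAST_OFFSET ? count_fast : count_slow,
	};
	return &v;
}

int
main()
{
	assert(get_vtab<true>()->count(NULL) == 2);
	assert(get_vtab<false>()->count(NULL) == 1);
	return 0;
}
```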