diff --git a/src/box/blackhole.c b/src/box/blackhole.c index 6ada7a586e6b..249eb678ad24 100644 --- a/src/box/blackhole.c +++ b/src/box/blackhole.c @@ -157,7 +157,8 @@ blackhole_engine_create_space(struct engine *engine, struct space_def *def, struct tuple_format *format; format = tuple_format_new(&tuple_format_runtime->vtab, NULL, NULL, 0, def->fields, def->field_count, - def->exact_field_count, def->dict, false); + def->exact_field_count, def->dict, false, + false); if (format == NULL) { free(space); return NULL; diff --git a/src/box/box.cc b/src/box/box.cc index 9f2fd6da14bd..ae7f471cd9d5 100644 --- a/src/box/box.cc +++ b/src/box/box.cc @@ -46,6 +46,7 @@ #include #include "main.h" #include "tuple.h" +#include "tuple_format.h" #include "session.h" #include "schema.h" #include "engine.h" diff --git a/src/box/memtx_engine.c b/src/box/memtx_engine.c index 5cf70ab94b55..254ef24b47fb 100644 --- a/src/box/memtx_engine.c +++ b/src/box/memtx_engine.c @@ -994,6 +994,12 @@ memtx_engine_gc_f(va_list va) struct memtx_engine *memtx = va_arg(va, struct memtx_engine *); while (!fiber_is_cancelled()) { bool stop; + struct errinj *delay = errinj(ERRINJ_MEMTX_DELAY_GC, + ERRINJ_BOOL); + if (delay != NULL && delay->bparam) { + while (delay->bparam) + fiber_sleep(0.001); + } memtx_engine_run_gc(memtx, &stop); if (stop) { fiber_yield_timeout(TIMEOUT_INFINITY); diff --git a/src/box/memtx_space.c b/src/box/memtx_space.c index 4aad8da6c112..5f8cc98fdbd9 100644 --- a/src/box/memtx_space.c +++ b/src/box/memtx_space.c @@ -993,7 +993,7 @@ memtx_space_new(struct memtx_engine *memtx, tuple_format_new(&memtx_tuple_format_vtab, memtx, keys, key_count, def->fields, def->field_count, def->exact_field_count, def->dict, - def->opts.is_temporary); + def->opts.is_temporary, def->opts.is_ephemeral); if (format == NULL) { free(memtx_space); return NULL; diff --git a/src/box/space.c b/src/box/space.c index 62451679d2d9..316b34b92980 100644 --- a/src/box/space.c +++ b/src/box/space.c @@ -195,6 +195,7 @@ struct space * space_new_ephemeral(struct space_def *def, struct rlist *key_list) { assert(def->opts.is_temporary); + assert(def->opts.is_ephemeral); struct space *space = space_new(def, key_list); if (space == NULL) return NULL; diff --git a/src/box/space_def.c b/src/box/space_def.c index d60c2d350af4..c5b5ec2957ae 100644 --- a/src/box/space_def.c +++ b/src/box/space_def.c @@ -53,6 +53,7 @@ checks_array_decode(const char **str, uint32_t len, char *opt, uint32_t errcode, const struct space_opts space_opts_default = { /* .group_id = */ 0, /* .is_temporary = */ false, + /* .is_ephemeral = */ false, /* .view = */ false, /* .sql = */ NULL, /* .checks = */ NULL, @@ -262,6 +263,7 @@ space_def_new_ephemeral(uint32_t field_count) { struct space_opts opts = space_opts_default; opts.is_temporary = true; + opts.is_ephemeral = true; struct space_def *space_def = space_def_new(0, 0, field_count, "ephemeral", strlen("ephemeral"), diff --git a/src/box/space_def.h b/src/box/space_def.h index b6ab6b39f2bd..52ff567649bb 100644 --- a/src/box/space_def.h +++ b/src/box/space_def.h @@ -58,6 +58,11 @@ struct space_opts { * does not require manual release. */ bool is_temporary; + /** + * This flag is set if space is ephemeral and hence + * its format might be re-used. + */ + bool is_ephemeral; /** * If the space is a view, then it can't feature any * indexes, and must have SQL statement. Moreover, diff --git a/src/box/tuple.c b/src/box/tuple.c index a87b2bd20027..9cdb4647430a 100644 --- a/src/box/tuple.c +++ b/src/box/tuple.c @@ -203,12 +203,16 @@ tuple_next(struct tuple_iterator *it) int tuple_init(field_name_hash_f hash) { + if (tuple_format_init() != 0) + return -1; + field_name_hash = hash; /* * Create a format for runtime tuples */ tuple_format_runtime = tuple_format_new(&tuple_format_runtime_vtab, NULL, - NULL, 0, NULL, 0, 0, NULL, false); + NULL, 0, NULL, 0, 0, NULL, false, + false); if (tuple_format_runtime == NULL) return -1; @@ -380,7 +384,8 @@ box_tuple_format_new(struct key_def **keys, uint16_t key_count) { box_tuple_format_t *format = tuple_format_new(&tuple_format_runtime_vtab, NULL, - keys, key_count, NULL, 0, 0, NULL, false); + keys, key_count, NULL, 0, 0, NULL, false, + false); if (format != NULL) tuple_format_ref(format); return format; diff --git a/src/box/tuple_format.c b/src/box/tuple_format.c index 559c601f9087..214760247843 100644 --- a/src/box/tuple_format.c +++ b/src/box/tuple_format.c @@ -34,12 +34,78 @@ #include "tuple_format.h" #include "coll_id_cache.h" +#include "third_party/PMurHash.h" + /** Global table of tuple formats */ struct tuple_format **tuple_formats; static intptr_t recycled_format_ids = FORMAT_ID_NIL; static uint32_t formats_size = 0, formats_capacity = 0; +static int +tuple_format_cmp(const struct tuple_format *format1, + const struct tuple_format *format2) +{ + struct tuple_format *a = (struct tuple_format *)format1; + struct tuple_format *b = (struct tuple_format *)format2; + if (a->exact_field_count != b->exact_field_count) + return a->exact_field_count - b->exact_field_count; + if (tuple_format_field_count(a) != tuple_format_field_count(b)) + return tuple_format_field_count(a) - tuple_format_field_count(b); + + for (uint32_t i = 0; i < tuple_format_field_count(a); ++i) { + struct tuple_field *field_a = tuple_format_field(a, i); + struct tuple_field *field_b = tuple_format_field(b, i); + if (field_a->type != field_b->type) + return (int)field_a->type - (int)field_b->type; + if (field_a->coll_id != field_b->coll_id) + return (int)field_a->coll_id - (int)field_b->coll_id; + if (field_a->nullable_action != field_b->nullable_action) + return (int)field_a->nullable_action - + (int)field_b->nullable_action; + if (field_a->is_key_part != field_b->is_key_part) + return (int)field_a->is_key_part - + (int)field_b->is_key_part; + } + + return 0; +} + +static uint32_t +tuple_format_hash(struct tuple_format *format) +{ +#define TUPLE_FIELD_MEMBER_HASH(field, member, h, carry, size) \ + PMurHash32_Process(&h, &carry, &field->member, \ + sizeof(field->member)); \ + size += sizeof(field->member); + + uint32_t h = 13; + uint32_t carry = 0; + uint32_t size = 0; + for (uint32_t i = 0; i < tuple_format_field_count(format); ++i) { + struct tuple_field *f = tuple_format_field(format, i); + TUPLE_FIELD_MEMBER_HASH(f, type, h, carry, size) + TUPLE_FIELD_MEMBER_HASH(f, coll_id, h, carry, size) + TUPLE_FIELD_MEMBER_HASH(f, nullable_action, h, carry, size) + TUPLE_FIELD_MEMBER_HASH(f, is_key_part, h, carry, size) + } +#undef TUPLE_FIELD_MEMBER_HASH + return PMurHash32_Result(h, carry, size); +} + +#define MH_SOURCE 1 +#define mh_name _tuple_format +#define mh_key_t struct tuple_format * +#define mh_node_t struct tuple_format * +#define mh_arg_t void * +#define mh_hash(a, arg) ((*(a))->hash) +#define mh_hash_key(a, arg) ((a)->hash) +#define mh_cmp(a, b, arg) (tuple_format_cmp(*(a), *(b))) +#define mh_cmp_key(a, b, arg) (tuple_format_cmp((a), *(b))) +#include "salad/mhash.h" + +static struct mh_tuple_format_t *tuple_formats_hash = NULL; + static struct tuple_field * tuple_field_new(void) { @@ -248,6 +314,7 @@ tuple_format_create(struct tuple_format *format, struct key_def * const *keys, !tuple_field_is_nullable(field)) bit_set(format->required_fields, field->id); } + format->hash = tuple_format_hash(format); return 0; } @@ -276,7 +343,12 @@ tuple_format_register(struct tuple_format *format) formats_capacity = new_capacity; tuple_formats = formats; } - if (formats_size == FORMAT_ID_MAX + 1) { + uint32_t formats_size_max = FORMAT_ID_MAX + 1; + struct errinj *inj = errinj(ERRINJ_TUPLE_FORMAT_COUNT, + ERRINJ_INT); + if (inj != NULL && inj->iparam > 0) + formats_size_max = inj->iparam; + if (formats_size >= formats_size_max) { diag_set(ClientError, ER_TUPLE_FORMAT_LIMIT, (unsigned) formats_capacity); return -1; @@ -389,9 +461,80 @@ tuple_format_destroy(struct tuple_format *format) tuple_dictionary_unref(format->dict); } +/** + * Try to reuse given format. This is only possible for formats + * of ephemeral spaces, since we need to be sure that shared + * dictionary will never be altered. If it can, then alter can + * affect another space, which shares a format with one which is + * altered. + * @param p_format Double pointer to format. It is updated with + * hashed value, if corresponding format was found + * in hash table + * @retval Returns true if format was found in hash table, false + * otherwise. + * + */ +static bool +tuple_format_reuse(struct tuple_format **p_format) +{ + struct tuple_format *format = *p_format; + if (!format->is_ephemeral) + return false; + /* + * These fields do not participate in hashing. + * Make sure they're unset. + */ + assert(format->dict->name_count == 0); + assert(format->is_temporary); + mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format, + NULL); + if (key != mh_end(tuple_formats_hash)) { + struct tuple_format **entry = mh_tuple_format_node( + tuple_formats_hash, key); + tuple_format_destroy(format); + free(format); + *p_format = *entry; + return true; + } + return false; +} + +/** + * See justification, why ephemeral space's formats are + * only feasible for hasing. + * @retval 0 on success, even if format wasn't added to hash + * -1 in case of error. + */ +static int +tuple_format_add_to_hash(struct tuple_format *format) +{ + if(!format->is_ephemeral) + return 0; + assert(format->dict->name_count == 0); + assert(format->is_temporary); + mh_int_t key = mh_tuple_format_put(tuple_formats_hash, + (const struct tuple_format **)&format, + NULL, NULL); + if (key == mh_end(tuple_formats_hash)) { + diag_set(OutOfMemory, 0, "tuple_format_add_to_hash", + "tuple formats hash entry"); + return -1; + } + return 0; +} + +static void +tuple_format_remove_from_hash(struct tuple_format *format) +{ + mh_int_t key = mh_tuple_format_find(tuple_formats_hash, format, NULL); + if (key != mh_end(tuple_formats_hash)) + mh_tuple_format_del(tuple_formats_hash, key, NULL); +} + void tuple_format_delete(struct tuple_format *format) { + tuple_format_remove_from_hash(format); tuple_format_deregister(format); tuple_format_destroy(format); free(format); @@ -402,7 +545,8 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine, struct key_def * const *keys, uint16_t key_count, const struct field_def *space_fields, uint32_t space_field_count, uint32_t exact_field_count, - struct tuple_dictionary *dict, bool is_temporary) + struct tuple_dictionary *dict, bool is_temporary, + bool is_ephemeral) { struct tuple_format *format = tuple_format_alloc(keys, key_count, space_field_count, dict); @@ -411,18 +555,24 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine, format->vtab = *vtab; format->engine = engine; format->is_temporary = is_temporary; + format->is_ephemeral = is_ephemeral; format->exact_field_count = exact_field_count; - if (tuple_format_register(format) < 0) { - tuple_format_destroy(format); - free(format); - return NULL; - } if (tuple_format_create(format, keys, key_count, space_fields, - space_field_count) < 0) { - tuple_format_delete(format); - return NULL; + space_field_count) < 0) + goto err; + if (tuple_format_reuse(&format)) + return format; + if (tuple_format_register(format) < 0) + goto err; + if (tuple_format_add_to_hash(format) < 0) { + tuple_format_deregister(format); + goto err; } return format; +err: + tuple_format_destroy(format); + free(format); + return NULL; } bool @@ -606,6 +756,18 @@ tuple_format_min_field_count(struct key_def * const *keys, uint16_t key_count, return min_field_count; } +int +tuple_format_init() +{ + tuple_formats_hash = mh_tuple_format_new(); + if (tuple_formats_hash == NULL) { + diag_set(OutOfMemory, sizeof(struct mh_tuple_format_t), "malloc", + "tuple format hash"); + return -1; + } + return 0; +} + /** Destroy tuple format subsystem and free resourses */ void tuple_format_free() @@ -625,6 +787,7 @@ tuple_format_free() } } free(tuple_formats); + mh_tuple_format_delete(tuple_formats_hash); } void diff --git a/src/box/tuple_format.h b/src/box/tuple_format.h index d15fef267f8a..0d91119db3bf 100644 --- a/src/box/tuple_format.h +++ b/src/box/tuple_format.h @@ -143,6 +143,12 @@ struct tuple_format { void *engine; /** Identifier */ uint16_t id; + /** + * Hash computed from this format. Does not include the + * tuple dictionary. Used only for sharing formats among + * ephemeral spaces. + */ + uint32_t hash; /** Reference counter */ int refs; /** @@ -151,6 +157,11 @@ struct tuple_format { * in progress. */ bool is_temporary; + /** + * This format belongs to ephemeral space and thus might + * be shared with other ephemeral spaces. + */ + bool is_ephemeral; /** * Size of field map of tuple in bytes. * \sa struct tuple @@ -265,6 +276,7 @@ tuple_format_unref(struct tuple_format *format) * @param space_field_count Length of @a space_fields. * @param exact_field_count Exact field count for format. * @param is_temporary Set if format belongs to temporary space. + * @param is_ephemeral Set if format belongs to ephemeral space. * * @retval not NULL Tuple format. * @retval NULL Memory error. @@ -274,7 +286,8 @@ tuple_format_new(struct tuple_format_vtab *vtab, void *engine, struct key_def * const *keys, uint16_t key_count, const struct field_def *space_fields, uint32_t space_field_count, uint32_t exact_field_count, - struct tuple_dictionary *dict, bool is_temporary); + struct tuple_dictionary *dict, bool is_temporary, + bool is_ephemeral); /** * Check, if @a format1 can store any tuples of @a format2. For @@ -459,6 +472,13 @@ tuple_field_by_part_raw(struct tuple_format *format, const char *data, return tuple_field_raw(format, data, field_map, part->fieldno); } +/** + * Initialize tuple format subsystem. + * @retval 0 on success, -1 otherwise. + */ +int +tuple_format_init(); + #if defined(__cplusplus) } /* extern "C" */ #endif /* defined(__cplusplus) */ diff --git a/src/box/vinyl.c b/src/box/vinyl.c index 49e8372c6783..b66360d4e1d8 100644 --- a/src/box/vinyl.c +++ b/src/box/vinyl.c @@ -621,7 +621,8 @@ vinyl_engine_create_space(struct engine *engine, struct space_def *def, struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, NULL, keys, key_count, def->fields, def->field_count, - def->exact_field_count, def->dict, false); + def->exact_field_count, def->dict, false, + false); if (format == NULL) { free(space); return NULL; @@ -3045,7 +3046,7 @@ vy_send_lsm(struct vy_join_ctx *ctx, struct vy_lsm_recovery_info *lsm_info) goto out; ctx->format = tuple_format_new(&vy_tuple_format_vtab, NULL, &ctx->key_def, 1, NULL, 0, 0, NULL, - false); + false, false); if (ctx->format == NULL) goto out_free_key_def; tuple_format_ref(ctx->format); diff --git a/src/box/vy_lsm.c b/src/box/vy_lsm.c index 7a6ba6eda102..570e783ab44c 100644 --- a/src/box/vy_lsm.c +++ b/src/box/vy_lsm.c @@ -61,7 +61,8 @@ vy_lsm_env_create(struct vy_lsm_env *env, const char *path, void *upsert_thresh_arg) { env->key_format = tuple_format_new(&vy_tuple_format_vtab, NULL, - NULL, 0, NULL, 0, 0, NULL, false); + NULL, 0, NULL, 0, 0, NULL, false, + false); if (env->key_format == NULL) return -1; tuple_format_ref(env->key_format); @@ -155,7 +156,7 @@ vy_lsm_new(struct vy_lsm_env *lsm_env, struct vy_cache_env *cache_env, } else { lsm->disk_format = tuple_format_new(&vy_tuple_format_vtab, NULL, &cmp_def, 1, NULL, 0, 0, - NULL, false); + NULL, false, false); if (lsm->disk_format == NULL) goto fail_format; } diff --git a/src/errinj.h b/src/errinj.h index 39de63d19cf2..31e4dfb08297 100644 --- a/src/errinj.h +++ b/src/errinj.h @@ -123,6 +123,8 @@ struct errinj { _(ERRINJ_RELAY_BREAK_LSN, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_WAL_BREAK_LSN, ERRINJ_INT, {.iparam = -1}) \ _(ERRINJ_VY_COMPACTION_DELAY, ERRINJ_BOOL, {.bparam = false}) \ + _(ERRINJ_TUPLE_FORMAT_COUNT, ERRINJ_INT, {.iparam = -1}) \ + _(ERRINJ_MEMTX_DELAY_GC, ERRINJ_BOOL, {.bparam = false}) \ ENUM0(errinj_id, ERRINJ_LIST); extern struct errinj errinjs[]; diff --git a/test/box/errinj.result b/test/box/errinj.result index 12303670e858..ccb9affe8932 100644 --- a/test/box/errinj.result +++ b/test/box/errinj.result @@ -46,8 +46,12 @@ errinj.info() state: false ERRINJ_WAL_FALLOCATE: state: 0 + ERRINJ_RELAY_EXIT_DELAY: + state: 0 ERRINJ_SNAP_COMMIT_DELAY: state: false + ERRINJ_TUPLE_FORMAT_COUNT: + state: -1 ERRINJ_TUPLE_ALLOC: state: false ERRINJ_VY_RUN_WRITE_DELAY: @@ -92,8 +96,8 @@ errinj.info() state: false ERRINJ_VY_POINT_ITER_WAIT: state: false - ERRINJ_RELAY_EXIT_DELAY: - state: 0 + ERRINJ_MEMTX_DELAY_GC: + state: false ERRINJ_IPROTO_TX_DELAY: state: false ERRINJ_BUILD_INDEX: diff --git a/test/sql/errinj.result b/test/sql/errinj.result index cb993f8ce83b..c423c8bc6b61 100644 --- a/test/sql/errinj.result +++ b/test/sql/errinj.result @@ -16,6 +16,42 @@ errinj = box.error.injection fiber = require('fiber') --- ... +-- gh-3924 Check that tuple_formats of ephemeral spaces are +-- reused. +box.sql.execute("CREATE TABLE t4 (id INTEGER PRIMARY KEY, a INTEGER);") +--- +... +box.sql.execute("INSERT INTO t4 VALUES (1,1)") +--- +... +box.sql.execute("INSERT INTO t4 VALUES (2,1)") +--- +... +box.sql.execute("INSERT INTO t4 VALUES (3,2)") +--- +... +errinj.set('ERRINJ_TUPLE_FORMAT_COUNT', 200) +--- +- ok +... +errinj.set('ERRINJ_MEMTX_DELAY_GC', true) +--- +- ok +... +for i = 1, 201 do box.sql.execute("SELECT DISTINCT a FROM t4") end +--- +... +errinj.set('ERRINJ_MEMTX_DELAY_GC', false) +--- +- ok +... +errinj.set('ERRINJ_TUPLE_FORMAT_COUNT', -1) +--- +- ok +... +box.sql.execute('DROP TABLE t4') +--- +... box.sql.execute('create table test (id int primary key, a float, b text)') --- ... diff --git a/test/sql/errinj.test.lua b/test/sql/errinj.test.lua index fa7f9f2d63d7..8378c255c5df 100644 --- a/test/sql/errinj.test.lua +++ b/test/sql/errinj.test.lua @@ -5,6 +5,19 @@ box.sql.execute('pragma sql_default_engine=\''..engine..'\'') errinj = box.error.injection fiber = require('fiber') +-- gh-3924 Check that tuple_formats of ephemeral spaces are +-- reused. +box.sql.execute("CREATE TABLE t4 (id INTEGER PRIMARY KEY, a INTEGER);") +box.sql.execute("INSERT INTO t4 VALUES (1,1)") +box.sql.execute("INSERT INTO t4 VALUES (2,1)") +box.sql.execute("INSERT INTO t4 VALUES (3,2)") +errinj.set('ERRINJ_TUPLE_FORMAT_COUNT', 200) +errinj.set('ERRINJ_MEMTX_DELAY_GC', true) +for i = 1, 201 do box.sql.execute("SELECT DISTINCT a FROM t4") end +errinj.set('ERRINJ_MEMTX_DELAY_GC', false) +errinj.set('ERRINJ_TUPLE_FORMAT_COUNT', -1) +box.sql.execute('DROP TABLE t4') + box.sql.execute('create table test (id int primary key, a float, b text)') box.schema.user.grant('guest','read,write,execute', 'universe') cn = remote.connect(box.cfg.listen) diff --git a/test/unit/vy_iterators_helper.c b/test/unit/vy_iterators_helper.c index 6808ff408e5d..55d8504bb3e2 100644 --- a/test/unit/vy_iterators_helper.c +++ b/test/unit/vy_iterators_helper.c @@ -22,7 +22,7 @@ vy_iterator_C_test_init(size_t cache_size) vy_cache_env_create(&cache_env, cord_slab_cache()); vy_cache_env_set_quota(&cache_env, cache_size); vy_key_format = tuple_format_new(&vy_tuple_format_vtab, NULL, NULL, 0, - NULL, 0, 0, NULL, false); + NULL, 0, 0, NULL, false, false); tuple_format_ref(vy_key_format); size_t mem_size = 64 * 1024 * 1024; @@ -202,7 +202,8 @@ create_test_mem(struct key_def *def) struct key_def * const defs[] = { def }; struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, NULL, defs, - def->part_count, NULL, 0, 0, NULL, false); + def->part_count, NULL, 0, 0, NULL, false, + false); fail_if(format == NULL); /* Create mem */ @@ -220,7 +221,7 @@ create_test_cache(uint32_t *fields, uint32_t *types, assert(*def != NULL); vy_cache_create(cache, &cache_env, *def, true); *format = tuple_format_new(&vy_tuple_format_vtab, NULL, def, 1, NULL, 0, - 0, NULL, false); + 0, NULL, false, false); tuple_format_ref(*format); } diff --git a/test/unit/vy_mem.c b/test/unit/vy_mem.c index a13c58bbad5e..b8272eac923e 100644 --- a/test/unit/vy_mem.c +++ b/test/unit/vy_mem.c @@ -78,7 +78,8 @@ test_iterator_restore_after_insertion() /* Create format */ struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, NULL, &key_def, 1, NULL, - 0, 0, NULL, false); + 0, 0, NULL, false, + false); assert(format != NULL); tuple_format_ref(format); diff --git a/test/unit/vy_point_lookup.c b/test/unit/vy_point_lookup.c index c19d3071ce1c..8ed16f6c0b86 100644 --- a/test/unit/vy_point_lookup.c +++ b/test/unit/vy_point_lookup.c @@ -85,7 +85,8 @@ test_basic() vy_cache_create(&cache, &cache_env, key_def, true); struct tuple_format *format = tuple_format_new(&vy_tuple_format_vtab, NULL, &key_def, 1, NULL, - 0, 0, NULL, false); + 0, 0, NULL, false, + false); isnt(format, NULL, "tuple_format_new is not NULL"); tuple_format_ref(format);