Skip to content

Commit

Permalink
box: introduce functional indexes
Browse files Browse the repository at this point in the history
Closes #1260

@TarantoolBot document
Title: introduce functional indexes in memtx
Now you can define a functional index using a registered persistent
function.

There are restrictions for function and key definition for
a functional index:
 - the referenced function must be persistent, deterministic
   and must return a scalar type or an array.
 - you must define key parts which describe the function return value
 - the function must return data whose types match the
   defined key parts
 - the function may return multiple keys; this would be a multikey
   functional index; each key entry is indexed separately;
 - for multikey functional indexes, the key definition should
   start with part 1 and cover all returned key parts
 - key parts can't use JSON paths.
 - the function used for the functional index can not access tuple
   fields by name, only by index.

Functional index can't be primary.
It is not possible to change the used function after a functional index
is defined on it. The index must be dropped first.

To define a functional multikey index, just return multiple values
from the function. Each value (even when it is a single scalar) must
be returned as a table, i.e. {1}, and must match the key definition.

Example:
s = box.schema.space.create('withdata')
s:format({{name = 'name', type = 'string'},
          {name = 'address', type = 'string'}})
pk = s:create_index('name', {parts = {1, 'string'}})
lua_code = [[function(tuple)
                local address = string.split(tuple[2])
                local ret = {}
                for _, v in pairs(address) do
			table.insert(ret, {utf8.upper(v)})
		end
                return unpack(ret)
             end]]
box.schema.func.create('address', {body = lua_code,
                       is_deterministic = true, is_sandboxed = true})
idx = s:create_index('addr', {unique = false,
                     func = 'address',
                     parts = {{1, 'string', collation = 'unicode_ci'}}})
s:insert({"James", "SIS Building Lambeth London UK"})
s:insert({"Sherlock", "221B Baker St Marylebone London NW1 6XE UK"})
idx:select('Uk')
---
- - ['James', 'SIS Building Lambeth London UK']
  - ['Sherlock', '221B Baker St Marylebone London NW1 6XE UK']
...
  • Loading branch information
kshcherbatov committed Jul 25, 2019
1 parent 4a5168e commit 8809f70
Show file tree
Hide file tree
Showing 53 changed files with 2,217 additions and 108 deletions.
1 change: 1 addition & 0 deletions src/box/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ add_library(box STATIC
fk_constraint.c
func.c
func_def.c
key_list.c
alter.cc
schema.cc
schema_def.c
Expand Down
95 changes: 94 additions & 1 deletion src/box/alter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "fiber.h" /* for gc_pool */
#include "scoped_guard.h"
#include "third_party/base64.h"
#include "memtx_engine.h"
#include <new> /* for placement new */
#include <stdio.h> /* snprintf() */
#include <ctype.h>
Expand Down Expand Up @@ -285,7 +286,8 @@ index_def_new_from_tuple(struct tuple *tuple, struct space *space)
space->def->fields,
space->def->field_count, &fiber()->gc) != 0)
diag_raise();
key_def = key_def_new(part_def, part_count);
key_def = key_def_new(part_def, part_count, opts.func_id > 0,
opts.is_multikey);
if (key_def == NULL)
diag_raise();
struct index_def *index_def =
Expand Down Expand Up @@ -1370,6 +1372,30 @@ RebuildIndex::~RebuildIndex()
index_def_delete(new_index_def);
}

/**
 * RebuildFuncIndex - prepare the functional index definition,
 * drop the old index data and rebuild the index by reading the
 * primary key.
 */
class RebuildFuncIndex: public RebuildIndex
{
	/**
	 * Duplicate @a index_def and bind @a func to the copy with
	 * index_def_set_func().
	 *
	 * The helper is static on purpose: it is evaluated in the
	 * mem-initializer of the RebuildIndex base, i.e. before any
	 * base class subobject has been constructed. Calling a
	 * non-static member function at that point is undefined
	 * behavior, and the helper does not need `this` anyway.
	 *
	 * @param index_def Index definition to copy; not modified.
	 * @param func Function to attach to the copy. The caller in
	 *             this file passes NULL when the _func_index
	 *             record is being deleted.
	 * @return The freshly duplicated definition. It is handed to
	 *         RebuildIndex as the new index definition
	 *         (presumably index_def_dup_xc() raises on failure,
	 *         as the _xc suffix suggests - TODO confirm).
	 */
	static struct index_def *
	func_index_def_new(struct index_def *index_def,
			   struct func *func)
	{
		struct index_def *new_index_def = index_def_dup_xc(index_def);
		index_def_set_func(new_index_def, func);
		return new_index_def;
	}
public:
	/**
	 * @param alter Alter operation this step is attached to.
	 * @param old_index_def_arg Definition of the index being
	 *                          rebuilt; also used as the template
	 *                          for the new definition.
	 * @param func Function to bind to the new definition.
	 */
	RebuildFuncIndex(struct alter_space *alter,
			 struct index_def *old_index_def_arg,
			 struct func *func) :
		RebuildIndex(alter,
			     func_index_def_new(old_index_def_arg, func),
			     old_index_def_arg) {}
};

/** TruncateIndex - truncate an index. */
class TruncateIndex: public AlterSpaceOp
{
Expand Down Expand Up @@ -2841,6 +2867,12 @@ on_replace_dd_func(struct trigger * /* trigger */, void *event)
(unsigned) old_func->def->uid,
"function has grants");
}
if (old_func != NULL &&
space_has_data(BOX_FUNC_INDEX_ID, 1, old_func->def->fid)) {
tnt_raise(ClientError, ER_DROP_FUNCTION,
(unsigned) old_func->def->uid,
"function has references");
}
struct trigger *on_commit =
txn_alter_trigger_new(on_drop_func_commit, old_func);
struct trigger *on_rollback =
Expand Down Expand Up @@ -4689,6 +4721,63 @@ on_replace_dd_ck_constraint(struct trigger * /* trigger*/, void *event)
trigger_run_xc(&on_alter_space, space);
}

/** A trigger invoked on replace in the _func_index space. */
static void
on_replace_dd_func_index(struct trigger *trigger, void *event)
{
(void) trigger;
struct txn *txn = (struct txn *) event;
struct txn_stmt *stmt = txn_current_stmt(txn);
struct tuple *old_tuple = stmt->old_tuple;
struct tuple *new_tuple = stmt->new_tuple;

struct alter_space *alter = NULL;
struct func *func = NULL;
struct index *index;
struct space *space;
if (old_tuple == NULL && new_tuple != NULL) {
uint32_t space_id = tuple_field_u32_xc(new_tuple,
BOX_FUNC_INDEX_FIELD_SPACE_ID);
uint32_t index_id = tuple_field_u32_xc(new_tuple,
BOX_FUNC_INDEX_FIELD_INDEX_ID);
uint32_t fid = tuple_field_u32_xc(new_tuple,
BOX_FUNC_INDEX_FUNCTION_ID);
space = space_cache_find_xc(space_id);
index = index_find_xc(space, index_id);
func = func_cache_find(fid);
if (func->def->language != FUNC_LANGUAGE_LUA ||
func->def->body == NULL || !func->def->is_deterministic ||
!func->def->is_sandboxed) {
tnt_raise(ClientError, ER_WRONG_INDEX_OPTIONS, 0,
"referenced function doesn't satisfy "
"functional index constraints");
}
} else if (old_tuple != NULL && new_tuple == NULL) {
uint32_t space_id = tuple_field_u32_xc(old_tuple,
BOX_FUNC_INDEX_FIELD_SPACE_ID);
uint32_t index_id = tuple_field_u32_xc(old_tuple,
BOX_FUNC_INDEX_FIELD_INDEX_ID);
space = space_cache_find_xc(space_id);
index = index_find_xc(space, index_id);
func = NULL;
} else {
assert(old_tuple != NULL && new_tuple != NULL);
tnt_raise(ClientError, ER_UNSUPPORTED, "func_index", "alter");
}

alter = alter_space_new(space);
auto scoped_guard = make_scoped_guard([=] {alter_space_delete(alter);});
alter_space_move_indexes(alter, 0, index->def->iid);
(void) new RebuildFuncIndex(alter, index->def, func);
alter_space_move_indexes(alter, index->def->iid + 1,
space->index_id_max + 1);
(void) new MoveCkConstraints(alter);
(void) new UpdateSchemaVersion(alter);
alter_space_do(txn, alter);

scoped_guard.is_active = false;
}

struct trigger alter_space_on_replace_space = {
RLIST_LINK_INITIALIZER, on_replace_dd_space, NULL, NULL
};
Expand Down Expand Up @@ -4749,4 +4838,8 @@ struct trigger on_replace_ck_constraint = {
RLIST_LINK_INITIALIZER, on_replace_dd_ck_constraint, NULL, NULL
};

/**
 * Trigger object whose callback is on_replace_dd_func_index();
 * the remaining members are left at their NULL defaults.
 */
struct trigger on_replace_func_index = {
RLIST_LINK_INITIALIZER, on_replace_dd_func_index, NULL, NULL
};

/* vim: set foldmethod=marker */
1 change: 1 addition & 0 deletions src/box/alter.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,5 +47,6 @@ extern struct trigger on_replace_space_sequence;
extern struct trigger on_replace_trigger;
extern struct trigger on_replace_fk_constraint;
extern struct trigger on_replace_ck_constraint;
extern struct trigger on_replace_func_index;

#endif /* INCLUDES_TARANTOOL_BOX_ALTER_H */
Binary file modified src/box/bootstrap.snap
Binary file not shown.
1 change: 1 addition & 0 deletions src/box/errcode.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ struct errcode_record {
/*195 */_(ER_CREATE_CK_CONSTRAINT, "Failed to create check constraint '%s': %s") \
/*196 */_(ER_CK_CONSTRAINT_FAILED, "Check constraint failed '%s': %s") \
/*197 */_(ER_SQL_COLUMN_COUNT, "Unequal number of entries in row expression: left side has %u, but right side - %u") \
/*198 */_(ER_FUNC_INDEX_FUNC, "Failed to build a key for functional index '%s': %s") \

/*
* !IMPORTANT! Please follow instructions at start of the file
Expand Down
28 changes: 28 additions & 0 deletions src/box/index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,16 @@ generic_index_replace(struct index *index, struct tuple *old_tuple,
return -1;
}

/**
 * Stub iterator constructor: never creates an iterator.
 *
 * Sets an UnsupportedIndexFeature diagnostic for the definition
 * of @a base and returns NULL. The iterator type, key and part
 * count are ignored.
 *
 * NOTE(review): the feature name reported to the user is
 * "read view" although this entry point creates an iterator -
 * confirm the wording is intended.
 */
struct iterator *
generic_index_create_iterator(struct index *base, enum iterator_type type,
			      const char *key, uint32_t part_count)
{
	(void) part_count;
	(void) key;
	(void) type;
	diag_set(UnsupportedIndexFeature, base->def, "read view");
	return NULL;
}


struct snapshot_iterator *
generic_index_create_snapshot_iterator(struct index *index)
{
Expand Down Expand Up @@ -729,4 +739,22 @@ generic_index_end_build(struct index *)
{
}

/**
 * build_next stub for a disabled index: the tuple is not added
 * anywhere and success is always reported.
 *
 * @param index Ignored.
 * @param tuple Ignored.
 * @retval 0 Always.
 */
int
disabled_index_build_next(struct index *index, struct tuple *tuple)
{
	/* Both arguments are intentionally unused. */
	(void) tuple;
	(void) index;
	return 0;
}

/**
 * replace stub for a disabled index: nothing is stored or
 * removed, and the operation always reports success.
 *
 * @param index Ignored.
 * @param old_tuple Ignored.
 * @param new_tuple Ignored.
 * @param mode Ignored.
 * @param[out] result Always set to NULL.
 * @retval 0 Always.
 */
int
disabled_index_replace(struct index *index, struct tuple *old_tuple,
		       struct tuple *new_tuple, enum dup_replace_mode mode,
		       struct tuple **result)
{
	(void) index;
	(void) mode;
	(void) new_tuple;
	(void) old_tuple;
	/* No tuple is reported back to the caller. */
	*result = NULL;
	return 0;
}

/* }}} */
9 changes: 9 additions & 0 deletions src/box/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -685,8 +685,17 @@ void generic_index_compact(struct index *);
void generic_index_reset_stat(struct index *);
void generic_index_begin_build(struct index *);
int generic_index_reserve(struct index *, uint32_t);
struct iterator *
generic_index_create_iterator(struct index *base, enum iterator_type type,
const char *key, uint32_t part_count);
int generic_index_build_next(struct index *, struct tuple *);
void generic_index_end_build(struct index *);
int
disabled_index_build_next(struct index *index, struct tuple *tuple);
int
disabled_index_replace(struct index *index, struct tuple *old_tuple,
struct tuple *new_tuple, enum dup_replace_mode mode,
struct tuple **result);

#if defined(__cplusplus)
} /* extern "C" */
Expand Down
9 changes: 9 additions & 0 deletions src/box/index_def.c
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ const struct index_opts index_opts_default = {
/* .bloom_fpr = */ 0.05,
/* .lsn = */ 0,
/* .stat = */ NULL,
/* .func = */ 0,
/* .is_multikey = */ false,
};

const struct opt_def index_opts_reg[] = {
Expand All @@ -63,6 +65,8 @@ const struct opt_def index_opts_reg[] = {
OPT_DEF("run_size_ratio", OPT_FLOAT, struct index_opts, run_size_ratio),
OPT_DEF("bloom_fpr", OPT_FLOAT, struct index_opts, bloom_fpr),
OPT_DEF("lsn", OPT_INT64, struct index_opts, lsn),
OPT_DEF("func", OPT_UINT32, struct index_opts, func_id),
OPT_DEF("is_multikey", OPT_BOOL, struct index_opts, is_multikey),
OPT_DEF_LEGACY("sql"),
OPT_END,
};
Expand Down Expand Up @@ -296,6 +300,11 @@ index_def_is_valid(struct index_def *index_def, const char *space_name)
space_name, "primary key cannot be multikey");
return false;
}
if (index_def->iid == 0 && key_def_is_for_func_index(index_def->key_def)) {
diag_set(ClientError, ER_MODIFY_INDEX, index_def->name,
space_name, "primary key can not use a function");
return false;
}
for (uint32_t i = 0; i < index_def->key_def->part_count; i++) {
assert(index_def->key_def->parts[i].type < field_type_MAX);
if (index_def->key_def->parts[i].fieldno > BOX_INDEX_FIELD_MAX) {
Expand Down
20 changes: 20 additions & 0 deletions src/box/index_def.h
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,10 @@ struct index_opts {
* filled after running ANALYZE command.
*/
struct index_stat *stat;
/** Identifier of the functional index function. */
uint32_t func_id;
/** Whether functional index extractor is multikey. */
bool is_multikey;
};

extern const struct index_opts index_opts_default;
Expand Down Expand Up @@ -207,6 +211,10 @@ index_opts_cmp(const struct index_opts *o1, const struct index_opts *o2)
return o1->run_size_ratio < o2->run_size_ratio ? -1 : 1;
if (o1->bloom_fpr != o2->bloom_fpr)
return o1->bloom_fpr < o2->bloom_fpr ? -1 : 1;
if (o1->func_id != o2->func_id)
return o1->func_id - o2->func_id;
if (o1->is_multikey != o2->is_multikey)
return o1->is_multikey - o2->is_multikey;
return 0;
}

Expand Down Expand Up @@ -298,6 +306,18 @@ index_def_update_optionality(struct index_def *def, uint32_t min_field_count)
key_def_update_optionality(def->cmp_def, min_field_count);
}

/**
* Update func pointer for functional index key definitions.
* @param def Index def, containing key definitions to update.
* @param func The functional index function pointer.
*/
static inline void
index_def_set_func(struct index_def *def, struct func *func)
{
def->key_def->func_index_func = func;
def->cmp_def->func_index_func = func;
}

/**
* Add an index definition to a list, preserving the
* first position of the primary key.
Expand Down
33 changes: 25 additions & 8 deletions src/box/key_def.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ key_def_set_part(struct key_def *def, uint32_t part_no, uint32_t fieldno,
}

struct key_def *
key_def_new(const struct key_part_def *parts, uint32_t part_count)
key_def_new(const struct key_part_def *parts, uint32_t part_count,
bool is_functional, bool is_multikey)
{
size_t sz = 0;
for (uint32_t i = 0; i < part_count; i++)
Expand All @@ -255,7 +256,6 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)

def->part_count = part_count;
def->unique_part_count = part_count;

/* A pointer to the JSON paths data in the new key_def. */
char *path_pool = (char *)def + key_def_sizeof(part_count, 0);
for (uint32_t i = 0; i < part_count; i++) {
Expand All @@ -266,8 +266,7 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
if (coll_id == NULL) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS,
i + 1, "collation was not found by ID");
key_def_delete(def);
return NULL;
goto error;
}
coll = coll_id->coll;
}
Expand All @@ -276,14 +275,25 @@ key_def_new(const struct key_part_def *parts, uint32_t part_count)
part->nullable_action, coll, part->coll_id,
part->sort_order, part->path, path_len,
&path_pool, TUPLE_OFFSET_SLOT_NIL,
0) != 0) {
key_def_delete(def);
return NULL;
0) != 0)
goto error;
}
if (is_functional) {
def->is_multikey = is_multikey;
def->part_count_for_func_index = part_count;
if (!key_def_is_sequential(def) || parts->fieldno != 0 ||
def->has_json_paths) {
diag_set(ClientError, ER_WRONG_INDEX_OPTIONS, 0,
"invalid functional key definition");
goto error;
}
}
assert(path_pool == (char *)def + sz);
key_def_set_func(def);
return def;
error:
key_def_delete(def);
return NULL;
}

int
Expand Down Expand Up @@ -677,6 +687,7 @@ key_def_find_by_fieldno(const struct key_def *key_def, uint32_t fieldno)
const struct key_part *
key_def_find(const struct key_def *key_def, const struct key_part *to_find)
{
assert(!key_def_is_for_func_index(key_def));
const struct key_part *part = key_def->parts;
const struct key_part *end = part + key_def->part_count;
for (; part != end; part++) {
Expand Down Expand Up @@ -708,6 +719,9 @@ static bool
key_def_can_merge(const struct key_def *key_def,
const struct key_part *to_merge)
{
if (key_def_is_for_func_index(key_def))
return true;

const struct key_part *part = key_def_find(key_def, to_merge);
if (part == NULL)
return true;
Expand All @@ -722,6 +736,7 @@ key_def_can_merge(const struct key_def *key_def,
struct key_def *
key_def_merge(const struct key_def *first, const struct key_def *second)
{
assert(!key_def_is_for_func_index(second));
uint32_t new_part_count = first->part_count + second->part_count;
/*
* Find and remove part duplicates, i.e. parts counted
Expand Down Expand Up @@ -754,6 +769,8 @@ key_def_merge(const struct key_def *first, const struct key_def *second)
new_def->has_optional_parts = first->has_optional_parts ||
second->has_optional_parts;
new_def->is_multikey = first->is_multikey || second->is_multikey;
new_def->part_count_for_func_index = first->part_count_for_func_index;
new_def->func_index_func = first->func_index_func;

/* JSON paths data in the new key_def. */
char *path_pool = (char *)new_def + key_def_sizeof(new_part_count, 0);
Expand Down Expand Up @@ -826,7 +843,7 @@ key_def_find_pk_in_cmp_def(const struct key_def *cmp_def,
}

/* Finally, allocate the new key definition. */
extracted_def = key_def_new(parts, pk_def->part_count);
extracted_def = key_def_new(parts, pk_def->part_count, false, false);
out:
region_truncate(region, region_svp);
return extracted_def;
Expand Down

0 comments on commit 8809f70

Please sign in to comment.