Skip to content

Commit

Permalink
Optimize tuple_to_string with libyaml
Browse files Browse the repository at this point in the history
Change implementation of tuple_to_string

The old variant converted the tuple to a Lua table and then encoded that table to YAML.
The new variant encodes the tuple to YAML directly.
Add a new API function, box_tuple_to_string.

Relates #128

Results of benchmarking with:

	old: 1m27.555s
	new: 0m50.830s
	Acceleration 43%
  • Loading branch information
ilmarkov committed Jun 16, 2017
1 parent 402410d commit c2af4fa
Show file tree
Hide file tree
Showing 5 changed files with 250 additions and 3 deletions.
1 change: 1 addition & 0 deletions extra/exports
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ box_tuple_extract_key
box_tuple_compare
box_tuple_compare_with_key
box_return_tuple
box_tuple_to_string
box_space_id_by_name
box_index_id_by_name
box_select
Expand Down
7 changes: 4 additions & 3 deletions src/box/lua/tuple.lua
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ box_tuple_update(box_tuple_t *tuple, const char *expr, const char *expr_end);

box_tuple_t *
box_tuple_upsert(box_tuple_t *tuple, const char *expr, const char *expr_end);

char *
box_tuple_to_string(box_tuple_t *tuple);
]]

local builtin = ffi.C
Expand Down Expand Up @@ -310,9 +313,7 @@ ffi.metatype(tuple_t, {
return builtin.box_tuple_field_count(tuple)
end;
__tostring = function(tuple)
    -- Encode the tuple to YAML directly in C instead of unpacking it
    -- into a Lua table and calling yaml.encode on the table.
    local str = builtin.box_tuple_to_string(tuple)
    -- box_tuple_to_string returns NULL on failure; a NULL cdata pointer
    -- compares equal to nil, and ffi.string must never be given NULL.
    if str == nil then
        error('failed to convert tuple to string')
    end
    -- Remove the yaml header and footer; the (5, -6) offsets are kept
    -- from the previous yaml.encode-based implementation.
    return ffi.string(str):sub(5, -6)
end;
__index = function(tuple, key)
if type(key) == "number" then
Expand Down
7 changes: 7 additions & 0 deletions src/box/tuple.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,13 @@ box_tuple_to_buf(const box_tuple_t *tuple, char *buf, size_t size)
return tuple_to_buf(tuple, buf, size);
}

/**
 * Public API entry point: render a tuple as a YAML string.
 * Simply forwards to tuple_to_yaml() and hands its result (including
 * NULL) back to the caller unchanged.
 */
char *
box_tuple_to_string(const box_tuple_t *tuple)
{
	assert(tuple != NULL);
	char *yaml_text = tuple_to_yaml(tuple);
	return yaml_text;
}

box_tuple_format_t *
box_tuple_format(const box_tuple_t *tuple)
{
Expand Down
13 changes: 13 additions & 0 deletions src/box/tuple.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ box_tuple_format(const box_tuple_t *tuple);
const char *
box_tuple_field(const box_tuple_t *tuple, uint32_t fieldno);

/**
 * Convert a tuple to its YAML string representation.
 *
 * \param tuple tuple to convert, must not be NULL
 * \retval NULL in case of error; the error is written to diag
 * \retval pointer to the string, allocated on the fiber()->gc region
 *         (the memory belongs to that region, not to a malloc'ed
 *         buffer handed over to the caller)
 */
char *
box_tuple_to_string(const box_tuple_t *tuple);

/**
* Tuple iterator
*/
Expand Down Expand Up @@ -710,6 +720,9 @@ tuple_to_obuf(struct tuple *tuple, struct obuf *buf);
ssize_t
tuple_to_buf(const struct tuple *tuple, char *buf, size_t size);

/**
 * Serialize the data of a tuple as a YAML document.
 *
 * \param tuple tuple to serialize
 * \retval NULL on error, with the error set in diag
 * \retval pointer to the resulting text on the fiber()->gc region
 */
char *
tuple_to_yaml(const struct tuple *tuple);

#if defined(__cplusplus)
} /* extern "C" */

Expand Down
225 changes: 225 additions & 0 deletions src/box/tuple_convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,15 @@
*/
#include "tuple.h"
#include "iobuf.h"
#include <msgpuck/msgpuck.h>
#include "third_party/libyaml/include/yaml.h"
#include "third_party/utf8.h"
#include "third_party/base64.h"
#include <small/region.h>
#include "fiber.h"
#include "lua/utils.h"

#define ENCODE_NUMBER_PRECISION 14

int
tuple_to_obuf(struct tuple *tuple, struct obuf *buf)
Expand All @@ -53,3 +62,219 @@ tuple_to_buf(const struct tuple *tuple, char *buf, size_t size)
}
return bsize;
}

/**
 * Output callback handed to the libyaml emitter.
 *
 * Copies each chunk produced by the emitter into a fresh allocation on
 * the current fiber's gc region. The chunks are not linked together
 * here; the caller is expected to join them afterwards.
 *
 * \param arg unused
 * \param buf chunk of emitted text
 * \param len size of the chunk in bytes
 * \retval 1 the chunk has been stored
 * \retval 0 allocation failed, diag is set
 */
int
append_output(void *arg, unsigned char *buf, size_t len)
{
	(void) arg;
	char *chunk = region_alloc(&fiber()->gc, len);
	if (chunk != NULL) {
		memcpy(chunk, buf, len);
		return 1;
	}
	diag_set(OutOfMemory, len , "region_alloc", "append_output");
	return 0;
}

static int
encode_node(yaml_emitter_t *emitter, const char **data);

/**
 * Emit a MsgPack map as a YAML flow mapping.
 *
 * The mapping-start event is emitted first, then the map header at
 * *data is decoded and every key and every value is passed through
 * encode_node(), and finally the mapping-end event is emitted.
 *
 * \param emitter libyaml emitter to write events to
 * \param data in/out cursor into the MsgPack data
 * \retval 1 success
 * \retval 0 failure (diag is set here when an event cannot be emitted)
 */
static int
encode_table(yaml_emitter_t *emitter, const char **data)
{
	yaml_event_t event;

	if (!yaml_mapping_start_event_initialize(&event, NULL, NULL, 0,
						 YAML_FLOW_MAPPING_STYLE) ||
	    !yaml_emitter_emit(emitter, &event)) {
		diag_set(SystemError, "failed to init event libyaml");
		return 0;
	}

	uint32_t pair_count = mp_decode_map(data);
	for (uint32_t pair = 0; pair != pair_count; ++pair) {
		/* Key first, then value; stop at the first failure. */
		if (!encode_node(emitter, data) ||
		    !encode_node(emitter, data))
			return 0;
	}

	if (yaml_mapping_end_event_initialize(&event) &&
	    yaml_emitter_emit(emitter, &event))
		return 1;

	diag_set(SystemError, "failed to end event libyaml");
	return 0;
}


/**
 * Emit a MsgPack array as a YAML flow sequence.
 *
 * The sequence-start event is emitted first, then the array header at
 * *data is decoded and each element is passed through encode_node(),
 * and finally the sequence-end event is emitted.
 *
 * \param emitter libyaml emitter to write events to
 * \param data in/out cursor into the MsgPack data
 * \retval 1 success
 * \retval 0 failure (diag is set here when an event cannot be emitted)
 */
static int
encode_array(yaml_emitter_t *emitter, const char **data)
{
	yaml_event_t event;

	/* TODO: maybe do something with the anchor (NULL for now). */
	if (!yaml_sequence_start_event_initialize(&event, NULL, NULL, 0,
						  YAML_FLOW_SEQUENCE_STYLE) ||
	    !yaml_emitter_emit(emitter, &event)) {
		diag_set(SystemError, "failed to init event libyaml");
		return 0;
	}

	uint32_t remaining = mp_decode_array(data);
	while (remaining-- > 0) {
		if (!encode_node(emitter, data))
			return 0;
	}

	if (yaml_sequence_end_event_initialize(&event) &&
	    yaml_emitter_emit(emitter, &event))
		return 1;

	diag_set(SystemError, "failed to end event libyaml");
	return 0;
}

/**
 * Emit one MsgPack value as YAML.
 *
 * Arrays and maps are delegated to encode_array() / encode_table().
 * Every other type is rendered into a single scalar event:
 *  - integers and floating point numbers are printed into a local
 *    buffer;
 *  - strings accepted by check_utf8() are emitted single-quoted;
 *  - MP_BIN values and strings rejected by check_utf8() are
 *    base64-encoded and tagged "binary";
 *  - booleans and nil become "true" / "false" / "null";
 *  - MP_EXT values are skipped and emitted as an empty scalar.
 *
 * \param emitter libyaml emitter to write events to
 * \param data in/out cursor into the MsgPack data; on success it is
 *        advanced past the encoded value
 * \retval 1 success
 * \retval 0 failure, diag is set
 */
static int
encode_node(yaml_emitter_t *emitter, const char **data)
{
	size_t len = 0;
	const char *str = "";
	yaml_char_t *tag = NULL;
	yaml_event_t ev;
	yaml_scalar_style_t style = YAML_PLAIN_SCALAR_STYLE;
	int is_binary = 0;
	char buf[FPCONV_G_FMT_BUFSIZE];
	char *binary_encode = NULL;
	size_t raw_len = 0;
	size_t encoded_size = 0;
	int type = mp_typeof(**data);
	switch(type) {
	case MP_UINT:
		len = snprintf(buf, sizeof(buf) - 1, "%llu",
			       (unsigned long long) mp_decode_uint(data));
		buf[len] = 0;
		str = buf;
		break;
	case MP_INT:
		len = snprintf(buf, sizeof(buf) - 1, "%lld",
			       (long long) mp_decode_int(data));
		buf[len] = 0;
		str = buf;
		break;
	case MP_FLOAT:
		fpconv_g_fmt(buf, mp_decode_float(data),
			     ENCODE_NUMBER_PRECISION);
		str = buf;
		len = strlen(buf);
		break;
	case MP_DOUBLE:
		fpconv_g_fmt(buf, mp_decode_double(data),
			     ENCODE_NUMBER_PRECISION);
		str = buf;
		len = strlen(buf);
		break;
	case MP_ARRAY:
		return encode_array(emitter, data);
	case MP_MAP:
		return encode_table(emitter, data);
	case MP_STR:
		len = mp_decode_strl(data);
		str = *data;
		style = YAML_ANY_SCALAR_STYLE;
		if (check_utf8((const yaml_char_t *) str, len)) {
			style = YAML_SINGLE_QUOTED_SCALAR_STYLE;
			*data += len;
			break;
		}
		/* Not UTF8: emit the string as binary. */
		/* FALLTHROUGH */
	case MP_BIN:
		if (type != MP_STR) {
			len = mp_decode_binl(data);
			/*
			 * The payload starts right after the header.
			 * Without this assignment str would still point
			 * at the "" literal and base64_encode() would
			 * read len bytes past it.
			 */
			str = *data;
		}
		/* Binary or not UTF8 */
		is_binary = 1;
		raw_len = len;
		encoded_size = base64_bufsize(raw_len);
		binary_encode = (char *) malloc(encoded_size);
		if (!binary_encode) {
			diag_set(OutOfMemory, encoded_size,
				 "malloc", "encode_node");
			return 0;
		}
		/*
		 * The scalar must be emitted with the length of the
		 * base64 text, not with the length of the raw payload.
		 */
		len = base64_encode(str, raw_len, binary_encode,
				    encoded_size);
		str = binary_encode;
		tag = (yaml_char_t *) "binary";
		/* The cursor skips the raw payload. */
		*data += raw_len;
		break;
	case MP_BOOL:
		if (mp_decode_bool(data)) {
			str = "true";
			len = 4;
		} else {
			str = "false";
			len = 5;
		}
		break;
	case MP_NIL:
		/* Consume the nil, otherwise the cursor never moves on. */
		mp_decode_nil(data);
		style = YAML_PLAIN_SCALAR_STYLE;
		str = "null";
		len = 4;
		break;
	case MP_EXT:
		/* Extensions are skipped; an empty scalar is emitted. */
		mp_next(data);
		break;
	}

	int emitted =
		yaml_scalar_event_initialize(&ev, NULL, tag,
					     (unsigned char *)str, len,
					     !is_binary, !is_binary, style) &&
		yaml_emitter_emit(emitter, &ev);

	/*
	 * Release the temporary base64 buffer on both the success and the
	 * failure path; free(NULL) is a no-op for non-binary values.
	 */
	free(binary_encode);

	if (!emitted) {
		diag_set(OutOfMemory, len, "yaml_scalar_event_initialize",
			 "encode_node");
		return 0;
	}
	return 1;
}

/**
 * Serialize the MsgPack data of a tuple as a YAML document.
 *
 * The libyaml emitter writes its output through append_output(), which
 * stores every chunk on the fiber()->gc region. Once the document is
 * complete, a terminating zero byte is appended and all chunks are
 * joined into one contiguous string.
 *
 * On any failure the emitter is destroyed and the region is rolled
 * back to the state it had on entry, so no partial output is left
 * behind.
 *
 * \param tuple tuple to serialize
 * \retval NULL on error, diag is set
 * \retval pointer to a zero-terminated string on the fiber()->gc region
 */
char *
tuple_to_yaml(const struct tuple *tuple)
{
	const char *data = tuple_data(tuple);
	struct region *gc = &fiber()->gc;
	yaml_emitter_t emitter;
	yaml_event_t ev;
	char *terminator = NULL;
	char *buf = NULL;
	size_t total_len = 0;

	/* Remember the region state to measure and roll back the output. */
	size_t used = region_used(gc);

	if (!yaml_emitter_initialize(&emitter)) {
		diag_set(SystemError, "failed to init libyaml");
		return NULL;
	}
	yaml_emitter_set_unicode(&emitter, 1);
	yaml_emitter_set_indent(&emitter, 2);
	yaml_emitter_set_width(&emitter, 2);
	yaml_emitter_set_break(&emitter, YAML_LN_BREAK);
	yaml_emitter_set_output(&emitter, &append_output, NULL);

	if (!yaml_stream_start_event_initialize(&ev, YAML_UTF8_ENCODING) ||
	    !yaml_emitter_emit(&emitter, &ev) ||
	    !yaml_document_start_event_initialize(&ev, NULL, NULL, NULL, 0) ||
	    !yaml_emitter_emit(&emitter, &ev)) {
		diag_set(SystemError, "failed to init event libyaml");
		goto error;
	}
	if (!encode_node(&emitter, &data))
		goto error;
	if (!yaml_document_end_event_initialize(&ev, 0) ||
	    !yaml_emitter_emit(&emitter, &ev) ||
	    !yaml_stream_end_event_initialize(&ev) ||
	    !yaml_emitter_emit(&emitter, &ev) ||
	    !yaml_emitter_flush(&emitter)) {
		diag_set(SystemError, "failed to end event libyaml");
		goto error;
	}
	yaml_emitter_delete(&emitter);

	/*
	 * Reserve the terminating zero as a part of the joined data.
	 * Writing buf[total_len] after joining exactly total_len bytes
	 * would touch memory past the end of the allocation.
	 */
	terminator = (char *) region_alloc(gc, 1);
	if (terminator == NULL) {
		diag_set(OutOfMemory, 1, "region_alloc", "tuple_to_yaml");
		region_truncate(gc, used);
		return NULL;
	}
	*terminator = '\0';

	total_len = region_used(gc) - used;
	buf = (char *) region_join(gc, total_len);
	if (buf == NULL) {
		diag_set(OutOfMemory, total_len, "region_join",
			 "tuple_to_yaml");
		region_truncate(gc, used);
		return NULL;
	}
	return buf;

error:
	/* Release libyaml state and drop the partially written output. */
	yaml_emitter_delete(&emitter);
	region_truncate(gc, used);
	return NULL;
}

0 comments on commit c2af4fa

Please sign in to comment.