Permalink
Browse files

discodb: add ddb_deltalist - value lists are now stored in deltalists…

… instead of lists during discodb construction, resulting in a 50-75% smaller memory footprint in the many-values-per-key case
  • Loading branch information...
1 parent 341313b commit 40786e1ab68309ef96d1a249b7cc851e42f7af0a Ville Tuulos committed Jun 21, 2011
@@ -10,7 +10,7 @@
#include <ddb_profile.h>
#include <ddb_map.h>
-#include <ddb_list.h>
+#include <ddb_deltalist.h>
#include <ddb_delta.h>
#include <ddb_cmph.h>
@@ -98,8 +98,10 @@ static int pack_key2values(struct ddb_packed *pack,
const struct ddb_map *keys_map,
int unique_items)
{
- char *buf = NULL;
- uint64_t buf_size = 0;
+ valueid_t *values = NULL;
+ uint64_t values_size = 0;
+ char *dbuf = NULL;
+ uint64_t dbuf_size = 0;
int i, ret = -1;
uint32_t num = pack->head->num_keys;
@@ -108,13 +110,25 @@ static int pack_key2values(struct ddb_packed *pack,
for (i = 0; i < num; i++){
uint64_t *ptr = ddb_map_lookup_str(keys_map, &keys[i]);
- const struct ddb_list *values = (const struct ddb_list*)*ptr;
- uint64_t size = 0;
+ uint64_t size, num_values;
+ uint32_t num_written;
int duplicates = 0;
- uint32_t num_written = 0;
- if (ddb_delta_encode(values, &buf, &buf_size, &size,
- &num_written, &duplicates, unique_items))
+ const struct ddb_deltalist *d = (const struct ddb_deltalist*)*ptr;
+ if (ddb_deltalist_to_array(d, &num_values, &values, &values_size))
+ goto end;
+
+ if (num_values > UINT32_MAX)
+ goto end;
+
+ if (ddb_delta_encode(values,
+ (uint32_t)num_values,
+ &dbuf,
+ &dbuf_size,
+ &size,
+ &num_written,
+ &duplicates,
+ unique_items))
goto end;
pack->head->num_values += num_written;
@@ -127,13 +141,14 @@ static int pack_key2values(struct ddb_packed *pack,
goto end;
if (buffer_write_data(pack, keys[i].data, keys[i].length))
goto end;
- if (buffer_write_data(pack, buf, size))
+ if (buffer_write_data(pack, dbuf, size))
goto end;
}
buffer_toc_mark(pack);
ret = 0;
end:
- free(buf);
+ free(values);
+ free(dbuf);
return ret;
}
@@ -306,6 +321,7 @@ static int maybe_disable_compression(const struct ddb_cons *cons)
return 0;
}
+
char *ddb_cons_finalize(struct ddb_cons *cons, uint64_t *length, uint64_t flags)
{
struct ddb_packed *pack = NULL;
@@ -416,13 +432,13 @@ int ddb_cons_add(struct ddb_cons *db,
uint64_t *val_ptr;
uint64_t *key_ptr;
valueid_t value_id;
- struct ddb_list *value_list;
+ struct ddb_deltalist *value_list;
if (!(key_ptr = ddb_map_insert_str(db->keys_map, key)))
return -1;
- if (!*key_ptr && !(*key_ptr = (uint64_t)ddb_list_new()))
+ if (!*key_ptr && !(*key_ptr = (uint64_t)ddb_deltalist_new()))
return -1;
- value_list = (struct ddb_list*)*key_ptr;
+ value_list = (struct ddb_deltalist*)*key_ptr;
if (value){
if (!(val_ptr = ddb_map_insert_str(db->values_map, value)))
@@ -8,6 +8,8 @@
#include <ddb_bits.h>
#include <ddb_delta.h>
+#define BUF_INCREMENT 1048576
+
static uint32_t bits_needed(uint32_t max)
{
uint32_t x = max;
@@ -28,8 +30,11 @@ static uint32_t allocate_bits(char **buf, uint64_t *buf_size,
/* + 8 is for write_bits and read_bits which may try to access
* at most 7 bytes out of array bounds */
if (len + 8 > *buf_size){
- *buf_size = len + 8;
- if (!(*buf = realloc(*buf, *buf_size)))
+ while (len + 8 > *buf_size)
+ *buf_size += BUF_INCREMENT;
+ free(*buf);
+ *buf = NULL;
+ if (!(*buf = malloc(*buf_size)))
return 0;
}
memset(*buf, 0, len + 8);
@@ -38,8 +43,8 @@ static uint32_t allocate_bits(char **buf, uint64_t *buf_size,
static int id_cmp(const void *p1, const void *p2)
{
- const uint64_t x = *(const uint64_t*)p1;
- const uint64_t y = *(const uint64_t*)p2;
+ const valueid_t x = *(const valueid_t*)p1;
+ const valueid_t y = *(const valueid_t*)p2;
if (x > y)
return 1;
@@ -70,31 +75,31 @@ void ddb_delta_cursor(struct ddb_delta_cursor *c, const char *src)
}
}
-int ddb_delta_encode(const struct ddb_list *values,
+int ddb_delta_encode(valueid_t *values,
+ uint32_t num_values,
char **buf,
uint64_t *buf_size,
uint64_t *size,
uint32_t *num_written,
int *duplicates,
int unique_values)
{
- uint32_t len, i, j = 0, bits = 0, prev = 0, max_diff = 0;
- uint64_t *list = ddb_list_pointer(values, &len);
+ uint32_t i, j = 0, bits = 0, prev = 0, max_diff = 0;
uint64_t offs = 0;
*duplicates = 0;
- if (len){
- qsort(list, len, 8, id_cmp);
+ if (num_values){
+ qsort(values, num_values, sizeof(valueid_t), id_cmp);
/* find maximum delta -> bits needed per id */
- max_diff = list[0];
- for (i = 1; i < len; i++){
- uint32_t d = list[i] - list[i - 1];
+ max_diff = values[0];
+ for (i = 1; i < num_values; i++){
+ uint32_t d = values[i] - values[i - 1];
if (d > max_diff)
max_diff = d;
}
bits = bits_needed(max_diff);
- if (!(allocate_bits(buf, buf_size, 32 + 5 + bits * len)))
+ if (!(allocate_bits(buf, buf_size, 32 + 5 + bits * num_values)))
return -1;
}else{
if (!(allocate_bits(buf, buf_size, 32)))
@@ -106,19 +111,19 @@ int ddb_delta_encode(const struct ddb_list *values,
delta-encoded values (bits * num_vals) ]
*/
offs = 32;
- if (len){
+ if (num_values){
write_bits(*buf, offs, bits - 1);
offs += 5;
- for (i = 0; i < len; i++){
- uint32_t d = list[i] - prev;
+ for (i = 0; i < num_values; i++){
+ uint32_t d = values[i] - prev;
if (!d && i){
if (unique_values)
continue;
else
*duplicates = 1;
}
write_bits(*buf, offs, d);
- prev = list[i];
+ prev = values[i];
offs += bits;
++j;
}
@@ -4,7 +4,7 @@
#include <stdint.h>
-#include <ddb_list.h>
+#include <ddb_types.h>
struct ddb_delta_cursor{
const char *deltas;
@@ -18,7 +18,8 @@ void ddb_delta_cursor_next(struct ddb_delta_cursor *c);
void ddb_delta_cursor(struct ddb_delta_cursor *c, const char *src);
-int ddb_delta_encode(const struct ddb_list *values,
+int ddb_delta_encode(valueid_t *values,
+ uint32_t num_values,
char **buf,
uint64_t *buf_size,
uint64_t *size,
Oops, something went wrong.

0 comments on commit 40786e1

Please sign in to comment.