Skip to content

Commit

Permalink
Listpack encoding for sets (#11290)
Browse files Browse the repository at this point in the history
Small sets that contain non-integer elements are now listpack encoded. By default
this applies to sets of up to 128 elements with at most 64 bytes per element; the limits
are set by the new configs `set-max-listpack-entries` and `set-max-listpack-value`.
This saves memory for small sets compared to using a hashtable.

Sets with only integers, even very small sets, are still intset encoded (up to 1G
limit, etc.). Larger sets are hashtable encoded.

This PR increments the RDB version and affects the output of OBJECT ENCODING.

Possible conversions when elements are added:

    intset -> listpack
    listpack -> hashtable
    intset -> hashtable

Note: No conversion happens when elements are deleted. If all elements are
deleted and then added again, the set is deleted and recreated, thus implicitly
converted to a smaller encoding.
  • Loading branch information
zuiderkwast committed Nov 9, 2022
1 parent 07d1870 commit 4e472a1
Show file tree
Hide file tree
Showing 19 changed files with 1,132 additions and 344 deletions.
9 changes: 8 additions & 1 deletion redis.conf
Original file line number Diff line number Diff line change
Expand Up @@ -1951,13 +1951,20 @@ list-max-listpack-size -2
# etc.
list-compress-depth 0

# Sets have a special encoding in just one case: when a set is composed
# Sets have a special encoding when a set is composed
# of just strings that happen to be integers in radix 10 in the range
# of 64 bit signed integers.
# The following configuration setting sets the limit in the size of the
# set in order to use this special memory saving encoding.
set-max-intset-entries 512

# Sets containing non-integer values are also encoded using a memory efficient
# data structure when they have a small number of entries, and the biggest entry
# does not exceed a given threshold. These thresholds can be configured using
# the following directives.
set-max-listpack-entries 128
set-max-listpack-value 64

# Similarly to hashes and lists, sorted sets are also specially encoded in
# order to save a lot of space. This encoding is only used when the length and
# elements of a sorted set are below the following limits:
Expand Down
67 changes: 21 additions & 46 deletions src/aof.c
Original file line number Diff line number Diff line change
Expand Up @@ -1818,56 +1818,31 @@ int rewriteListObject(rio *r, robj *key, robj *o) {
* The function returns 0 on error, 1 on success. */
int rewriteSetObject(rio *r, robj *key, robj *o) {
long long count = 0, items = setTypeSize(o);

if (o->encoding == OBJ_ENCODING_INTSET) {
int ii = 0;
int64_t llval;

while(intsetGet(o->ptr,ii++,&llval)) {
if (count == 0) {
int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
AOF_REWRITE_ITEMS_PER_CMD : items;

if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
!rioWriteBulkString(r,"SADD",4) ||
!rioWriteBulkObject(r,key))
{
return 0;
}
setTypeIterator *si = setTypeInitIterator(o);
char *str;
size_t len;
int64_t llval;
while (setTypeNext(si, &str, &len, &llval) != -1) {
if (count == 0) {
int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
AOF_REWRITE_ITEMS_PER_CMD : items;
if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
!rioWriteBulkString(r,"SADD",4) ||
!rioWriteBulkObject(r,key))
{
return 0;
}
if (!rioWriteBulkLongLong(r,llval)) return 0;
if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
} else if (o->encoding == OBJ_ENCODING_HT) {
dictIterator *di = dictGetIterator(o->ptr);
dictEntry *de;

while((de = dictNext(di)) != NULL) {
sds ele = dictGetKey(de);
if (count == 0) {
int cmd_items = (items > AOF_REWRITE_ITEMS_PER_CMD) ?
AOF_REWRITE_ITEMS_PER_CMD : items;

if (!rioWriteBulkCount(r,'*',2+cmd_items) ||
!rioWriteBulkString(r,"SADD",4) ||
!rioWriteBulkObject(r,key))
{
dictReleaseIterator(di);
return 0;
}
}
if (!rioWriteBulkString(r,ele,sdslen(ele))) {
dictReleaseIterator(di);
return 0;
}
if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
size_t written = str ?
rioWriteBulkString(r, str, len) : rioWriteBulkLongLong(r, llval);
if (!written) {
setTypeReleaseIterator(si);
return 0;
}
dictReleaseIterator(di);
} else {
serverPanic("Unknown set encoding");
if (++count == AOF_REWRITE_ITEMS_PER_CMD) count = 0;
items--;
}
setTypeReleaseIterator(si);
return 1;
}

Expand Down
2 changes: 2 additions & 0 deletions src/config.c
Original file line number Diff line number Diff line change
Expand Up @@ -3130,6 +3130,8 @@ standardConfig static_configs[] = {
/* Size_t configs */
createSizeTConfig("hash-max-listpack-entries", "hash-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_entries, 512, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("set-max-intset-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_intset_entries, 512, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("set-max-listpack-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("set-max-listpack-value", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.set_max_listpack_value, 64, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("zset-max-listpack-entries", "zset-max-ziplist-entries", MODIFIABLE_CONFIG, 0, LONG_MAX, server.zset_max_listpack_entries, 128, INTEGER_CONFIG, NULL, NULL),
createSizeTConfig("active-defrag-ignore-bytes", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.active_defrag_ignore_bytes, 100<<20, MEMORY_CONFIG, NULL, NULL), /* Default: don't defrag if frag overhead is below 100mb */
createSizeTConfig("hash-max-listpack-value", "hash-max-ziplist-value", MODIFIABLE_CONFIG, 0, LONG_MAX, server.hash_max_listpack_value, 64, MEMORY_CONFIG, NULL, NULL),
Expand Down
6 changes: 4 additions & 2 deletions src/db.c
Original file line number Diff line number Diff line change
Expand Up @@ -915,14 +915,16 @@ void scanGenericCommand(client *c, robj *o, unsigned long cursor) {
} while (cursor &&
maxiterations-- &&
listLength(keys) < (unsigned long)count);
} else if (o->type == OBJ_SET) {
} else if (o->type == OBJ_SET && o->encoding == OBJ_ENCODING_INTSET) {
int pos = 0;
int64_t ll;

while(intsetGet(o->ptr,pos++,&ll))
listAddNodeTail(keys,createStringObjectFromLongLong(ll));
cursor = 0;
} else if (o->type == OBJ_HASH || o->type == OBJ_ZSET) {
} else if ((o->type == OBJ_HASH || o->type == OBJ_ZSET || o->type == OBJ_SET) &&
o->encoding == OBJ_ENCODING_LISTPACK)
{
unsigned char *p = lpFirst(o->ptr);
unsigned char *vstr;
int64_t vlen;
Expand Down
10 changes: 6 additions & 4 deletions src/defrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,12 @@ long defragKey(redisDb *db, dictEntry *de) {
} else if (ob->type == OBJ_SET) {
if (ob->encoding == OBJ_ENCODING_HT) {
defragged += defragSet(db, de);
} else if (ob->encoding == OBJ_ENCODING_INTSET) {
intset *newis, *is = ob->ptr;
if ((newis = activeDefragAlloc(is)))
defragged++, ob->ptr = newis;
} else if (ob->encoding == OBJ_ENCODING_INTSET ||
ob->encoding == OBJ_ENCODING_LISTPACK)
{
void *newptr, *ptr = ob->ptr;
if ((newptr = activeDefragAlloc(ptr)))
defragged++, ob->ptr = newptr;
} else {
serverPanic("Unknown set encoding");
}
Expand Down
13 changes: 13 additions & 0 deletions src/intset.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,17 @@ int64_t intsetRandom(intset *is) {
return _intsetGet(is,rand()%len);
}

/* Return the largest member, read from the last position of the set.
 *
 * The set must not be empty: the index is computed as length - 1 in
 * unsigned 32 bit arithmetic, so a length of zero would wrap around. */
int64_t intsetMax(intset *is) {
    uint32_t count = intrev32ifbe(is->length);
    uint32_t last = count - 1;
    return _intsetGet(is, last);
}

/* Return the smallest member, read from the first position of the set.
 *
 * No length check is done here, so the caller is expected to pass a
 * non-empty set. */
int64_t intsetMin(intset *is) {
    const int first = 0;
    return _intsetGet(is, first);
}

/* Get the value at the given position. When this position is
* out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
Expand Down Expand Up @@ -425,6 +436,8 @@ int intsetTest(int argc, char **argv, int flags) {
is = intsetAdd(is,6,&success); assert(success);
is = intsetAdd(is,4,&success); assert(success);
is = intsetAdd(is,4,&success); assert(!success);
assert(6 == intsetMax(is));
assert(4 == intsetMin(is));
ok();
zfree(is);
}
Expand Down
2 changes: 2 additions & 0 deletions src/intset.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
intset *intsetRemove(intset *is, int64_t value, int *success);
uint8_t intsetFind(intset *is, int64_t value);
int64_t intsetRandom(intset *is);
int64_t intsetMax(intset *is);
int64_t intsetMin(intset *is);
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
uint32_t intsetLen(const intset *is);
size_t intsetBlobLen(intset *is);
Expand Down

0 comments on commit 4e472a1

Please sign in to comment.