Skip to content

Commit

Permalink
Defragger improvements around large bins (#12996)
Browse files Browse the repository at this point in the history
Implement #12963

## Changes
1. Large bins don't have external fragmentation, or are at least
non-defraggable, so we should ignore the effect of
large bins when measuring fragmentation and only measure the fragmentation
of small bins. This affects both the allocator_frag* metrics and
the active-defrag trigger.
2. Adding INFO metrics for `muzzy` memory, which is memory returned to
the OS but still shows as RSS until the OS reclaims it.

---------

Co-authored-by: Oran Agra <oran@redislabs.com>
  • Loading branch information
sundb and oranagra committed Feb 20, 2024
1 parent ca5cac9 commit f6785df
Show file tree
Hide file tree
Showing 6 changed files with 99 additions and 18 deletions.
16 changes: 10 additions & 6 deletions src/defrag.c
Original file line number Diff line number Diff line change
Expand Up @@ -780,17 +780,21 @@ void activeDefragKvstore(kvstore *kvs) {
* or not, a false detection can cause the defragmenter to waste a lot of CPU
* without the possibility of getting any results. */
float getAllocatorFragmentation(size_t *out_frag_bytes) {
size_t resident, active, allocated;
zmalloc_get_allocator_info(&allocated, &active, &resident);
float frag_pct = ((float)active / allocated)*100 - 100;
size_t frag_bytes = active - allocated;
size_t resident, active, allocated, frag_smallbins_bytes;
zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL, &frag_smallbins_bytes);

/* Calculate the fragmentation ratio as the proportion of wasted memory in small
* bins (which are defraggable) relative to the total allocated memory (including large bins).
* This is because otherwise, if most of the memory usage is large bins, we may show high percentage,
* despite the fact it's not a lot of memory for the user. */
float frag_pct = (float)frag_smallbins_bytes / allocated * 100;
float rss_pct = ((float)resident / allocated)*100 - 100;
size_t rss_bytes = resident - allocated;
if(out_frag_bytes)
*out_frag_bytes = frag_bytes;
*out_frag_bytes = frag_smallbins_bytes;
serverLog(LL_DEBUG,
"allocated=%zu, active=%zu, resident=%zu, frag=%.2f%% (%.2f%% rss), frag_bytes=%zu (%zu rss)",
allocated, active, resident, frag_pct, rss_pct, frag_bytes, rss_bytes);
allocated, active, resident, frag_pct, rss_pct, frag_smallbins_bytes, rss_bytes);
return frag_pct;
}

Expand Down
9 changes: 6 additions & 3 deletions src/object.c
Original file line number Diff line number Diff line change
Expand Up @@ -1184,9 +1184,9 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
mh->total_frag_bytes =
server.cron_malloc_stats.process_rss - server.cron_malloc_stats.zmalloc_used;
mh->allocator_frag =
(float)server.cron_malloc_stats.allocator_active / server.cron_malloc_stats.allocator_allocated;
(float)server.cron_malloc_stats.allocator_frag_smallbins_bytes / server.cron_malloc_stats.allocator_allocated + 1;
mh->allocator_frag_bytes =
server.cron_malloc_stats.allocator_active - server.cron_malloc_stats.allocator_allocated;
server.cron_malloc_stats.allocator_frag_smallbins_bytes;
mh->allocator_rss =
(float)server.cron_malloc_stats.allocator_resident / server.cron_malloc_stats.allocator_active;
mh->allocator_rss_bytes =
Expand Down Expand Up @@ -1556,7 +1556,7 @@ NULL
} else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
struct redisMemOverhead *mh = getMemoryOverheadData();

addReplyMapLen(c,27+mh->num_dbs);
addReplyMapLen(c,28+mh->num_dbs);

addReplyBulkCString(c,"peak.allocated");
addReplyLongLong(c,mh->peak_allocated);
Expand Down Expand Up @@ -1628,6 +1628,9 @@ NULL
addReplyBulkCString(c,"allocator.resident");
addReplyLongLong(c,server.cron_malloc_stats.allocator_resident);

addReplyBulkCString(c,"allocator.muzzy");
addReplyLongLong(c,server.cron_malloc_stats.allocator_muzzy);

addReplyBulkCString(c,"allocator-fragmentation.ratio");
addReplyDouble(c,mh->allocator_frag);

Expand Down
6 changes: 5 additions & 1 deletion src/server.c
Original file line number Diff line number Diff line change
Expand Up @@ -1228,7 +1228,10 @@ void cronUpdateMemoryStats(void) {
* allocations, and allocator reserved pages that can be purged (all not actual frag) */
zmalloc_get_allocator_info(&server.cron_malloc_stats.allocator_allocated,
&server.cron_malloc_stats.allocator_active,
&server.cron_malloc_stats.allocator_resident);
&server.cron_malloc_stats.allocator_resident,
NULL,
&server.cron_malloc_stats.allocator_muzzy,
&server.cron_malloc_stats.allocator_frag_smallbins_bytes);
/* in case the allocator isn't providing these stats, fake them so that
* fragmentation info still shows some (inaccurate metrics) */
if (!server.cron_malloc_stats.allocator_resident) {
Expand Down Expand Up @@ -5643,6 +5646,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"allocator_allocated:%zu\r\n", server.cron_malloc_stats.allocator_allocated,
"allocator_active:%zu\r\n", server.cron_malloc_stats.allocator_active,
"allocator_resident:%zu\r\n", server.cron_malloc_stats.allocator_resident,
"allocator_muzzy:%zu\r\n", server.cron_malloc_stats.allocator_muzzy,
"total_system_memory:%lu\r\n", (unsigned long)total_system_mem,
"total_system_memory_human:%s\r\n", total_system_hmem,
"used_memory_lua:%lld\r\n", memory_lua, /* deprecated, renamed to used_memory_vm_eval */
Expand Down
2 changes: 2 additions & 0 deletions src/server.h
Original file line number Diff line number Diff line change
Expand Up @@ -1464,6 +1464,8 @@ struct malloc_stats {
size_t allocator_allocated;
size_t allocator_active;
size_t allocator_resident;
size_t allocator_muzzy;
size_t allocator_frag_smallbins_bytes;
};

/*-----------------------------------------------------------------------------
Expand Down
81 changes: 74 additions & 7 deletions src/zmalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -626,9 +626,54 @@ size_t zmalloc_get_rss(void) {

#if defined(USE_JEMALLOC)

int zmalloc_get_allocator_info(size_t *allocated,
size_t *active,
size_t *resident) {
#include <assert.h>

#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)

/* Read a single jemalloc control value named `name` into `out`, which must
 * point to storage of exactly `len` bytes.
 *
 * The je_mallctl() call is deliberately made OUTSIDE of assert(): assert()
 * expands to nothing when NDEBUG is defined, so a call placed inside it would
 * silently disappear and leave `out` unwritten.
 *
 * Returns 1 on success, 0 on failure (failure is only observable by the caller
 * when assertions are compiled out; otherwise the assert aborts). */
static int zmalloc_read_mallctl(const char *name, void *out, size_t len) {
    size_t sz = len;
    int err = je_mallctl(name, out, &sz, NULL, 0);
    assert(!err);
    return !err;
}

/* Compute the total memory wasted in fragmentation inside small arena bins.
 * Done by summing the memory in unused regs in all slabs of all small bins.
 *
 * For every bin the wasted bytes are:
 *
 *     (nregs * curslabs - curregs) * reg_size
 *
 * where `nregs` is the number of regions per slab, `curslabs` the current
 * number of slabs, `curregs` the number of regions in use and `reg_size` the
 * size of one region, all as reported by je_mallctl().
 *
 * Returns the sum over all bins, in bytes. If a statistic cannot be read and
 * assertions are disabled, the affected bin is skipped (or 0 is returned when
 * the number of bins itself is unavailable). */
size_t zmalloc_get_frag_smallbins(void) {
    unsigned nbins = 0;
    size_t frag = 0;
    char buf[100];

    if (!zmalloc_read_mallctl("arenas.nbins", &nbins, sizeof(nbins))) return 0;

    for (unsigned j = 0; j < nbins; j++) {
        size_t curregs = 0, curslabs = 0, reg_size = 0;
        uint32_t nregs = 0;

        /* The size of the current bin */
        snprintf(buf, sizeof(buf), "arenas.bin.%u.size", j);
        if (!zmalloc_read_mallctl(buf, &reg_size, sizeof(reg_size))) continue;

        /* Number of used regions in the bin */
        snprintf(buf, sizeof(buf), "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".bins.%u.curregs", j);
        if (!zmalloc_read_mallctl(buf, &curregs, sizeof(curregs))) continue;

        /* Number of regions per slab */
        snprintf(buf, sizeof(buf), "arenas.bin.%u.nregs", j);
        if (!zmalloc_read_mallctl(buf, &nregs, sizeof(nregs))) continue;

        /* Number of current slabs in the bin */
        snprintf(buf, sizeof(buf), "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".bins.%u.curslabs", j);
        if (!zmalloc_read_mallctl(buf, &curslabs, sizeof(curslabs))) continue;

        /* Calculate the fragmentation bytes for the current bin and add it to the total. */
        frag += ((nregs * curslabs) - curregs) * reg_size;
    }

    return frag;
}

int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident,
size_t *retained, size_t *muzzy, size_t *frag_smallbins_bytes)
{
uint64_t epoch = 1;
size_t sz;
*allocated = *resident = *active = 0;
Expand All @@ -645,6 +690,26 @@ int zmalloc_get_allocator_info(size_t *allocated,
/* Unlike zmalloc_used_memory, this matches the stats.resident by taking
* into account all allocations done by this process (not only zmalloc). */
je_mallctl("stats.allocated", allocated, &sz, NULL, 0);

/* Retained memory is memory released by `madvise(..., MADV_DONTNEED)`, which is not part
* of RSS or mapped memory, and doesn't have a strong association with physical memory in the OS.
* It is still part of the VM-Size, and may be used again in later allocations. */
if (retained) {
*retained = 0;
je_mallctl("stats.retained", retained, &sz, NULL, 0);
}

/* Unlike retained, muzzy represents memory released with `madvise(..., MADV_FREE)`.
* These pages will show as RSS for the process, until the OS decides to re-use them. */
if (muzzy) {
size_t pmuzzy, page;
assert(!je_mallctl("stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".pmuzzy", &pmuzzy, &sz, NULL, 0));
assert(!je_mallctl("arenas.page", &page, &sz, NULL, 0));
*muzzy = pmuzzy * page;
}

/* Total size of consumed memory in unused regs in small bins (AKA external fragmentation). */
*frag_smallbins_bytes = zmalloc_get_frag_smallbins();
return 1;
}

Expand All @@ -670,10 +735,12 @@ int jemalloc_purge(void) {

#else

int zmalloc_get_allocator_info(size_t *allocated,
size_t *active,
size_t *resident) {
*allocated = *resident = *active = 0;
/* Variant compiled when USE_JEMALLOC is not defined: no allocator statistics
 * are available here, so every counter is reported as zero.
 *
 * `allocated`, `active`, `resident` and `frag_smallbins_bytes` are always
 * written and must be valid pointers; `retained` and `muzzy` are optional and
 * may be NULL. Always returns 1. */
int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident,
                               size_t *retained, size_t *muzzy, size_t *frag_smallbins_bytes)
{
    /* Mandatory outputs. */
    *frag_smallbins_bytes = 0;
    *active = 0;
    *resident = 0;
    *allocated = 0;

    /* Optional outputs, filled in only when the caller asked for them. */
    if (retained != NULL) {
        *retained = 0;
    }
    if (muzzy != NULL) {
        *muzzy = 0;
    }

    return 1;
}

Expand Down
3 changes: 2 additions & 1 deletion src/zmalloc.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ __attribute__((malloc)) char *zstrdup(const char *s);
size_t zmalloc_used_memory(void);
void zmalloc_set_oom_handler(void (*oom_handler)(size_t));
size_t zmalloc_get_rss(void);
int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident);
int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident,
size_t *retained, size_t *muzzy, size_t *frag_smallbins_bytes);
void set_jemalloc_bg_thread(int enable);
int jemalloc_purge(void);
size_t zmalloc_get_private_dirty(long pid);
Expand Down

0 comments on commit f6785df

Please sign in to comment.