Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ malloc_cmp_test: clean
$(CC) $(CFLAGS) $(OPTIMIZE) $(EXE_CFLAGS) $(OS_FLAGS) -DMALLOC_PERF_TEST $(ISO_ALLOC_PRINTF_SRC) tests/tests.c -o $(BUILD_DIR)/malloc_tests
echo "Running IsoAlloc Performance Test"
build/tests
echo "Running glibc malloc Performance Test"
echo "Running system malloc Performance Test"
build/malloc_tests

## C++ Support - Build a debug version of the unit test
Expand Down
35 changes: 25 additions & 10 deletions PERFORMANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,31 @@ The same test run on an AWS t2.xlarge Ubuntu 20.04 instance with 4 `Intel(R) Xeo
```
Running IsoAlloc Performance Test

iso_alloc/iso_free 1441616 tests completed in 0.418426 seconds
iso_calloc/iso_free 1441616 tests completed in 0.578068 seconds
iso_realloc/iso_free 1441616 tests completed in 0.681393 seconds
iso_alloc/iso_free 1441616 tests completed in 0.147336 seconds
iso_calloc/iso_free 1441616 tests completed in 0.161482 seconds
iso_realloc/iso_free 1441616 tests completed in 0.244981 seconds

Running glibc malloc Performance Test

malloc/free 1441616 tests completed in 0.352161 seconds
calloc/free 1441616 tests completed in 0.562425 seconds
realloc/free 1441616 tests completed in 0.590622 seconds
malloc/free 1441616 tests completed in 0.182437 seconds
calloc/free 1441616 tests completed in 0.246065 seconds
realloc/free 1441616 tests completed in 0.332292 seconds
```

Here is the same test as above on macOS 11.6:

```
Running IsoAlloc Performance Test

iso_alloc/iso_free 1441616 tests completed in 0.124150 seconds
iso_calloc/iso_free 1441616 tests completed in 0.182955 seconds
iso_realloc/iso_free 1441616 tests completed in 0.275084 seconds

Running system malloc Performance Test

malloc/free 1441616 tests completed in 0.090845 seconds
calloc/free 1441616 tests completed in 0.200397 seconds
realloc/free 1441616 tests completed in 0.254574 seconds
```

This same test can be used with the `perf` utility to measure basic stats like page faults and CPU utilization using both heap implementations. The output below is on the same AWS t2.xlarge instance as above.
Expand Down Expand Up @@ -163,10 +178,10 @@ cache-thrashN mimalloc 00.36 3356 1.44 0.00 0 229
cache-thrashN tcmalloc 01.87 6880 7.42 0.00 0 1138
cache-thrashN jemalloc 00.37 3760 1.46 0.00 0 296

redis isoalloc 9.335 71048 4.35 0.36 0 19326 ops/sec: 214227.92
redis mimalloc 4.611 28932 2.13 0.20 4 6657 ops/sec: 433692.97
redis tcmalloc 5.055 37088 2.37 0.19 3 8444 ops/sec: 395588.59
redis jemalloc 5.150 30964 2.42 0.19 5 7024 ops/sec: 388279.50
redis isoalloc 8.669 76240 4.07 0.30 1 21473 ops/sec: 230702.66, relative time: 8.669s
redis mimalloc 4.555 28968 2.13 0.17 4 6655 ops/sec: 439023.69, relative time: 4.555s
redis tcmalloc 4.715 37120 2.21 0.17 3 8446 ops/sec: 424108.56, relative time: 4.715s
redis jemalloc 5.125 30836 2.41 0.17 0 7034 ops/sec: 390174.03, relative time: 5.125s
```

IsoAlloc isn't quite ready for performance-sensitive server workloads, but it's more than fast enough for client-side mobile/desktop applications with a risky C/C++ attack surface.
8 changes: 6 additions & 2 deletions include/iso_alloc_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ using namespace std;
* create. This is a completely arbitrary number but
* it does correspond to the size of the _root.zones
* array that lives in global memory. Currently the
* iso_alloc_zone structure is roughly 1088 bytes so
* this allocates 8912896 bytes (~8.5 MB) for _root */
* iso_alloc_zone structure is roughly 1090 bytes so
* this allocates 8929280 bytes (~8.9 MB) for _root */
#define MAX_ZONES 8192

/* Each user allocation zone we make is 4mb in size.
Expand All @@ -296,6 +296,8 @@ using namespace std;
#define BIG_ZONE_USER_PAGE_COUNT 2
#define BIG_ZONE_USER_PAGE_COUNT_SHIFT 1

/* Size in bytes of the zone lookup table: one uint16_t
 * entry for every value from 0 through SMALL_SZ_MAX */
#define ZONE_LOOKUP_TABLE_SZ ((SMALL_SZ_MAX+1) * sizeof(uint16_t))

/* We allocate zones at startup for common sizes.
* Each of these default zones is ZONE_USER_SIZE bytes
* so ZONE_8192 holds less chunks than ZONE_128 for
Expand Down Expand Up @@ -414,6 +416,7 @@ static uint64_t default_zones[] = {ZONE_512, ZONE_512, ZONE_512, ZONE_1024};

typedef uint64_t bit_slot_t;
typedef int64_t bitmap_index_t;
/* Element type of the zone lookup table; each entry
 * holds an index into the _root->zones array */
typedef uint16_t zone_lookup_table_t;

typedef struct {
void *user_pages_start; /* Start of the pages backing this zone */
Expand All @@ -430,6 +433,7 @@ typedef struct {
bool internally_managed; /* Zones can be managed by iso_alloc or custom */
bool is_full; /* Indicates whether this zone is full to avoid expensive free bit slot searches */
uint16_t index; /* Zone index */
uint16_t next_sz_index; /* What is the index of the next zone of this size */
#if CPU_PIN
uint8_t cpu_core; /* What CPU core this zone is pinned to */
#endif
Expand Down
2 changes: 2 additions & 0 deletions misc/commands.gdb
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ i r
x/i $pc
thread apply all bt
thread apply all info locals
p *_root
p _zone_lookup_table
98 changes: 90 additions & 8 deletions src/iso_alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ uint32_t g_page_size;
uint32_t _default_zone_count;
iso_alloc_root *_root;

/* Zones are linked by their next_sz_index member, which
 * tells the allocator where in the _root->zones array
 * it can find the next zone that holds the same size
 * chunks. The lookup table is indexed by chunk size and
 * stores the index of the first zone in that chain, so
 * the start of the chain is found in O(1) time. An entry
 * of 0 is treated as "no zone recorded for this size". */
static zone_lookup_table_t *_zone_lookup_table;

#if NO_ZERO_ALLOCATIONS
void *_zero_alloc_page;
#endif
Expand Down Expand Up @@ -363,6 +370,10 @@ INTERNAL_HIDDEN void iso_alloc_initialize_global_root(void) {
LOG_AND_ABORT("Could not initialize global root");
}

/* We mlock the root or every allocation would
* result in a soft page fault */
mlock(&_root, sizeof(iso_alloc_root));

_default_zone_count = sizeof(default_zones) >> 3;

_root->zones_size = (MAX_ZONES * sizeof(iso_alloc_zone));
Expand All @@ -377,18 +388,17 @@ INTERNAL_HIDDEN void iso_alloc_initialize_global_root(void) {
_root->zones = (void *) (p + g_page_size);
name_mapping(p, _root->zones_size, "isoalloc zone metadata");

/* If we don't lock the zone lookup table we will incur a
* soft page fault with almost every allocation */
_zone_lookup_table = mmap_rw_pages(ZONE_LOOKUP_TABLE_SZ, true, NULL);
mlock(&_zone_lookup_table, ZONE_LOOKUP_TABLE_SZ);

for(int64_t i = 0; i < _default_zone_count; i++) {
if((_iso_new_zone(default_zones[i], true)) == NULL) {
LOG_AND_ABORT("Failed to create a new zone");
}
}

/* This call to mlock may fail if memory limits
* are set too low. This will not affect us
* at runtime. It just means some of the default
* root meta data may get swapped to disk */
mlock(&_root, sizeof(iso_alloc_root));

_root->zone_handle_mask = rand_uint64();
_root->big_zone_next_mask = rand_uint64();
_root->big_zone_canary_secret = rand_uint64();
Expand Down Expand Up @@ -574,6 +584,8 @@ __attribute__((destructor(LAST_DTOR))) void iso_alloc_dtor(void) {
munmap(_root, sizeof(iso_alloc_root));
#endif

munmap(_zone_lookup_table, ZONE_LOOKUP_TABLE_SZ);

UNLOCK_ROOT();
}

Expand All @@ -600,9 +612,10 @@ INTERNAL_HIDDEN iso_alloc_zone *iso_new_zone(size_t size, bool internal) {
return zone;
}

/* Requires the root is locked */
INTERNAL_HIDDEN iso_alloc_zone *_iso_new_zone(size_t size, bool internal) {
if(_root->zones_used >= MAX_ZONES) {
LOG_AND_ABORT("Cannot allocate additional zones");
LOG_AND_ABORT("Cannot allocate additional zones. I have already allocated %d", _root->zones_used);
}

if(size > SMALL_SZ_MAX) {
Expand Down Expand Up @@ -691,6 +704,36 @@ INTERNAL_HIDDEN iso_alloc_zone *_iso_new_zone(size_t size, bool internal) {
POISON_ZONE(new_zone);
MASK_ZONE_PTRS(new_zone);

/* The lookup table is never used for custom zones */
if(internal == true) {
/* If no other zones of this size exist then set the
* index in the zone lookup table to its index */
if(_zone_lookup_table[size] == 0) {
_zone_lookup_table[size] = _root->zones_used;
} else {
/* Other zones exist that hold this size. We need to
* fixup the most recent ones next_sz_index member.
* We do this by walking the list using next_sz_index */
for(int32_t i = _zone_lookup_table[size]; i < _root->zones_used;) {
iso_alloc_zone *zt = &_root->zones[i];

if(zt->chunk_size != size) {
LOG_AND_ABORT("Inconsistent lookup table for zone[%d] chunk size %d (%d)", zt->index, zt->chunk_size, size);
}

/* Follow this zone's next_sz_index member */
if(zt->next_sz_index != 0) {
i = zt->next_sz_index;
} else {
/* If this zones next_sz_index is zero then set
* it to the zone we just created and break */
zt->next_sz_index = new_zone->index;
break;
}
}
}
}

_root->zones_used++;

return new_zone;
Expand Down Expand Up @@ -831,7 +874,46 @@ INTERNAL_HIDDEN iso_alloc_zone *iso_find_zone_fit(size_t size) {
iso_alloc_zone *zone = NULL;
int32_t i = 0;

#if !SMALL_MEM_STARTUP
if(IS_ALIGNED(size) != 0) {
size = ALIGN_SZ_UP(size);
}

/* Fast path via lookup table */
if(_zone_lookup_table[size] != 0) {
i = _zone_lookup_table[size];

for(; i < _root->zones_used;) {
zone = &_root->zones[i];

if(zone->chunk_size != size) {
LOG_AND_ABORT("Zone lookup table failed to match sizes for zone[%d](%d) for chunk size (%d)", zone->index, zone->chunk_size, size);
}

if(zone->internally_managed == false) {
LOG_AND_ABORT("Lookup table should never contain custom zones");
}

bool fits = iso_does_zone_fit(zone, size);

if(fits == true) {
return zone;
}

if(zone->next_sz_index != 0) {
i = zone->next_sz_index;
} else {
/* We have reached the end of our linked zones. The
* lookup table failed to find us a usable zone.
* Instead of creating a new one we will break out
* of this loop and try iterating through all zones,
* including ones we may have skipped over, to find
* a suitable candidate. */
break;
}
}
}

#if SMALL_MEM_STARTUP
/* A simple optimization to find which default zone
* should fit this allocation. If we fail then a
* slower iterative approach is used. The longer a
Expand Down
2 changes: 1 addition & 1 deletion src/iso_alloc_profiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ INTERNAL_HIDDEN uint64_t __iso_alloc_mem_usage() {
iso_alloc_zone *zone = &_root->zones[i];
mem_usage += zone->bitmap_size;
mem_usage += ZONE_USER_SIZE;
LOG("Zone[%d] holds %d byte chunks, megabytes (%d)", zone->index, zone->chunk_size, (ZONE_USER_SIZE / MEGABYTE_SIZE));
LOG("Zone[%d] holds %d byte chunks, megabytes (%d) next zone = %d", zone->index, zone->chunk_size, (ZONE_USER_SIZE / MEGABYTE_SIZE), zone->next_sz_index);
}

return (mem_usage / MEGABYTE_SIZE);
Expand Down