Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gh-112532: Improve mimalloc page visiting #114133

Merged
merged 1 commit into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Include/internal/mimalloc/mimalloc/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t*
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg);


#if MI_HUGE_PAGE_ABANDON
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
Expand Down Expand Up @@ -161,6 +163,8 @@ void _mi_heap_collect_abandon(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid);
void _mi_heap_unsafe_destroy_all(void);
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page);
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg);

// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
Expand Down
114 changes: 80 additions & 34 deletions Objects/mimalloc/heap.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ typedef bool (heap_page_visitor_fun)(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa
// Visit all pages in a heap; returns `false` if break was called.
static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void* arg1, void* arg2)
{
if (heap==NULL || heap->page_count==0) return 0;
if (heap==NULL || heap->page_count==0) return true;

// visit all pages
#if MI_DEBUG>1
Expand Down Expand Up @@ -521,11 +521,20 @@ typedef struct mi_heap_area_ex_s {
mi_page_t* page;
} mi_heap_area_ex_t;

static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_visit_fun* visitor, void* arg) {
mi_assert(xarea != NULL);
if (xarea==NULL) return true;
const mi_heap_area_t* area = &xarea->area;
mi_page_t* page = xarea->page;
// Precompute the constants used by `mi_fast_divide` to divide repeatedly by
// the same `divisor` without a hardware division.
//   divisor: the fixed divisor; asserted to be in the range [1, UINT32_MAX].
//   magic:   out-parameter receiving the multiplier.
//   shift:   out-parameter receiving the final right-shift amount.
// NOTE(review): the shift is presumably ceil(log2(divisor)), assuming `mi_clz`
// returns the number of leading zero bits of a word -- confirm against `mi_clz`.
static void mi_fast_divisor(size_t divisor, size_t* magic, size_t* shift) {
  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
  const size_t nbits = MI_INTPTR_BITS - mi_clz(divisor - 1);
  // distance from the divisor up to 2^nbits
  const unsigned long long excess = (1ULL << nbits) - divisor;
  *shift = nbits;
  *magic = (size_t)(((1ULL << 32) * excess) / divisor + 1);
}

// Divide `n` by the divisor that `magic` and `shift` were precomputed for
// (see `mi_fast_divisor`), using a multiply, an add and two shifts.
//   n: the dividend; asserted to fit in 32 bits.
// Returns the computed quotient.
static size_t mi_fast_divide(size_t n, size_t magic, size_t shift) {
  mi_assert_internal(n <= UINT32_MAX);
  // upper 32 bits of the 64-bit product n * magic
  const uint64_t hi = ((uint64_t)n * magic) >> 32;
  return (size_t)((hi + n) >> shift);
}

bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg) {
mi_assert(area != NULL);
if (area==NULL) return true;
mi_assert(page != NULL);
if (page == NULL) return true;

Expand All @@ -537,17 +546,39 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
const size_t ubsize = mi_page_usable_block_size(page); // without padding
size_t psize;
uint8_t* pstart = _mi_page_start(_mi_page_segment(page), page, &psize);
mi_heap_t* heap = mi_page_heap(page);

if (page->capacity == 1) {
// optimize page with one block
mi_assert_internal(page->used == 1 && page->free == NULL);
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
return visitor(heap, area, pstart, ubsize, arg);
}

if (page->used == page->capacity) {
// optimize full pages
uint8_t* block = pstart;
for (size_t i = 0; i < page->capacity; i++) {
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
return true;
}

// create a bitmap of free blocks.
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
memset(free_map, 0, sizeof(free_map));
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
size_t bmapsize = (page->capacity + MI_INTPTR_BITS - 1) / MI_INTPTR_BITS;
memset(free_map, 0, bmapsize * sizeof(uintptr_t));

if (page->capacity % MI_INTPTR_BITS != 0) {
size_t shift = (page->capacity % MI_INTPTR_BITS);
uintptr_t mask = (UINTPTR_MAX << shift);
free_map[bmapsize-1] = mask;
}

// fast repeated division by the block size
size_t magic, shift;
mi_fast_divisor(bsize, &magic, &shift);

#if MI_DEBUG>1
size_t free_count = 0;
Expand All @@ -559,10 +590,11 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
size_t offset = (uint8_t*)block - pstart;
mi_assert_internal(offset % bsize == 0);
size_t blockidx = offset / bsize; // Todo: avoid division?
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / sizeof(uintptr_t));
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
size_t blockidx = mi_fast_divide(offset, magic, shift);
mi_assert_internal(blockidx == offset / bsize);
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
size_t bitidx = (blockidx / MI_INTPTR_BITS);
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
free_map[bitidx] |= ((uintptr_t)1 << bit);
}
mi_assert_internal(page->capacity == (free_count + page->used));
Expand All @@ -571,19 +603,29 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v
#if MI_DEBUG>1
size_t used_count = 0;
#endif
for (size_t i = 0; i < page->capacity; i++) {
size_t bitidx = (i / sizeof(uintptr_t));
size_t bit = i - (bitidx * sizeof(uintptr_t));
uintptr_t m = free_map[bitidx];
if (bit == 0 && m == UINTPTR_MAX) {
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
uint8_t* block = pstart;
for (size_t i = 0; i < bmapsize; i++) {
if (free_map[i] == 0) {
// every block is in use
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
#if MI_DEBUG>1
used_count++;
#endif
if (!visitor(heap, area, block, ubsize, arg)) return false;
block += bsize;
}
}
else if ((m & ((uintptr_t)1 << bit)) == 0) {
#if MI_DEBUG>1
used_count++;
#endif
uint8_t* block = pstart + (i * bsize);
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
else {
uintptr_t m = ~free_map[i];
while (m) {
#if MI_DEBUG>1
used_count++;
#endif
size_t bitidx = mi_ctz(m);
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
m &= m - 1;
}
block += bsize * MI_INTPTR_BITS;
}
}
mi_assert_internal(page->used == used_count);
Expand All @@ -592,21 +634,24 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v

typedef bool (mi_heap_area_visit_fun)(const mi_heap_t* heap, const mi_heap_area_ex_t* area, void* arg);

// Fill in the heap area descriptor `area` from the state of `page`.
//   area: descriptor to initialize; every field assigned below is overwritten.
//   page: the page whose block layout and usage counters are described.
void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
  const size_t full_size = mi_page_block_size(page);
  const size_t usable_size = mi_page_usable_block_size(page);  // without padding

  // location and per-block sizes
  area->blocks = _mi_page_start(_mi_page_segment(page), page, NULL);
  area->block_size = usable_size;
  area->full_block_size = full_size;

  // usage counters; byte totals are based on the full block size
  area->used = page->used;  // number of blocks in use (#553)
  area->reserved = page->reserved * full_size;
  area->committed = page->capacity * full_size;
}

// Page visitor adapter: wraps `page` in an extended area descriptor and hands
// it to the area visitor that was passed type-erased in `vfun`.
//   heap: forwarded unchanged to the area visitor.
//   pq:   unused.
//   page: the page to describe.
//   vfun: a `mi_heap_area_visit_fun*` cast to `void*`.
//   arg:  opaque argument forwarded to the area visitor.
// Returns the area visitor's result.
static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t* page, void* vfun, void* arg) {
  MI_UNUSED(heap);
  MI_UNUSED(pq);
  mi_heap_area_visit_fun* fun = (mi_heap_area_visit_fun*)vfun;
  mi_heap_area_ex_t xarea;
  xarea.page = page;
  // `_mi_heap_area_init` sets every area field (reserved, committed, blocks,
  // used, block_size, full_block_size), so they are not assigned by hand here;
  // this keeps a single copy of the field computations.
  _mi_heap_area_init(&xarea.area, page);
  return fun(heap, &xarea, arg);
}

Expand All @@ -627,7 +672,7 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
mi_visit_blocks_args_t* args = (mi_visit_blocks_args_t*)arg;
if (!args->visitor(heap, &xarea->area, NULL, xarea->area.block_size, args->arg)) return false;
if (args->visit_blocks) {
return mi_heap_area_visit_blocks(xarea, args->visitor, args->arg);
return _mi_heap_area_visit_blocks(&xarea->area, xarea->page, args->visitor, args->arg);
}
else {
return true;
Expand All @@ -637,5 +682,6 @@ static bool mi_heap_area_visitor(const mi_heap_t* heap, const mi_heap_area_ex_t*
// Visit all blocks in a heap.
//   heap:         the heap to walk.
//   visit_blocks: forwarded to the area visitor; when false only the per-area
//                 call is made and individual blocks are not visited.
//   visitor:      callback to invoke.
//   arg:          opaque argument forwarded to `visitor`.
// Returns the result of `mi_heap_visit_areas`.
bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
  // NOTE(review): presumably processes the heap's pending delayed frees so the
  // walk sees up-to-date page state -- confirm against `_mi_heap_delayed_free_partial`.
  _mi_heap_delayed_free_partial((mi_heap_t*)heap);
  mi_visit_blocks_args_t visit_args = { visit_blocks, visitor, arg };
  const bool completed = mi_heap_visit_areas(heap, &mi_heap_area_visitor, &visit_args);
  return completed;
}
50 changes: 50 additions & 0 deletions Objects/mimalloc/segment.c
Original file line number Diff line number Diff line change
Expand Up @@ -1614,3 +1614,53 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t pag
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
return page;
}

/* -----------------------------------------------------------
Visit blocks in abandoned segments
----------------------------------------------------------- */

// Visit a single page: first call `visitor` once for the page's area (with a
// NULL heap and a NULL block), then, if `visit_blocks` is set, visit the blocks
// of the page via `_mi_heap_area_visit_blocks`.
//   segment: not referenced by this function.
//   page:    the page to visit.
//   visitor: callback; returning false stops the visit.
//   arg:     opaque argument forwarded to `visitor`.
// Returns false as soon as a visitor call returns false, true otherwise.
static bool mi_segment_visit_page(mi_segment_t* segment, mi_page_t* page, bool visit_blocks, mi_block_visit_fun* visitor, void* arg)
{
  mi_heap_area_t area;
  _mi_heap_area_init(&area, page);

  const bool keep_going = visitor(NULL, &area, NULL, area.block_size, arg);
  if (!keep_going || !visit_blocks) {
    // either the visitor asked to stop, or only the area itself was requested
    return keep_going;
  }
  return _mi_heap_area_visit_blocks(&area, page, visitor, arg);
}

// Walk the slices of `segment` and visit every in-use page whose tag equals
// `page_tag`.
//   visit_blocks, visitor, arg: forwarded to `mi_segment_visit_page`.
// Returns false as soon as a page visit returns false, true otherwise.
static bool mi_segment_visit_pages(mi_segment_t* segment, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
  const mi_slice_t* end;
  // each step advances by the slice count stored in the current slice
  for (mi_slice_t* slice = mi_slices_start_iterate(segment, &end); slice < end; slice += slice->slice_count) {
    if (!mi_slice_is_used(slice)) {
      continue;
    }
    mi_page_t* const page = mi_slice_to_page(slice);
    if (page->tag != page_tag) {
      continue;
    }
    if (!mi_segment_visit_page(segment, page, visit_blocks, visitor, arg)) {
      return false;
    }
  }
  return true;
}

// Visit all blocks in the abandoned segments of a pool
bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This (and therefore the previous 2 functions) doesn't seem to be used anywhere?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will be used in the upcoming GC PR. Here is an example usage:

https://github.com/colesbury/cpython/blob/8314c7c1d9d9670d4a83b9dc12f23611493c8eaa/Python/gc_free_threading.c#L226-L227

I put them in this PR because:

  1. Keeping the mimalloc changes separate makes them a bit easier to track and upstream
  2. The GC PR will be big and doing this first makes the upcoming PR a bit smaller

// Note: this is not safe if any other thread is abandoning or claiming segments from the pool
mi_segment_t* segment = mi_tagged_segment_ptr(pool->abandoned);
while (segment != NULL) {
if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false;
segment = segment->abandoned_next;
}

segment = pool->abandoned_visited;
while (segment != NULL) {
if (!mi_segment_visit_pages(segment, page_tag, visit_blocks, visitor, arg)) return false;
segment = segment->abandoned_next;
}

return true;
}
Loading