From 74194f3089da9c0b727bc63274c7cbfbc69d63a2 Mon Sep 17 00:00:00 2001 From: Nia Waldvogel Date: Sat, 29 Nov 2025 19:58:03 -0500 Subject: [PATCH 1/2] runtime (gc_blocks.go): use a linked stack to scan marked objects The blocks GC originally used a fixed-size stack to hold objects to scan. When this stack overflowed, the GC would fully rescan all marked objects. This could cause the GC to degrade to O(n^2) when scanning large linked data structures. Instead of using a fixed-size stack, we now add a pointer field to the start of each object. This pointer field is used to implement an unbounded linked stack. This also consolidates the heap object scanning into one place, which simplifies the process. This comes at the cost of introducing a pointer field to the start of the object, plus the cost of aligning the result. This translates to: - 16 bytes of overhead on x86/arm64 with the conservative collector - 0 bytes of overhead on x86/arm64 with the precise collector (the layout field cost gets aligned up to 16 bytes anyway) - 8 bytes of overhead on other 64-bit systems - 4 bytes of overhead on 32-bit systems - 2 bytes of overhead on AVR --- builder/sizes_test.go | 6 +- src/runtime/gc_blocks.go | 153 +++++++++++++++------------------ src/runtime/gc_conservative.go | 18 +++- src/runtime/gc_precise.go | 31 ++++--- 4 files changed, 105 insertions(+), 103 deletions(-) diff --git a/builder/sizes_test.go b/builder/sizes_test.go index c1d44f443d..7349d5757c 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3896, 280, 0, 2268}, - {"microbit", "examples/serial", 2860, 360, 8, 2272}, - {"wioterminal", "examples/pininterrupt", 7361, 1491, 116, 6912}, + {"hifive1b", "examples/echo", 3756, 280, 0, 2268}, + {"microbit", "examples/serial", 2756, 340, 8, 2272}, + {"wioterminal", "examples/pininterrupt", 7297, 1491, 116, 6912}, // TODO: also check wasm. Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index 408656b258..2ee7c9d2f8 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -46,11 +46,11 @@ const ( bytesPerBlock = wordsPerBlock * unsafe.Sizeof(heapStart) stateBits = 2 // how many bits a block state takes (see blockState type) blocksPerStateByte = 8 / stateBits - markStackSize = 8 * unsafe.Sizeof((*int)(nil)) // number of to-be-marked blocks to queue before forcing a rescan ) var ( metadataStart unsafe.Pointer // pointer to the start of the heap metadata + scanList *objHeader // scanList is a singly linked list of heap objects that have been marked but not scanned nextAlloc gcBlock // the next block that should be tried by the allocator endBlock gcBlock // the block just past the end of the available space gcTotalAlloc uint64 // total number of bytes allocated @@ -225,6 +225,15 @@ func (b gcBlock) unmark() { } } +// objHeader is a structure prepended to every heap object to hold metadata. +type objHeader struct { + // next is the next object to scan after this. + next *objHeader + + // layout holds the layout bitmap used to find pointers in the object. 
+ layout gcLayout +} + func isOnHeap(ptr uintptr) bool { return ptr >= heapStart && ptr < uintptr(metadataStart) } @@ -315,13 +324,10 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { runtimePanicAt(returnAddress(0), "heap alloc in interrupt") } - // Round the size up to a multiple of blocks. + // Round the size up to a multiple of blocks, adding space for the header. rawSize := size + size += align(unsafe.Sizeof(objHeader{})) size += bytesPerBlock - 1 - if preciseHeap { - // Add space for the layout. - size += align(unsafe.Sizeof(layout)) - } if size < rawSize { // The size overflowed. runtimePanicAt(returnAddress(0), "out of memory") @@ -414,20 +420,18 @@ func alloc(size uintptr, layout unsafe.Pointer) unsafe.Pointer { i.setState(blockStateTail) } + // Create the object header. + pointer := thisAlloc.pointer() + header := (*objHeader)(pointer) + header.layout = parseGCLayout(layout) + // We've claimed this allocation, now we can unlock the heap. gcLock.Unlock() // Return a pointer to this allocation. - pointer := thisAlloc.pointer() - if preciseHeap { - // Store the object layout at the start of the object. - // TODO: this wastes a little bit of space on systems with - // larger-than-pointer alignment requirements. - *(*unsafe.Pointer)(pointer) = layout - add := align(unsafe.Sizeof(layout)) - pointer = unsafe.Add(pointer, add) - size -= add - } + add := align(unsafe.Sizeof(objHeader{})) + pointer = unsafe.Add(pointer, add) + size -= add memzero(pointer, size) return pointer } @@ -562,42 +566,33 @@ func markCurrentGoroutineStack(sp uintptr) { markRoot(0, sp) } -// stackOverflow is a flag which is set when the GC scans too deep while marking. -// After it is set, all marked allocations must be re-scanned. -var stackOverflow bool - -// startMark starts the marking process on a root and all of its children. -func startMark(root gcBlock) { - var stack [markStackSize]gcBlock - stack[0] = root - root.setState(blockStateMark) - stackLen := 1 - for stackLen > 0 { - // Pop a block off of the stack. - stackLen-- - block := stack[stackLen] - if gcDebug { - println("stack popped, remaining stack:", stackLen) +// finishMark finishes the marking process by scanning all heap objects on scanList. +func finishMark() { + for { + // Remove an object from the scan list. + obj := scanList + if obj == nil { + return } + scanList = obj.next - // Scan all pointers inside the block. - scanner := newGCObjectScanner(block) + // Create a scanner with the object layout. + scanner := obj.layout.scanner() if scanner.pointerFree() { // This object doesn't contain any pointers. // This is a fast path for objects like make([]int, 4096). continue } - start, end := block.address(), block.findNext().address() - if preciseHeap { - // The first word of the object is just the pointer layout value. - // Skip it. - start += align(unsafe.Sizeof(uintptr(0))) - } + + // Scan all pointers in the object. + start := uintptr(unsafe.Pointer(obj)) + align(unsafe.Sizeof(objHeader{})) + end := blockFromAddr(uintptr(unsafe.Pointer(obj))).findNext().address() + for addr := start; addr != end; addr += unsafe.Alignof(addr) { // Load the word. word := *(*uintptr)(unsafe.Pointer(addr)) - if !scanner.nextIsPointer(word, root.address(), addr) { + if !scanner.nextIsPointer(word, uintptr(unsafe.Pointer(obj)), addr) { // Not a heap pointer. continue } @@ -628,58 +623,46 @@ func startMark(root gcBlock) { } referencedBlock.setState(blockStateMark) - if stackLen == len(stack) { - // The stack is full. 
- // It is necessary to rescan all marked blocks once we are done. - stackOverflow = true - if gcDebug { - println("gc stack overflowed") - } - continue - } - - // Push the pointer onto the stack to be scanned later. - stack[stackLen] = referencedBlock - stackLen++ + // Add the object to the scan list. + header := (*objHeader)(referencedBlock.pointer()) + header.next = scanList + scanList = header } } } -// finishMark finishes the marking process by processing all stack overflows. -func finishMark() { - for stackOverflow { - // Re-mark all blocks. - stackOverflow = false - for block := gcBlock(0); block < endBlock; block++ { - if block.state() != blockStateMark { - // Block is not marked, so we do not need to rescan it. - continue - } +// mark a GC root at the address addr. +func markRoot(addr, root uintptr) { + // Find the heap block corresponding to the root. + if !isOnHeap(root) { + // This is not a heap pointer. + return + } + block := blockFromAddr(root) - // Re-mark the block. - startMark(block) - } + // Find the head of the corresponding object. + if block.state() == blockStateFree { + // The to-be-marked object doesn't actually exist. + // This could either be a dangling pointer (oops!) but most likely + // just a false positive. + return } -} + head := block.findHead() -// mark a GC root at the address addr. -func markRoot(addr, root uintptr) { - if isOnHeap(root) { - block := blockFromAddr(root) - if block.state() == blockStateFree { - // The to-be-marked object doesn't actually exist. - // This could either be a dangling pointer (oops!) but most likely - // just a false positive. - return - } - head := block.findHead() - if head.state() != blockStateMark { - if gcDebug { - println("found unmarked pointer", root, "at address", addr) - } - startMark(head) - } + // Mark the object. + if head.state() == blockStateMark { + // This object is already marked. + return + } + if gcDebug { + println("found unmarked pointer", root, "at address", addr) } + head.setState(blockStateMark) + + // Add the object to the scan list. + header := (*objHeader)(head.pointer()) + header.next = scanList + scanList = header } // Sweep goes through all memory and frees unmarked memory. diff --git a/src/runtime/gc_conservative.go b/src/runtime/gc_conservative.go index 90e5cb098f..f7b8743705 100644 --- a/src/runtime/gc_conservative.go +++ b/src/runtime/gc_conservative.go @@ -6,15 +6,27 @@ package runtime -const preciseHeap = false +import "unsafe" -type gcObjectScanner struct { +// gcLayout tracks pointer locations in a heap object. +// The conservative GC treats all locations as potential pointers, so this doesn't need to store anything. +type gcLayout struct { +} + +// parseGCLayout stores the layout information passed to alloc into a gcLayout value. +// The conservative GC discards this information. +func parseGCLayout(layout unsafe.Pointer) gcLayout { + return gcLayout{} } -func newGCObjectScanner(block gcBlock) gcObjectScanner { +// scanner creates a gcObjectScanner with this layout. +func (l gcLayout) scanner() gcObjectScanner { return gcObjectScanner{} } +type gcObjectScanner struct { +} + func (scanner *gcObjectScanner) pointerFree() bool { // We don't know whether this object contains pointers, so conservatively // return false. 
diff --git a/src/runtime/gc_precise.go b/src/runtime/gc_precise.go index aa716585c8..96f813f58f 100644 --- a/src/runtime/gc_precise.go +++ b/src/runtime/gc_precise.go @@ -59,19 +59,19 @@ import "unsafe" const preciseHeap = true -type gcObjectScanner struct { - index uintptr - size uintptr - bitmap uintptr - bitmapAddr unsafe.Pointer +// parseGCLayout stores the layout information passed to alloc into a gcLayout value. +func parseGCLayout(layout unsafe.Pointer) gcLayout { + return gcLayout{layout: uintptr(layout)} } -func newGCObjectScanner(block gcBlock) gcObjectScanner { - if gcAsserts && block != block.findHead() { - runtimePanic("gc: object scanner must start at head") - } - scanner := gcObjectScanner{} - layout := *(*uintptr)(unsafe.Pointer(block.address())) +// gcLayout tracks pointer locations in a heap object. +type gcLayout struct { + layout uintptr +} + +// scanner creates a gcObjectScanner with this layout. +func (l gcLayout) scanner() (scanner gcObjectScanner) { + layout := l.layout if layout == 0 { // Unknown layout. Assume all words in the object could be pointers. // This layout value below corresponds to a slice of pointers like: @@ -104,7 +104,14 @@ func newGCObjectScanner(block gcBlock) gcObjectScanner { scanner.size = *(*uintptr)(layoutAddr) scanner.bitmapAddr = unsafe.Add(layoutAddr, unsafe.Sizeof(uintptr(0))) } - return scanner + return +} + +type gcObjectScanner struct { + index uintptr + size uintptr + bitmap uintptr + bitmapAddr unsafe.Pointer } func (scanner *gcObjectScanner) pointerFree() bool { From 11d283de250d9e91379333a24882f6f89b962723 Mon Sep 17 00:00:00 2001 From: Nia Waldvogel Date: Sun, 30 Nov 2025 12:21:55 -0500 Subject: [PATCH 2/2] runtime (gc_blocks.go): simplify scanning logic Loop over valid pointer locations in heap objects instead of checking if each location is valid. The conservative scanning code is now shared between markRoots and the heap scan. This also removes the ending alignment requirement from markRoots, since the new scan* functions do not require an aligned length. This requirement was occasionally violated by the linux global marking code. This saves some code space and has negligible impact on performance. --- builder/sizes_test.go | 6 +- src/runtime/gc_blocks.go | 75 +++++------------ src/runtime/gc_conservative.go | 23 ++---- src/runtime/gc_precise.go | 142 ++++++++++++++++----------------- 4 files changed, 99 insertions(+), 147 deletions(-) diff --git a/builder/sizes_test.go b/builder/sizes_test.go index 7349d5757c..11dcb96ea0 100644 --- a/builder/sizes_test.go +++ b/builder/sizes_test.go @@ -42,9 +42,9 @@ func TestBinarySize(t *testing.T) { // This is a small number of very diverse targets that we want to test. tests := []sizeTest{ // microcontrollers - {"hifive1b", "examples/echo", 3756, 280, 0, 2268}, - {"microbit", "examples/serial", 2756, 340, 8, 2272}, - {"wioterminal", "examples/pininterrupt", 7297, 1491, 116, 6912}, + {"hifive1b", "examples/echo", 3568, 280, 0, 2268}, + {"microbit", "examples/serial", 2630, 342, 8, 2272}, + {"wioterminal", "examples/pininterrupt", 7175, 1493, 116, 6912}, // TODO: also check wasm. 
Right now this is difficult, because // wasm binaries are run through wasm-opt and therefore the diff --git a/src/runtime/gc_blocks.go b/src/runtime/gc_blocks.go index 2ee7c9d2f8..99ad6a8591 100644 --- a/src/runtime/gc_blocks.go +++ b/src/runtime/gc_blocks.go @@ -532,8 +532,7 @@ func runGC() (freeBytes uintptr) { // markRoots reads all pointers from start to end (exclusive) and if they look // like a heap pointer and are unmarked, marks them and scans that object as -// well (recursively). The start and end parameters must be valid pointers and -// must be aligned. +// well (recursively). The starting address must be valid and aligned. func markRoots(start, end uintptr) { if gcDebug { println("mark from", start, "to", end, int(end-start)) @@ -545,18 +544,21 @@ func markRoots(start, end uintptr) { if start%unsafe.Alignof(start) != 0 { runtimePanic("gc: unaligned start pointer") } - if end%unsafe.Alignof(end) != 0 { - runtimePanic("gc: unaligned end pointer") - } } - // Reduce the end bound to avoid reading too far on platforms where pointer alignment is smaller than pointer size. - // If the size of the range is 0, then end will be slightly below start after this. - end -= unsafe.Sizeof(end) - unsafe.Alignof(end) + // Scan the range conservatively. + scanConservative(start, end-start) +} - for addr := start; addr < end; addr += unsafe.Alignof(addr) { +// scanConservative scans all possible pointer locations in a range and marks referenced heap allocations. +// The starting address must be valid and pointer-aligned. +func scanConservative(addr, len uintptr) { + for len >= unsafe.Sizeof(addr) { root := *(*uintptr)(unsafe.Pointer(addr)) markRoot(addr, root) + + addr += unsafe.Alignof(addr) + len -= unsafe.Alignof(addr) } } @@ -576,58 +578,21 @@ func finishMark() { } scanList = obj.next - // Create a scanner with the object layout. - scanner := obj.layout.scanner() - if scanner.pointerFree() { + // Check if the object may contain pointers. + if obj.layout.pointerFree() { // This object doesn't contain any pointers. // This is a fast path for objects like make([]int, 4096). + // It skips the length calculation. continue } - // Scan all pointers in the object. - start := uintptr(unsafe.Pointer(obj)) + align(unsafe.Sizeof(objHeader{})) - end := blockFromAddr(uintptr(unsafe.Pointer(obj))).findNext().address() + // Compute the scan bounds. + objAddr := uintptr(unsafe.Pointer(obj)) + start := objAddr + align(unsafe.Sizeof(objHeader{})) + end := blockFromAddr(objAddr).findNext().address() - for addr := start; addr != end; addr += unsafe.Alignof(addr) { - // Load the word. - word := *(*uintptr)(unsafe.Pointer(addr)) - - if !scanner.nextIsPointer(word, uintptr(unsafe.Pointer(obj)), addr) { - // Not a heap pointer. - continue - } - - // Find the corresponding memory block. - referencedBlock := blockFromAddr(word) - - if referencedBlock.state() == blockStateFree { - // The to-be-marked object doesn't actually exist. - // This is probably a false positive. - if gcDebug { - println("found reference to free memory:", word, "at:", addr) - } - continue - } - - // Move to the block's head. - referencedBlock = referencedBlock.findHead() - - if referencedBlock.state() == blockStateMark { - // The block has already been marked by something else. - continue - } - - // Mark block. - if gcDebug { - println("marking block:", referencedBlock) - } - referencedBlock.setState(blockStateMark) - - // Add the object to the scan list. 
- header := (*objHeader)(referencedBlock.pointer()) - header.next = scanList - scanList = header - } + // Scan the object. + obj.layout.scan(start, end-start) } } diff --git a/src/runtime/gc_conservative.go b/src/runtime/gc_conservative.go index f7b8743705..a0c4f21c5d 100644 --- a/src/runtime/gc_conservative.go +++ b/src/runtime/gc_conservative.go @@ -8,34 +8,23 @@ package runtime import "unsafe" -// gcLayout tracks pointer locations in a heap object. -// The conservative GC treats all locations as potential pointers, so this doesn't need to store anything. -type gcLayout struct { -} - // parseGCLayout stores the layout information passed to alloc into a gcLayout value. // The conservative GC discards this information. func parseGCLayout(layout unsafe.Pointer) gcLayout { return gcLayout{} } -// scanner creates a gcObjectScanner with this layout. -func (l gcLayout) scanner() gcObjectScanner { - return gcObjectScanner{} -} - -type gcObjectScanner struct { +// gcLayout tracks pointer locations in a heap object. +// The conservative GC treats all locations as potential pointers, so this doesn't need to store anything. +type gcLayout struct { } -func (scanner *gcObjectScanner) pointerFree() bool { +func (l gcLayout) pointerFree() bool { // We don't know whether this object contains pointers, so conservatively // return false. return false } -// nextIsPointer returns whether this could be a pointer. Because the GC is -// conservative, we can't do much more than check whether the object lies -// somewhere in the heap. -func (scanner gcObjectScanner) nextIsPointer(ptr, parent, addrOfWord uintptr) bool { - return isOnHeap(ptr) +func (l gcLayout) scan(start, len uintptr) { + scanConservative(start, len) } diff --git a/src/runtime/gc_precise.go b/src/runtime/gc_precise.go index 96f813f58f..746dd40c03 100644 --- a/src/runtime/gc_precise.go +++ b/src/runtime/gc_precise.go @@ -57,98 +57,96 @@ package runtime import "unsafe" -const preciseHeap = true +const sizeFieldBits = 4 + (unsafe.Sizeof(uintptr(0)) / 4) // parseGCLayout stores the layout information passed to alloc into a gcLayout value. func parseGCLayout(layout unsafe.Pointer) gcLayout { - return gcLayout{layout: uintptr(layout)} + return gcLayout(layout) } // gcLayout tracks pointer locations in a heap object. -type gcLayout struct { - layout uintptr +type gcLayout uintptr + +func (layout gcLayout) pointerFree() bool { + return layout&1 != 0 && layout>>(sizeFieldBits+1) == 0 } -// scanner creates a gcObjectScanner with this layout. -func (l gcLayout) scanner() (scanner gcObjectScanner) { - layout := l.layout - if layout == 0 { - // Unknown layout. Assume all words in the object could be pointers. - // This layout value below corresponds to a slice of pointers like: - // make(*byte, N) - scanner.size = 1 - scanner.bitmap = 1 - } else if layout&1 != 0 { - // Layout is stored directly in the integer value. - // Determine format of bitfields in the integer. - const layoutBits = uint64(unsafe.Sizeof(layout) * 8) - var sizeFieldBits uint64 - switch layoutBits { // note: this switch should be resolved at compile time - case 16: - sizeFieldBits = 4 - case 32: - sizeFieldBits = 5 - case 64: - sizeFieldBits = 6 - default: - runtimePanic("unknown pointer size") - } +// scan an object with this element layout. +// The starting address must be valid and pointer-aligned. +// The length is rounded down to a multiple of the element size. +func (layout gcLayout) scan(start, len uintptr) { + switch { + case layout == 0: + // This is an unknown layout. 
+		// Scan conservatively.
+		// NOTE: This is *NOT* equivalent to a slice of pointers on AVR.
+		scanConservative(start, len)
+
+	case layout&1 != 0:
+		// The layout is stored directly in the integer value.
+		// Extract the bitfields.
+		size := uintptr(layout>>1) & (1<<sizeFieldBits - 1)
+		mask := uintptr(layout) >> (1 + sizeFieldBits)
 
-		// Extract values from the bitfields.
-		// See comment at the top of this file for more information.
-		scanner.size = (layout >> 1) & (1<<sizeFieldBits - 1)
-		scanner.bitmap = layout >> (1 + sizeFieldBits)
-	} else {
-		// Layout is stored separately in a global object.
+		// Scan with the extracted mask.
+		scanSimple(start, len, size*unsafe.Alignof(start), mask)
+
+	default:
+		// The layout is stored separately in a global object.
+		// Extract the size and bitmap.
 		layoutAddr := unsafe.Pointer(layout)
-		scanner.size = *(*uintptr)(layoutAddr)
-		scanner.bitmapAddr = unsafe.Add(layoutAddr, unsafe.Sizeof(uintptr(0)))
+		size := *(*uintptr)(layoutAddr)
+		bitmapPtr := unsafe.Add(layoutAddr, unsafe.Sizeof(uintptr(0)))
+		bitmapLen := (size + 7) / 8
+		bitmap := unsafe.Slice((*byte)(bitmapPtr), bitmapLen)
+
+		// Scan with the bitmap.
+		scanComplex(start, len, size*unsafe.Alignof(start), bitmap)
 	}
-	return
 }
 
-type gcObjectScanner struct {
-	index uintptr
-	size uintptr
-	bitmap uintptr
-	bitmapAddr unsafe.Pointer
-}
+// scanSimple scans an object with an integer bitmask of pointer locations.
+// The starting address must be valid and pointer-aligned.
+func scanSimple(start, len, size, mask uintptr) {
+	for len >= size {
+		// Scan this element.
+		scanWithMask(start, mask)
 
-func (scanner *gcObjectScanner) pointerFree() bool {
-	if scanner.bitmapAddr != nil {
-		// While the format allows for large objects without pointers, this is
-		// optimized by the compiler so if bitmapAddr is set, we know that there
-		// are at least some pointers in the object.
-		return false
+		// Move to the next element.
+		start += size
+		len -= size
 	}
-	// If the bitmap is zero, there are definitely no pointers in the object.
-	return scanner.bitmap == 0
 }
 
-func (scanner *gcObjectScanner) nextIsPointer(word, parent, addrOfWord uintptr) bool {
-	index := scanner.index
-	scanner.index++
-	if scanner.index == scanner.size {
-		scanner.index = 0
-	}
+// scanComplex scans an object with a bitmap of pointer locations.
+// The starting address must be valid and pointer-aligned.
+func scanComplex(start, len, size uintptr, bitmap []byte) {
+	for len >= size {
+		// Scan this element.
+		for i, mask := range bitmap {
+			addr := start + 8*unsafe.Alignof(start)*uintptr(i)
+			scanWithMask(addr, uintptr(mask))
+		}
 
-	if !isOnHeap(word) {
-		// Definitely isn't a pointer.
-		return false
+		// Move to the next element.
+		start += size
+		len -= size
 	}
+}
 
-	// Might be a pointer. Now look at the object layout to know for sure.
-	if scanner.bitmapAddr != nil {
-		if (*(*uint8)(unsafe.Add(scanner.bitmapAddr, index/8))>>(index%8))&1 == 0 {
-			return false
+// scanWithMask scans a portion of an object with a mask of pointer locations.
+// The address must be valid and pointer-aligned.
+func scanWithMask(addr, mask uintptr) {
+	// TODO: use ctz when available
+	for mask != 0 {
+		if mask&1 != 0 {
+			// Load and mark this pointer.
+			root := *(*uintptr)(unsafe.Pointer(addr))
+			markRoot(addr, root)
 		}
-		return true
-	}
-	if (scanner.bitmap>>index)&1 == 0 {
-		// not a pointer!
-		return false
-	}
-	// Probably a pointer.
-	return true
+		// Move to the next offset.
+		mask >>= 1
+		addr += unsafe.Alignof(addr)
+	}
 }
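
The sketch below is not part of the patches; it is a minimal, standalone Go illustration of the intrusive linked scan stack that patch 1 introduces (identifiers simplified, and the real objHeader carries a gcLayout rather than a name):

package main

import "fmt"

// objHeader mirrors the header prepended to every heap object: the next
// field threads marked-but-not-yet-scanned objects into a linked stack.
type objHeader struct {
	next *objHeader
	name string // stand-in for the real payload and layout metadata
}

// scanList is the head of the linked stack of objects waiting to be scanned.
var scanList *objHeader

// push marks an object by placing it on the scan list. Unlike a fixed-size
// mark stack, this can never overflow, so no rescan pass is needed.
func push(obj *objHeader) {
	obj.next = scanList
	scanList = obj
}

// drain pops and "scans" objects until the list is empty, the same shape as
// finishMark in patch 1 (a real GC would trace pointers and push children).
func drain() {
	for scanList != nil {
		obj := scanList
		scanList = obj.next
		fmt.Println("scanning", obj.name)
	}
}

func main() {
	push(&objHeader{name: "a"})
	push(&objHeader{name: "b"})
	drain()
}

Because the links live inside the object headers themselves, pushing a marked object needs no extra allocation and the scan queue cannot overflow, which is what removes the O(n^2) rescan path described in the first commit message.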