fix: Multiple Immix/Commix GC stability improvements (#3790)
* A bunch of fixes to the GC:
* Fix register marking - the setjmp buffer might not be word-aligned
* Fix ordering of relative source directories in SourceCodeCache
* Use Rtti* instead of void* in GC signatures; assorted other fixes/cleanups
* Add a dedicated GC array allocation function (see the layout sketch below the change summary)
* Fix generated memory layouts for arrays.
* Prevent copying the object monitor during clone operations.
* Don't test commix+lto=thin in multiarch CI, use commix+lto=none instead
WojciechMazur committed Feb 27, 2024
1 parent fd55278 commit b910afc
Showing 31 changed files with 450 additions and 311 deletions.
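
Several hunks below assign through Object and ArrayHeader pointers whose definitions live in shared GC headers that are not part of this diff. As orientation only, a minimal sketch of the assumed layouts - field order, field types, and the rt.id accessor are illustrative assumptions, not taken from this commit:

    #include <stddef.h>
    #include <stdint.h>

    typedef struct Rtti {
        struct {
            int32_t id;   // class id, e.g. __object_array_id (assumed shape)
        } rt;
        int32_t size;     // base size; alloc_array adds length * stride to it
        // ... vtable and further metadata follow in the real definition
    } Rtti;

    typedef struct Object {
        Rtti *rtti;       // every heap object starts with its type info
    } Object;

    typedef struct ArrayHeader {
        Rtti *rtti;
        int32_t length;   // element count
        int32_t stride;   // element size in bytes
    } ArrayHeader;

    // Total size requested by scalanative_GC_alloc_array:
    static inline size_t array_alloc_size(const Rtti *info, size_t length,
                                          size_t stride) {
        return (size_t)info->size + length * stride;
    }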
5 changes: 3 additions & 2 deletions .github/workflows/run-tests-linux-multiarch.yml
@@ -98,13 +98,14 @@ jobs:
           lto: none
         - build-mode: debug
           lto: thin
+        # Fails frequently
+        - gc: commix
+          lto: thin
         # Reduce amount of build combinations
         - gc: immix
           lto: none
         - gc: immix
           build-mode: debug
-        - gc: commix
-          lto: none
         - gc: commix
           build-mode: debug
         - gc: boehm
33 changes: 21 additions & 12 deletions nativelib/src/main/resources/scala-native/gc/boehm/gc.c
@@ -15,28 +15,36 @@
 
 void scalanative_GC_init() { GC_INIT(); }
 
-void *scalanative_GC_alloc(void *info, size_t size) {
-    void **alloc = (void **)GC_malloc(size);
-    *alloc = info;
+void *scalanative_GC_alloc(Rtti *info, size_t size) {
+    Object *alloc = (Object *)GC_malloc(size);
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-void *scalanative_GC_alloc_small(void *info, size_t size) {
-    void **alloc = (void **)GC_malloc(size);
-    *alloc = info;
+void *scalanative_GC_alloc_small(Rtti *info, size_t size) {
+    Object *alloc = (Object *)GC_malloc(size);
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-void *scalanative_GC_alloc_large(void *info, size_t size) {
-    void **alloc = (void **)GC_malloc(size);
-    *alloc = info;
+void *scalanative_GC_alloc_large(Rtti *info, size_t size) {
+    Object *alloc = (Object *)GC_malloc(size);
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-void *scalanative_GC_alloc_atomic(void *info, size_t size) {
-    void **alloc = (void **)GC_malloc_atomic(size);
+void *scalanative_GC_alloc_array(Rtti *info, size_t length, size_t stride) {
+    size_t size = info->size + length * stride;
+    ArrayHeader *alloc;
+    int32_t classId = info->rt.id;
+    if (classId == __object_array_id || classId == __blob_array_id)
+        alloc = (ArrayHeader *)GC_malloc(size);
+    else
+        alloc = (ArrayHeader *)GC_malloc_atomic(size);
     memset(alloc, 0, size);
-    *alloc = info;
+    alloc->rtti = info;
+    alloc->length = length;
+    alloc->stride = stride;
     return (void *)alloc;
 }
 

@@ -77,6 +85,7 @@ int scalanative_GC_pthread_create(pthread_t *thread, pthread_attr_t *attr,
 
 // ScalaNativeGC interface stubs. Boehm GC relies on STW using signal handlers
 void scalanative_GC_set_mutator_thread_state(GC_MutatorThreadState unused){};
+
 void scalanative_GC_yield(){};
 
 void scalanative_GC_add_roots(void *addr_low, void *addr_high) {
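
The new alloc_array picks between Boehm's two allocators: GC_malloc returns zero-filled, conservatively scanned memory, while GC_malloc_atomic returns memory the collector treats as pointer-free and does not guarantee to be zeroed - hence the unconditional memset above. A standalone sketch of the same decision, assuming libgc's <gc.h> (the may_contain_refs helper is illustrative):

    #include <gc.h>
    #include <stdbool.h>
    #include <stddef.h>
    #include <string.h>

    static void *alloc_array_payload(size_t size, bool may_contain_refs) {
        // Arrays of references must be scanned; primitive arrays need not be.
        void *mem = may_contain_refs ? GC_malloc(size)
                                     : GC_malloc_atomic(size);
        memset(mem, 0, size); // atomic memory is not guaranteed to be zeroed
        return mem;
    }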
40 changes: 25 additions & 15 deletions nativelib/src/main/resources/scala-native/gc/commix/Allocator.c
@@ -56,10 +56,12 @@ void Allocator_InitCursors(Allocator *allocator, bool canCollect) {
 void Allocator_Clear(Allocator *allocator) {
     BlockList_Clear(&allocator->recycledBlocks);
     allocator->recycledBlockCount = 0;
-    allocator->limit = NULL;
     allocator->block = NULL;
-    allocator->largeLimit = NULL;
     allocator->cursor = NULL;
+    allocator->limit = NULL;
+    allocator->largeBlock = NULL;
     allocator->largeCursor = NULL;
+    allocator->largeLimit = NULL;
 }
 
 bool Allocator_newOverflowBlock(Allocator *allocator) {
@@ -84,6 +86,7 @@ bool Allocator_newOverflowBlock(Allocator *allocator) {
  */
 word_t *Allocator_overflowAllocation(Allocator *allocator, size_t size) {
     word_t *start = allocator->largeCursor;
+    assert(start != NULL);
     word_t *end = (word_t *)((uint8_t *)start + size);
 
     // allocator->largeLimit == NULL implies end > allocator->largeLimit
@@ -105,6 +108,11 @@ word_t *Allocator_overflowAllocation(Allocator *allocator, size_t size) {
  */
 INLINE word_t *Allocator_tryAlloc(Allocator *allocator, size_t size) {
     word_t *start = allocator->cursor;
+    if (start == NULL) {
+        Allocator_InitCursors(allocator, true);
+        start = allocator->cursor;
+    }
+    assert(start != NULL);
     word_t *end = (word_t *)((uint8_t *)start + size);
 
     // allocator->limit == NULL implies end > allocator->limit
@@ -119,7 +127,6 @@ INLINE word_t *Allocator_tryAlloc(Allocator *allocator, size_t size) {
         if (Allocator_getNextLine(allocator)) {
             return Allocator_tryAlloc(allocator, size);
         }
-
         return NULL;
     }
 }
@@ -137,20 +144,22 @@ bool Allocator_getNextLine(Allocator *allocator) {
     if (block == NULL) {
         return Allocator_newBlock(allocator);
     }
-    word_t *blockStart = allocator->blockStart;
 
     int lineIndex = BlockMeta_FirstFreeLine(block);
     if (lineIndex == LAST_HOLE) {
         return Allocator_newBlock(allocator);
     }
 
+    word_t *blockStart = allocator->blockStart;
     word_t *line = Block_GetLineAddress(blockStart, lineIndex);
 
     allocator->cursor = line;
     FreeLineMeta *lineMeta = (FreeLineMeta *)line;
     uint16_t size = lineMeta->size;
+    if (size == 0)
+        return Allocator_newBlock(allocator);
+    assert(lineMeta->next == LAST_HOLE ||
+           (lineMeta->next >= 0 && lineMeta->next <= LINE_COUNT));
     BlockMeta_SetFirstFreeLine(block, lineMeta->next);
     allocator->limit = line + (size * WORDS_IN_LINE);
     assert(allocator->limit <= Block_GetBlockEnd(blockStart));
@@ -178,8 +187,8 @@ bool Allocator_newBlock(Allocator *allocator) {
         // get all the changes done by sweeping
         atomic_thread_fence(memory_order_acquire);
 #ifdef DEBUG_PRINT
-        printf("Allocator_newBlock RECYCLED %p %" PRIu32 "\n", block,
-               BlockMeta_GetBlockIndex(blockMetaStart, block));
+        printf("Allocator_newBlock RECYCLED %p %" PRIu32 " for %p\n", block,
+               BlockMeta_GetBlockIndex(blockMetaStart, block), allocator);
         fflush(stdout);
 #endif
         assert(block->debugFlag == dbg_partial_free);
@@ -190,21 +199,23 @@ bool Allocator_newBlock(Allocator *allocator) {
             allocator->heapStart, block);
 
         int lineIndex = BlockMeta_FirstFreeLine(block);
-        assert(lineIndex < LINE_COUNT);
+        assert(lineIndex >= 0 && lineIndex < LINE_COUNT);
         word_t *line = Block_GetLineAddress(blockStart, lineIndex);
 
-        allocator->cursor = line;
         FreeLineMeta *lineMeta = (FreeLineMeta *)line;
-        BlockMeta_SetFirstFreeLine(block, lineMeta->next);
         uint16_t size = lineMeta->size;
+        assert(size > 0);
+        assert(lineMeta->next == LAST_HOLE ||
+               (lineMeta->next >= 0 && lineMeta->next <= LINE_COUNT));
+        BlockMeta_SetFirstFreeLine(block, lineMeta->next);
+        allocator->cursor = line;
         allocator->limit = line + (size * WORDS_IN_LINE);
         assert(allocator->limit <= Block_GetBlockEnd(blockStart));
     } else {
         block = BlockAllocator_GetFreeBlock(allocator->blockAllocator);
 #ifdef DEBUG_PRINT
-        printf("Allocator_newBlock %p %" PRIu32 "\n", block,
-               BlockMeta_GetBlockIndex(blockMetaStart, block));
+        printf("Allocator_newBlock %p %" PRIu32 " for %p\n", block,
+               BlockMeta_GetBlockIndex(blockMetaStart, block), allocator);
         fflush(stdout);
 #endif
         if (block == NULL) {
@@ -259,11 +270,11 @@ NOINLINE word_t *Allocator_allocSlow(Allocator *allocator, Heap *heap,
 done:
     assert(Heap_IsWordInHeap(heap, object));
     assert(object != NULL);
-    memset(object, 0, size);
     ObjectMeta *objectMeta = Bytemap_Get(allocator->bytemap, object);
 #ifdef GC_ASSERTIONS
     ObjectMeta_AssertIsValidAllocation(objectMeta, size);
 #endif
+    memset(object, 0, size);
     ObjectMeta_SetAllocated(objectMeta);
     return object;
 }
@@ -307,19 +318,18 @@ INLINE word_t *Allocator_Alloc(Heap *heap, uint32_t size) {
     word_t *end = (word_t *)((uint8_t *)start + size);
 
     // Checks if the end of the block overlaps with the limit
-    if (end > allocator->limit) {
+    if (start == NULL || end > allocator->limit) {
         return Allocator_allocSlow(allocator, heap, size);
     }
 
     allocator->cursor = end;
 
-    memset(start, 0, size);
-
     word_t *object = start;
     ObjectMeta *objectMeta = Bytemap_Get(heap->bytemap, object);
 #ifdef GC_ASSERTIONS
     ObjectMeta_AssertIsValidAllocation(objectMeta, size);
 #endif
+    memset(start, 0, size);
     ObjectMeta_SetAllocated(objectMeta);
 
     // prefetch starting from 36 words away from the object start
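
Allocator_Alloc above is a bump-pointer fast path: the new start == NULL guard makes a cleared allocator (cursors reset by Allocator_Clear) fall through to the slow path instead of bumping a null cursor. A self-contained sketch of the pattern, with illustrative names:

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    typedef struct {
        uint8_t *cursor; // next free byte; NULL after a clear/reset
        uint8_t *limit;  // end of the current block
    } Bump;

    static void *bump_alloc(Bump *b, size_t size) {
        uint8_t *start = b->cursor;
        // NULL cursor or exhausted block: let the caller take the slow path.
        if (start == NULL || start + size > b->limit)
            return NULL;
        b->cursor = start + size;
        memset(start, 0, size); // objects are handed out zero-initialized
        return start;
    }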
nativelib/src/main/resources/scala-native/gc/commix/BlockAllocator.c
@@ -42,6 +42,7 @@ void BlockAllocator_Init(BlockAllocator *blockAllocator, word_t *blockMetaStart,
     blockAllocator->reservedSuperblock = (word_t)sLimit;
 
     blockAllocator->concurrent = false;
+    blockAllocator->freeBlockCount = blockCount;
 
     mutex_init(&blockAllocator->allocationLock);
 
@@ -114,6 +115,8 @@ BlockAllocator_getFreeBlockSlow(BlockAllocator *blockAllocator) {
         superblock->debugFlag = dbg_in_use;
 #endif
         BlockMeta_SetFlag(superblock, block_simple);
+        atomic_fetch_add_explicit(&blockAllocator->freeBlockCount, -1,
+                                  memory_order_relaxed);
         return superblock;
     } else {
         // as the last resort look in the superblock being coalesced
@@ -144,6 +147,8 @@ BlockAllocator_getFreeBlockSlow(BlockAllocator *blockAllocator) {
         block->debugFlag = dbg_in_use;
 #endif
         BlockMeta_SetFlag(block, block_simple);
+        atomic_fetch_add_explicit(&blockAllocator->freeBlockCount, -1,
+                                  memory_order_relaxed);
     }
     return block;
 }
@@ -249,6 +254,8 @@ BlockMeta *BlockAllocator_GetFreeSuperblock(BlockAllocator *blockAllocator,
         BlockMeta_GetBlockIndex(blockAllocator->blockMetaStart, superblock));
     fflush(stdout);
 #endif
+    atomic_fetch_add_explicit(&blockAllocator->freeBlockCount, -size,
+                              memory_order_relaxed);
     return superblock;
 }
 
@@ -426,10 +433,12 @@ void BlockAllocator_ReserveBlocks(BlockAllocator *blockAllocator) {
     }
 
 #ifdef GC_ASSERTIONS
-    BlockMeta *limit = superblock + SWEEP_RESERVE_BLOCKS;
-    for (BlockMeta *current = superblock; current < limit; current++) {
-        assert(BlockMeta_IsFree(current));
-        assert(current->debugFlag == dbg_free_in_collection);
+    if (superblock != NULL) {
+        BlockMeta *limit = superblock + SWEEP_RESERVE_BLOCKS;
+        for (BlockMeta *current = superblock; current < limit; current++) {
+            assert(BlockMeta_IsFree(current));
+            assert(current->debugFlag == dbg_free_in_collection);
+        }
     }
 #endif
 
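
The freeBlockCount updates added above use C11 relaxed atomics: the counter only needs to be updated atomically across threads, it is not used to order other memory operations. A minimal sketch of the pattern (names are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    static _Atomic uint32_t freeBlockCount;

    static void blocks_taken(uint32_t n) {
        // relaxed: no acquire/release ordering needed for a statistic
        atomic_fetch_sub_explicit(&freeBlockCount, n, memory_order_relaxed);
    }

    static void blocks_returned(uint32_t n) {
        atomic_fetch_add_explicit(&freeBlockCount, n, memory_order_relaxed);
    }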
42 changes: 22 additions & 20 deletions nativelib/src/main/resources/scala-native/gc/commix/CommixGC.c
@@ -25,8 +25,6 @@
 #include "MutatorThread.h"
 #include <stdatomic.h>
 
-void scalanative_GC_collect();
-
 void scalanative_afterexit() {
 #ifdef ENABLE_GC_STATS
     Stats_OnExit(heap.stats);
@@ -55,39 +53,43 @@ NOINLINE void scalanative_GC_init() {
 #endif
 }
 
-INLINE void *scalanative_GC_alloc(void *info, size_t size) {
+INLINE void *scalanative_GC_alloc(Rtti *info, size_t size) {
     size = MathUtils_RoundToNextMultiple(size, ALLOCATION_ALIGNMENT);
 
     assert(size % ALLOCATION_ALIGNMENT == 0);
 
-    void **alloc;
+    Object *alloc;
     if (size >= LARGE_BLOCK_SIZE) {
-        alloc = (void **)LargeAllocator_Alloc(&heap, size);
+        alloc = (Object *)LargeAllocator_Alloc(&heap, size);
     } else {
-        alloc = (void **)Allocator_Alloc(&heap, size);
+        alloc = (Object *)Allocator_Alloc(&heap, size);
     }
 
-    *alloc = info;
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-INLINE void *scalanative_GC_alloc_small(void *info, size_t size) {
+INLINE void *scalanative_GC_alloc_small(Rtti *info, size_t size) {
     size = MathUtils_RoundToNextMultiple(size, ALLOCATION_ALIGNMENT);
 
-    void **alloc = (void **)Allocator_Alloc(&heap, size);
-    *alloc = info;
+    Object *alloc = (Object *)Allocator_Alloc(&heap, size);
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-INLINE void *scalanative_GC_alloc_large(void *info, size_t size) {
+INLINE void *scalanative_GC_alloc_large(Rtti *info, size_t size) {
     size = MathUtils_RoundToNextMultiple(size, ALLOCATION_ALIGNMENT);
 
-    void **alloc = (void **)LargeAllocator_Alloc(&heap, size);
-    *alloc = info;
+    Object *alloc = (Object *)LargeAllocator_Alloc(&heap, size);
+    alloc->rtti = info;
     return (void *)alloc;
 }
 
-INLINE void *scalanative_GC_alloc_atomic(void *info, size_t size) {
-    return scalanative_GC_alloc(info, size);
+INLINE void *scalanative_GC_alloc_array(Rtti *info, size_t length,
+                                        size_t stride) {
+    size_t size = info->size + length * stride;
+    ArrayHeader *alloc = (ArrayHeader *)scalanative_GC_alloc(info, size);
+    alloc->length = length;
+    alloc->stride = stride;
+    return (void *)alloc;
 }
 
 INLINE void scalanative_GC_collect() { Heap_Collect(&heap); }
@@ -104,8 +106,8 @@ INLINE void scalanative_GC_register_weak_reference_handler(void *handler) {
 size_t scalanative_GC_get_init_heapsize() { return Settings_MinHeapSize(); }
 
 /* Get the maximum heap size */
-/* If the user has set a maximum heap size using the GC_MAXIMUM_HEAP_SIZE */
-/* environment variable,*/
+/* If the user has set a maximum heap size using the GC_MAXIMUM_HEAP_SIZE
+ * environment variable,*/
 /* then this size will be returned.*/
 /* Otherwise, the total size of the physical memory (guarded) will be returned*/
 size_t scalanative_GC_get_max_heapsize() {
@@ -133,7 +135,7 @@ static ThreadRoutineReturnType WINAPI ProxyThreadStartRoutine(void *args) {
 #else
 static ThreadRoutineReturnType ProxyThreadStartRoutine(void *args) {
 #endif
-    volatile word_t stackBottom = 1;
+    volatile word_t stackBottom = 0;
     stackBottom = (word_t)&stackBottom;
     WrappedFunctionCallArgs *wrapped = (WrappedFunctionCallArgs *)args;
     ThreadStartRoutine originalFn = wrapped->fn;
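
The stackBottom change keeps the usual trick for locating a thread's stack bottom: take the address of a local at the top of the thread routine; volatile keeps the compiler from optimizing the self-referential store away. A standalone sketch (word_t is assumed to be a uintptr_t-like alias):

    #include <stdint.h>
    #include <stdio.h>

    typedef uintptr_t word_t;

    static void thread_entry(void) {
        // The GC scans this thread's stack starting from this address.
        volatile word_t stackBottom = 0;
        stackBottom = (word_t)&stackBottom;
        printf("stack bottom at %p\n", (void *)stackBottom);
    }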
1 change: 1 addition & 0 deletions nativelib/src/main/resources/scala-native/gc/commix/Heap.c
@@ -219,6 +219,7 @@ void Heap_Init(Heap *heap, size_t minHeapSize, size_t maxHeapSize) {
 }
 
 void Heap_Collect(Heap *heap) {
+    MutatorThread *mutatorThread = currentMutatorThread;
 #ifdef SCALANATIVE_MULTITHREADING_ENABLED
     if (!Synchronizer_acquire())
         return;
