Skip to content

Commit 23c4a9a

Browse files
Buffer obj_free candidates.
Previously, every object allocation in rb_gc_impl_new_obj made a per-object FFI call into Rust (mmtk_add_obj_free_candidate), which acquired a mutex on one of the WeakProcessor's candidate vecs, pushed a single element, and released the mutex. That's an FFI crossing plus a mutex lock/unlock on every single allocation. Now, each MMTk_ractor_cache has two local buffers (parallel-freeable and non-parallel-freeable, 128 entries each). On allocation, we simply store the pointer into the appropriate local buffer. When a buffer fills up (or at stop-the-world / ractor-cache teardown), we flush the entire batch in one FFI call to mmtk_add_obj_free_candidates, which performs a single mutex acquisition and one extend_from_slice for the whole batch. The buffer size of 128 was chosen arbitrarily; we should investigate further what the optimum size is.
1 parent 2023bba commit 23c4a9a

File tree

4 files changed

+74
-13
lines changed

4 files changed

+74
-13
lines changed

gc/mmtk/mmtk.c

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,20 @@ struct objspace {
4848
unsigned int fork_hook_vm_lock_lev;
4949
};
5050

51+
#define OBJ_FREE_BUF_CAPACITY 128
52+
5153
struct MMTk_ractor_cache {
5254
struct ccan_list_node list_node;
5355

5456
MMTk_Mutator *mutator;
5557
bool gc_mutator_p;
5658

5759
MMTk_BumpPointer *bump_pointer;
60+
61+
MMTk_ObjectReference obj_free_parallel_buf[OBJ_FREE_BUF_CAPACITY];
62+
size_t obj_free_parallel_count;
63+
MMTk_ObjectReference obj_free_non_parallel_buf[OBJ_FREE_BUF_CAPACITY];
64+
size_t obj_free_non_parallel_count;
5865
};
5966

6067
struct MMTk_final_job {
@@ -143,6 +150,8 @@ rb_mmtk_resume_mutators(void)
143150
}
144151
}
145152

153+
static void mmtk_flush_obj_free_buffer(struct MMTk_ractor_cache *cache);
154+
146155
static void
147156
rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
148157
{
@@ -173,6 +182,11 @@ rb_mmtk_block_for_gc(MMTk_VMMutatorThread mutator)
173182

174183
rb_gc_vm_barrier();
175184

185+
struct MMTk_ractor_cache *rc;
186+
ccan_list_for_each(&objspace->ractor_caches, rc, list_node) {
187+
mmtk_flush_obj_free_buffer(rc);
188+
}
189+
176190
objspace->world_stopped = true;
177191

178192
pthread_cond_broadcast(&objspace->cond_world_stopped);
@@ -584,7 +598,7 @@ rb_gc_impl_ractor_cache_alloc(void *objspace_ptr, void *ractor)
584598
}
585599
objspace->live_ractor_cache_count++;
586600

587-
struct MMTk_ractor_cache *cache = malloc(sizeof(struct MMTk_ractor_cache));
601+
struct MMTk_ractor_cache *cache = calloc(1, sizeof(struct MMTk_ractor_cache));
588602
ccan_list_add(&objspace->ractor_caches, &cache->list_node);
589603

590604
cache->mutator = mmtk_bind_mutator(cache);
@@ -601,6 +615,8 @@ rb_gc_impl_ractor_cache_free(void *objspace_ptr, void *cache_ptr)
601615

602616
ccan_list_del(&cache->list_node);
603617

618+
mmtk_flush_obj_free_buffer(cache);
619+
604620
if (ruby_free_at_exit_p()) {
605621
MMTK_ASSERT(objspace->live_ractor_cache_count > 0);
606622
}
@@ -801,6 +817,42 @@ obj_can_parallel_free_p(VALUE obj)
801817
}
802818
}
803819

820+
static void
821+
mmtk_flush_obj_free_buffer(struct MMTk_ractor_cache *cache)
822+
{
823+
if (cache->obj_free_parallel_count > 0) {
824+
mmtk_add_obj_free_candidates(cache->obj_free_parallel_buf,
825+
cache->obj_free_parallel_count, true);
826+
cache->obj_free_parallel_count = 0;
827+
}
828+
if (cache->obj_free_non_parallel_count > 0) {
829+
mmtk_add_obj_free_candidates(cache->obj_free_non_parallel_buf,
830+
cache->obj_free_non_parallel_count, false);
831+
cache->obj_free_non_parallel_count = 0;
832+
}
833+
}
834+
835+
static inline void
836+
mmtk_buffer_obj_free_candidate(struct MMTk_ractor_cache *cache, VALUE obj)
837+
{
838+
if (obj_can_parallel_free_p(obj)) {
839+
cache->obj_free_parallel_buf[cache->obj_free_parallel_count++] = (MMTk_ObjectReference)obj;
840+
if (cache->obj_free_parallel_count >= OBJ_FREE_BUF_CAPACITY) {
841+
mmtk_add_obj_free_candidates(cache->obj_free_parallel_buf,
842+
cache->obj_free_parallel_count, true);
843+
cache->obj_free_parallel_count = 0;
844+
}
845+
}
846+
else {
847+
cache->obj_free_non_parallel_buf[cache->obj_free_non_parallel_count++] = (MMTk_ObjectReference)obj;
848+
if (cache->obj_free_non_parallel_count >= OBJ_FREE_BUF_CAPACITY) {
849+
mmtk_add_obj_free_candidates(cache->obj_free_non_parallel_buf,
850+
cache->obj_free_non_parallel_count, false);
851+
cache->obj_free_non_parallel_count = 0;
852+
}
853+
}
854+
}
855+
804856
VALUE
805857
rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags, bool wb_protected, size_t alloc_size)
806858
{
@@ -837,7 +889,7 @@ rb_gc_impl_new_obj(void *objspace_ptr, void *cache_ptr, VALUE klass, VALUE flags
837889
mmtk_post_alloc(ractor_cache->mutator, (void*)alloc_obj, alloc_size, MMTK_ALLOCATION_SEMANTICS_DEFAULT);
838890

839891
// TODO: only add when object needs obj_free to be called
840-
mmtk_add_obj_free_candidate(alloc_obj, obj_can_parallel_free_p((VALUE)alloc_obj));
892+
mmtk_buffer_obj_free_candidate(ractor_cache, (VALUE)alloc_obj);
841893

842894
objspace->total_allocated_objects++;
843895

gc/mmtk/mmtk.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,9 @@ void mmtk_post_alloc(MMTk_Mutator *mutator,
123123
size_t bytes,
124124
MMTk_AllocationSemantics semantics);
125125

126-
void mmtk_add_obj_free_candidate(MMTk_ObjectReference object, bool can_parallel_free);
126+
void mmtk_add_obj_free_candidates(const MMTk_ObjectReference *objects,
127+
size_t count,
128+
bool can_parallel_free);
127129

128130
void mmtk_declare_weak_references(MMTk_ObjectReference object);
129131

gc/mmtk/src/api.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,12 +297,16 @@ pub unsafe extern "C" fn mmtk_post_alloc(
297297
memory_manager::post_alloc::<Ruby>(unsafe { &mut *mutator }, refer, bytes, semantics)
298298
}
299299

300-
// TODO: Replace with buffered mmtk_add_obj_free_candidates
301300
#[no_mangle]
302-
pub extern "C" fn mmtk_add_obj_free_candidate(object: ObjectReference, can_parallel_free: bool) {
301+
pub unsafe extern "C" fn mmtk_add_obj_free_candidates(
302+
objects: *const ObjectReference,
303+
count: usize,
304+
can_parallel_free: bool,
305+
) {
306+
let objects = unsafe { std::slice::from_raw_parts(objects, count) };
303307
binding()
304308
.weak_proc
305-
.add_obj_free_candidate(object, can_parallel_free)
309+
.add_obj_free_candidates_batch(objects, can_parallel_free)
306310
}
307311

308312
// =============== Weak references ===============

gc/mmtk/src/weak_proc.rs

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,16 @@ impl WeakProcessor {
4848
}
4949
}
5050

51-
/// Add an object as a candidate for `obj_free`.
51+
/// Add a batch of objects as candidates for `obj_free`.
5252
///
53-
/// Multiple mutators can call it concurrently, so it has `&self`.
54-
pub fn add_obj_free_candidate(&self, object: ObjectReference, can_parallel_free: bool) {
53+
/// Amortizes mutex acquisition over the entire batch. Called when a
54+
/// mutator's local buffer is flushed (buffer full or stop-the-world).
55+
pub fn add_obj_free_candidates_batch(&self, objects: &[ObjectReference], can_parallel_free: bool) {
56+
if objects.is_empty() {
57+
return;
58+
}
59+
5560
if can_parallel_free {
56-
// Newly allocated objects are placed in parallel_obj_free_candidates using
57-
// round-robin. This may not be ideal for load balancing.
5861
let idx = self
5962
.parallel_obj_free_candidates_counter
6063
.fetch_add(1, Ordering::Relaxed)
@@ -63,12 +66,12 @@ impl WeakProcessor {
6366
self.parallel_obj_free_candidates[idx]
6467
.lock()
6568
.unwrap()
66-
.push(object);
69+
.extend_from_slice(objects);
6770
} else {
6871
self.non_parallel_obj_free_candidates
6972
.lock()
7073
.unwrap()
71-
.push(object);
74+
.extend_from_slice(objects);
7275
}
7376
}
7477

0 commit comments

Comments
 (0)