From 066cf7c792b7cb6af37f1aa439e7ab3fc9c83ded Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 12 Aug 2022 13:11:18 +0800 Subject: [PATCH 1/6] Collect xmalloc-ed garbages We register common types that contain underlying xmalloc-ed buffers, such as strings, arrays and hashes, as finalization candidates. When xmalloc allocates enough memory, it will trigger GC using MMTk's API. The GC will identify objects ready for finalization. We will call obj_free on them to free their underlying buffers. --- gc.c | 19 +++++++++++++++++++ mmtk.h | 1 + 2 files changed, 20 insertions(+) diff --git a/gc.c b/gc.c index 1e901657a990fe..4aaab5820b3401 100644 --- a/gc.c +++ b/gc.c @@ -2547,6 +2547,9 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #if USE_MMTK if (rb_mmtk_enabled_p()) { switch (RB_BUILTIN_TYPE(obj)) { + case T_STRING: + case T_ARRAY: + case T_HASH: case T_DATA: case T_FILE: case T_SYMBOL: @@ -12433,11 +12436,20 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz if (type == MEMOP_TYPE_MALLOC) { retry: if (malloc_increase > malloc_limit && ruby_native_thread_p() && !dont_gc_val()) { +#if USE_MMTK + if (rb_mmtk_enabled_p()) { + mmtk_handle_user_collection_request((MMTk_VMMutatorThread)GET_THREAD()); + gc_reset_malloc_info(objspace, true); + } else { +#endif if (ruby_thread_has_gvl_p() && is_lazy_sweeping(objspace)) { gc_rest(objspace); /* gc_rest can reduce malloc_increase */ goto retry; } garbage_collect_with_gvl(objspace, GPR_FLAG_MALLOC); +#if USE_MMTK + } +#endif } } @@ -15280,6 +15292,12 @@ rb_mmtk_scan_object_ruby_style(void *object) gc_mark_children(objspace, obj); } +static void +rb_mmtk_obj_free(void *object) { + printf("Called back from Finalization. 
Freeing %p\n", object); + obj_free(&rb_objspace, (VALUE)object); +} + RubyUpcalls ruby_upcalls = { rb_mmtk_init_gc_worker_thread, rb_mmtk_get_gc_thread_tls, @@ -15293,6 +15311,7 @@ RubyUpcalls ruby_upcalls = { rb_mmtk_scan_thread_roots, rb_mmtk_scan_thread_root, rb_mmtk_scan_object_ruby_style, + rb_mmtk_obj_free, }; // Use up to 80% of memory for the heap diff --git a/mmtk.h b/mmtk.h index d5dfcabf6a7890..2a00caa183bfb3 100644 --- a/mmtk.h +++ b/mmtk.h @@ -37,6 +37,7 @@ typedef struct { void (*scan_thread_roots)(void); void (*scan_thread_root)(MMTk_VMMutatorThread mutator, MMTk_VMWorkerThread worker); void (*scan_object_ruby_style)(void *object); + void (*obj_free)(void *object); } RubyUpcalls; /** From 55f4175328eeef4792ea52c142b2c0f91ba8db5a Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Fri, 12 Aug 2022 16:47:53 +0800 Subject: [PATCH 2/6] Use MMTk's official API to handle finalizers --- gc.c | 87 +++++++++++++++++++++++++++++++++++++------------------- mmtk.h | 22 +++++++++----- string.c | 6 ---- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/gc.c b/gc.c index 4aaab5820b3401..097655eb10cf01 100644 --- a/gc.c +++ b/gc.c @@ -2553,7 +2553,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, case T_DATA: case T_FILE: case T_SYMBOL: - mmtk_register_finalizable((void*)obj); + mmtk_add_finalizer((void*)obj); RUBY_DEBUG_LOG("Object registered for finalization: %p: %s %s", (void*)obj, rb_type_str(RB_BUILTIN_TYPE(obj)), @@ -4701,6 +4701,60 @@ force_chain_object(st_data_t key, st_data_t val, st_data_t arg) bool rb_obj_is_main_ractor(VALUE gv); +#if USE_MMTK +void +rb_mmtk_call_finalizer_inner(rb_objspace_t *objspace, VALUE obj) { + if (USE_RUBY_DEBUG_LOG) { + RUBY_DEBUG_LOG("Resurrected for obj_free: %p: %s %s", + resurrected, + rb_type_str(RB_BUILTIN_TYPE(obj)), + CLASS_OF(obj)==0?"(null)":rb_class2name(CLASS_OF(obj)) + ); + } + if (rb_obj_is_thread(obj)) { + RUBY_DEBUG_LOG("Skipped thread: %p: %s", resurrected, 
rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_mutex(obj)) { + RUBY_DEBUG_LOG("Skipped mutex: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_fiber(obj)) { + RUBY_DEBUG_LOG("Skipped fiber: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_main_ractor(obj)) { + RUBY_DEBUG_LOG("Skipped main ractor: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + obj_free(objspace, obj); + RUBY_DEBUG_LOG("Object freed: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); +} + +void +rb_mmtk_call_finalizer(rb_objspace_t *objspace, bool on_exit) +{ + if (on_exit) { + struct RawVecOfObjRef resurrrected_objs = mmtk_get_all_finalizers(); + + for (size_t i = 0; i < resurrrected_objs.len; i++) { + void *resurrected = resurrrected_objs.ptr[resurrrected_objs.len - i - 1]; + + VALUE obj = (VALUE)resurrected; + rb_mmtk_call_finalizer_inner(objspace, obj); + } + + mmtk_free_raw_vec_of_obj_ref(resurrrected_objs); + } else { + void *resurrected; + while ((resurrected = mmtk_get_finalized_object()) != NULL) { + VALUE obj = (VALUE)resurrected; + rb_mmtk_call_finalizer_inner(objspace, obj); + } + } +} +#endif + void rb_objspace_call_finalizer(rb_objspace_t *objspace) { @@ -4797,35 +4851,7 @@ rb_objspace_call_finalizer(rb_objspace_t *objspace) #if USE_MMTK if (rb_mmtk_enabled_p()) { - void *resurrected; - while ((resurrected = mmtk_poll_finalizable(true)) != NULL) { - VALUE obj = (VALUE)resurrected; - if (USE_RUBY_DEBUG_LOG) { - RUBY_DEBUG_LOG("Resurrected for obj_free: %p: %s %s", - resurrected, - rb_type_str(RB_BUILTIN_TYPE(obj)), - CLASS_OF(obj)==0?"(null)":rb_class2name(CLASS_OF(obj)) - ); - } - if (rb_obj_is_thread(obj)) { - RUBY_DEBUG_LOG("Skipped thread: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - continue; - } - if (rb_obj_is_mutex(obj)) { - RUBY_DEBUG_LOG("Skipped mutex: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - continue; - } - if 
(rb_obj_is_fiber(obj)) { - RUBY_DEBUG_LOG("Skipped fiber: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - continue; - } - if (rb_obj_is_main_ractor(obj)) { - RUBY_DEBUG_LOG("Skipped main ractor: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - continue; - } - obj_free(objspace, obj); - RUBY_DEBUG_LOG("Object freed: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - } + rb_mmtk_call_finalizer(objspace, true); } #endif @@ -12440,6 +12466,7 @@ objspace_malloc_increase_body(rb_objspace_t *objspace, void *mem, size_t new_siz if (rb_mmtk_enabled_p()) { mmtk_handle_user_collection_request((MMTk_VMMutatorThread)GET_THREAD()); gc_reset_malloc_info(objspace, true); + rb_mmtk_call_finalizer(objspace, false); } else { #endif if (ruby_thread_has_gvl_p() && is_lazy_sweeping(objspace)) { diff --git a/mmtk.h b/mmtk.h index 2a00caa183bfb3..b971294c3a6c12 100644 --- a/mmtk.h +++ b/mmtk.h @@ -24,6 +24,17 @@ typedef void* MMTk_VMMutatorThread; #define MMTK_GC_THREAD_KIND_CONTROLLER 0 #define MMTK_GC_THREAD_KIND_WORKER 1 +struct ObjectClosure { + void* (*c_function)(void* rust_closure, void* worker, void *data); + void* rust_closure; +}; + +struct RawVecOfObjRef { + void **ptr; + size_t len; + size_t capa; +}; + typedef struct { void (*init_gc_worker_thread)(MMTk_VMWorkerThread worker_tls); MMTk_VMWorkerThread (*get_gc_thread_tls)(void); @@ -111,13 +122,10 @@ extern void mmtk_add_phantom_candidate(void* ref); extern void mmtk_harness_begin(void *tls); extern void mmtk_harness_end(void *tls); -extern void mmtk_register_finalizable(void *reff); -extern void* mmtk_poll_finalizable(bool include_live); - -struct ObjectClosure { - void* (*c_function)(void* rust_closure, void* worker, void *data); - void* rust_closure; -}; +extern void mmtk_add_finalizer(void *reff); +extern void* mmtk_get_finalized_object(void); +extern struct RawVecOfObjRef mmtk_get_all_finalizers(void); +extern void mmtk_free_raw_vec_of_obj_ref(struct RawVecOfObjRef raw_vec); #ifdef __cplusplus } 
diff --git a/string.c b/string.c index 73cb9fc25e6e58..538104e67e0624 100644 --- a/string.c +++ b/string.c @@ -475,12 +475,6 @@ fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data, int exist } RBASIC(str)->flags |= RSTRING_FSTR; -#if USE_MMTK - if (rb_mmtk_enabled_p()) { - mmtk_register_finalizable((void *)str); - } -#endif - *key = *value = arg->fstr = str; return ST_CONTINUE; } From d1549d44cf9b0b491d4eee575d54460d8d97bb9f Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Mon, 15 Aug 2022 15:50:28 +0800 Subject: [PATCH 3/6] We don't need to expose obj_free to Rust. --- gc.c | 7 ------- mmtk.h | 1 - 2 files changed, 8 deletions(-) diff --git a/gc.c b/gc.c index 097655eb10cf01..8965934f5f6fbf 100644 --- a/gc.c +++ b/gc.c @@ -15319,12 +15319,6 @@ rb_mmtk_scan_object_ruby_style(void *object) gc_mark_children(objspace, obj); } -static void -rb_mmtk_obj_free(void *object) { - printf("Called back from Finalization. Freeing %p\n", object); - obj_free(&rb_objspace, (VALUE)object); -} - RubyUpcalls ruby_upcalls = { rb_mmtk_init_gc_worker_thread, rb_mmtk_get_gc_thread_tls, @@ -15338,7 +15332,6 @@ RubyUpcalls ruby_upcalls = { rb_mmtk_scan_thread_roots, rb_mmtk_scan_thread_root, rb_mmtk_scan_object_ruby_style, - rb_mmtk_obj_free, }; // Use up to 80% of memory for the heap diff --git a/mmtk.h b/mmtk.h index b971294c3a6c12..5009031d078897 100644 --- a/mmtk.h +++ b/mmtk.h @@ -48,7 +48,6 @@ typedef struct { void (*scan_thread_roots)(void); void (*scan_thread_root)(MMTk_VMMutatorThread mutator, MMTk_VMWorkerThread worker); void (*scan_object_ruby_style)(void *object); - void (*obj_free)(void *object); } RubyUpcalls; /** From a21845d259be1bdb11b0ae4a0d9a09b3753d8f2d Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 18 Aug 2022 17:59:54 +0800 Subject: [PATCH 4/6] Add more types as finalizable --- gc.c | 76 ++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 17 deletions(-) diff --git a/gc.c b/gc.c index 
8965934f5f6fbf..27cb7c71377f67 100644 --- a/gc.c +++ b/gc.c @@ -2531,6 +2531,64 @@ gc_event_hook_body(rb_execution_context_t *ec, rb_objspace_t *objspace, const rb #define gc_event_hook(objspace, event, data) gc_event_hook_prep(objspace, event, data, (void)0) +#if USE_MMTK +static inline void +maybe_register_finalizable(VALUE obj) { + // Any object that has non-trivial cleaning-up code in `obj_free` + // should be registered as "finalizable" to MMTk. + switch (RB_BUILTIN_TYPE(obj)) { + case T_OBJECT: + // FIXME: Ordinary objects can be non-embedded, too, + // but there are just too many such objects, + // and few of them have large buffers. + // Just let them leak for now. + // We'll prioritize eliminating the underlying buffer of ordinary objects. + break; + case T_MODULE: + case T_CLASS: + case T_STRING: + case T_ARRAY: + case T_HASH: + case T_REGEXP: + case T_DATA: + case T_MATCH: + case T_FILE: + case T_ICLASS: + case T_BIGNUM: + case T_STRUCT: + case T_SYMBOL: + case T_IMEMO: + mmtk_add_finalizer((void*)obj); + RUBY_DEBUG_LOG("Object registered for finalization: %p: %s %s", + (void*)obj, + rb_type_str(RB_BUILTIN_TYPE(obj)), + CLASS_OF(obj)==0?"(null)":rb_class2name(CLASS_OF(obj)) + ); + break; + case T_RATIONAL: + case T_COMPLEX: + case T_FLOAT: + // `obj_free` only increments counters for these types. + break; + case T_NIL: + case T_FIXNUM: + case T_TRUE: + case T_FALSE: + // These are non-heap value types. + case T_MOVED: + // Should not see this when object is just created. + case T_NODE: + // GC doesn't handle T_NODE. 
+ rb_bug("maybe_register_finalizable: unexpected data type 0x%x(%p) 0x%"PRIxVALUE, + BUILTIN_TYPE(obj), (void*)obj, RBASIC(obj)->flags); + break; + default: + rb_bug("maybe_register_finalizable: unknown data type 0x%x(%p) 0x%"PRIxVALUE, + BUILTIN_TYPE(obj), (void*)obj, RBASIC(obj)->flags); + } +} +#endif + static inline VALUE newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, VALUE obj) { @@ -2546,23 +2604,7 @@ newobj_init(VALUE klass, VALUE flags, int wb_protected, rb_objspace_t *objspace, #if USE_MMTK if (rb_mmtk_enabled_p()) { - switch (RB_BUILTIN_TYPE(obj)) { - case T_STRING: - case T_ARRAY: - case T_HASH: - case T_DATA: - case T_FILE: - case T_SYMBOL: - mmtk_add_finalizer((void*)obj); - RUBY_DEBUG_LOG("Object registered for finalization: %p: %s %s", - (void*)obj, - rb_type_str(RB_BUILTIN_TYPE(obj)), - klass==0?"(null)":rb_class2name(klass) - ); - break; - default: - break; // Do nothing. - } + maybe_register_finalizable(obj); } #endif From 552a7c1824c0f148ec59aaf6933e362b3128ae0d Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Thu, 18 Aug 2022 19:09:43 +0800 Subject: [PATCH 5/6] Don't skip special objects unless before exit --- gc.c | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/gc.c b/gc.c index 27cb7c71377f67..b6faa9e5231b08 100644 --- a/gc.c +++ b/gc.c @@ -4745,7 +4745,7 @@ bool rb_obj_is_main_ractor(VALUE gv); #if USE_MMTK void -rb_mmtk_call_finalizer_inner(rb_objspace_t *objspace, VALUE obj) { +rb_mmtk_call_finalizer_inner(rb_objspace_t *objspace, VALUE obj, bool on_exit) { if (USE_RUBY_DEBUG_LOG) { RUBY_DEBUG_LOG("Resurrected for obj_free: %p: %s %s", resurrected, @@ -4753,21 +4753,23 @@ rb_mmtk_call_finalizer_inner(rb_objspace_t *objspace, VALUE obj) { CLASS_OF(obj)==0?"(null)":rb_class2name(CLASS_OF(obj)) ); } - if (rb_obj_is_thread(obj)) { - RUBY_DEBUG_LOG("Skipped thread: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - return; - } - if 
(rb_obj_is_mutex(obj)) { - RUBY_DEBUG_LOG("Skipped mutex: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - return; - } - if (rb_obj_is_fiber(obj)) { - RUBY_DEBUG_LOG("Skipped fiber: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - return; - } - if (rb_obj_is_main_ractor(obj)) { - RUBY_DEBUG_LOG("Skipped main ractor: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); - return; + if (on_exit) { + if (rb_obj_is_thread(obj)) { + RUBY_DEBUG_LOG("Skipped thread: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_mutex(obj)) { + RUBY_DEBUG_LOG("Skipped mutex: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_fiber(obj)) { + RUBY_DEBUG_LOG("Skipped fiber: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } + if (rb_obj_is_main_ractor(obj)) { + RUBY_DEBUG_LOG("Skipped main ractor: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + return; + } } obj_free(objspace, obj); RUBY_DEBUG_LOG("Object freed: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); @@ -4783,7 +4785,7 @@ rb_mmtk_call_finalizer(rb_objspace_t *objspace, bool on_exit) void *resurrected = resurrrected_objs.ptr[resurrrected_objs.len - i - 1]; VALUE obj = (VALUE)resurrected; - rb_mmtk_call_finalizer_inner(objspace, obj); + rb_mmtk_call_finalizer_inner(objspace, obj, on_exit); } mmtk_free_raw_vec_of_obj_ref(resurrrected_objs); @@ -4791,7 +4793,7 @@ rb_mmtk_call_finalizer(rb_objspace_t *objspace, bool on_exit) void *resurrected; while ((resurrected = mmtk_get_finalized_object()) != NULL) { VALUE obj = (VALUE)resurrected; - rb_mmtk_call_finalizer_inner(objspace, obj); + rb_mmtk_call_finalizer_inner(objspace, obj, on_exit); } } } From c86dc56fc0f784a2e3995933f3c96e6bef63b26e Mon Sep 17 00:00:00 2001 From: Kunshan Wang Date: Wed, 24 Aug 2022 18:01:58 +0800 Subject: [PATCH 6/6] Clear object header after calling `obj_free` This allows the stack scanner to skip such invalid objects 
during stack scanning, and prevents scanning such invalid objects during tracing. --- gc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/gc.c b/gc.c index b6faa9e5231b08..cf9ab2bd9e6aec 100644 --- a/gc.c +++ b/gc.c @@ -4772,6 +4772,14 @@ rb_mmtk_call_finalizer_inner(rb_objspace_t *objspace, VALUE obj, bool on_exit) { } } obj_free(objspace, obj); RUBY_DEBUG_LOG("Object freed: %p: %s", resurrected, rb_type_str(RB_BUILTIN_TYPE(obj))); + + // The object may contain dangling pointers after `obj_free`. + // Clear its flags field to ensure the GC does not attempt to scan it. + // This is done after the log above so that the log still sees the object's type. + // TODO: We can instead clear the VO bit (a.k.a. alloc-bit) when mmtk-core supports that. + RVALUE *v = RANY(obj); + v->as.free.flags = 0; + v->as.free.next = NULL; } @@ -15359,6 +15367,11 @@ rb_mmtk_scan_object_ruby_style(void *object) VALUE obj = (VALUE)object; + // TODO: When mmtk-core can clear the VO bit (a.k.a. alloc-bit), we can remove this. + if (RB_BUILTIN_TYPE(obj) == T_NONE) { + return; + } + rb_objspace_t *objspace = &rb_objspace; gc_mark_children(objspace, obj); }