Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bad behaviour of caml_alloc_custom_mem #12318

Merged
merged 9 commits into from Nov 13, 2023
5 changes: 5 additions & 0 deletions Changes
Expand Up @@ -100,6 +100,11 @@ Working version
(Guillaume Munch-Maccagnoni, review by Anil Madhavapeddy and KC
Sivaramakrishnan)

- #12318: GC: simplify the meaning of custom_minor_max_size: blocks with
out-of-heap memory above this limit are now allocated directly in
the major heap.
(Damien Doligez, report by Stephen Dolan, review by Gabriel Scherer)

- #12408: `Domain.spawn` no longer leaks its functional argument for
the whole duration of the children domain lifetime.
(Guillaume Munch-Maccagnoni, review by Gabriel Scherer)
Expand Down
2 changes: 1 addition & 1 deletion runtime/caml/config.h
Expand Up @@ -258,7 +258,7 @@ typedef uint64_t uintnat;
/* Default setting for maximum size of custom objects counted as garbage
in the minor heap.
Documented in gc.mli */
#define Custom_minor_max_bsz_def 8192
#define Custom_minor_max_bsz_def 70000

/* Minimum amount of work to do in a major GC slice. */
#define Major_slice_work_min 512
Expand Down
9 changes: 9 additions & 0 deletions runtime/caml/custom.h
Expand Up @@ -52,18 +52,27 @@ extern "C" {
#endif


CAMLextern uintnat caml_custom_major_ratio;

CAMLextern value caml_alloc_custom(const struct custom_operations * ops,
uintnat size, /*size in bytes*/
mlsize_t mem, /*resources consumed*/
mlsize_t max /*max resources*/);

/* [caml_alloc_custom_mem] allocates a custom block with dependent memory
(memory outside the heap that will be reclaimed when the block is
finalized). If [mem] is greater than [custom_minor_max_size] (see gc.mli)
the block is allocated directly in the major heap. */
CAMLextern value caml_alloc_custom_mem(const struct custom_operations * ops,
uintnat size, /*size in bytes*/
mlsize_t mem /*memory consumed*/);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to gc.mli, mem and caml_minor_max_size are supposed to be bytes. This should be documented, for example /* memory consumed */ could be /* dependent memory in bytes */.

(The behavior of the runtime is scale-independent with respect to these variables, but we should fix a convention to avoid ending up with different libraries using different units, and so that users know how to configure the maximum.)


CAMLextern void
caml_register_custom_operations(const struct custom_operations * ops);

/* Return the current [max] factor for [caml_alloc_custom_mem] allocations. */
CAMLextern mlsize_t caml_custom_get_max_major (void);

/* Global variable moved to Caml_state in 4.10 */
#define caml_compare_unordered (Caml_state_field(compare_unordered))

Expand Down
1 change: 1 addition & 0 deletions runtime/caml/memory.h
Expand Up @@ -38,6 +38,7 @@ CAMLextern value caml_alloc_shr_noexc(mlsize_t wosize, tag_t);
CAMLextern value caml_alloc_shr_reserved (mlsize_t, tag_t, reserved_t);

CAMLextern void caml_adjust_gc_speed (mlsize_t, mlsize_t);
CAMLextern void caml_adjust_minor_gc_speed (mlsize_t, mlsize_t);
CAMLextern void caml_alloc_dependent_memory (mlsize_t bsz);
CAMLextern void caml_free_dependent_memory (mlsize_t bsz);
CAMLextern void caml_modify (volatile value *, value);
Expand Down
78 changes: 41 additions & 37 deletions runtime/custom.c
Expand Up @@ -32,42 +32,55 @@ uintnat caml_custom_major_ratio = Custom_major_ratio_def;
uintnat caml_custom_minor_ratio = Custom_minor_ratio_def;
uintnat caml_custom_minor_max_bsz = Custom_minor_max_bsz_def;

mlsize_t caml_custom_get_max_major (void)
{
/* The major ratio is a percentage relative to the major heap size.
A complete GC cycle will be done every time 2/3 of that much
memory is allocated for blocks in the major heap. Assuming
constant allocation and deallocation rates, this means there are
at most [M/100 * major-heap-size] bytes of floating garbage at
any time. The reason for a factor of 2/3 (or 1.5) is, roughly
speaking, because the major GC takes 1.5 cycles (previous cycle +
marking phase) before it starts to deallocate dead blocks
allocated during the previous cycle. [heap_size / 150] is really
[heap_size * (2/3) / 100] (but faster). */
return caml_heap_size(Caml_state->shared_heap) / 150
* caml_custom_major_ratio;
}

/* [mem] is an amount of out-of-heap resources, in the same units as
[max_major] and [max_minor]. When the cumulated amount of such
resources reaches [max_minor] (for resources held by the minor
heap) we do a minor collection; when it reaches [max_major] (for
resources held by the major heap), we guarantee that a major cycle
is done.

If [max_major] is 0, then [mem] is a number of bytes and the actual
limit is [caml_custom_get_max_major ()] computed at the
time when the custom block is promoted to the major heap.
*/
static value alloc_custom_gen (const struct custom_operations * ops,
uintnat bsz,
mlsize_t mem,
mlsize_t max_major,
mlsize_t mem_minor,
mlsize_t max_minor)
{
mlsize_t wosize;
CAMLparam0();
CAMLlocal1(result);

/* [mem] is the total amount of out-of-heap memory, [mem_minor] is how much
of it should be counted against [max_minor]. */
CAMLassert (mem_minor <= mem);

wosize = 1 + (bsz + sizeof(value) - 1) / sizeof(value);
if (wosize <= Max_young_wosize) {
if (wosize <= Max_young_wosize && mem <= caml_custom_minor_max_bsz) {
result = caml_alloc_small(wosize, Custom_tag);
Custom_ops_val(result) = ops;
if (ops->finalize != NULL || mem != 0) {
if (mem > mem_minor) {
caml_adjust_gc_speed (mem - mem_minor, max_major);
}
/* The remaining [mem_minor] will be counted if the block survives a
minor GC */
/* Record the extra resources in case the block gets promoted. */
add_to_custom_table (&Caml_state->minor_tables->custom, result,
mem, max_major);
/* Keep track of extra resources held by custom block in
minor heap. */
if (mem_minor != 0) {
if (max_minor == 0) max_minor = 1;
Caml_state->extra_heap_resources_minor +=
(double) mem_minor / (double) max_minor;
if (Caml_state->extra_heap_resources_minor > 1.0) {
caml_request_minor_gc ();
}
if (mem != 0) {
damiendoligez marked this conversation as resolved.
Show resolved Hide resolved
caml_adjust_minor_gc_speed (mem, max_minor);
}
}
} else {
Expand All @@ -79,36 +92,27 @@ static value alloc_custom_gen (const struct custom_operations * ops,
CAMLreturn(result);
}

Caml_inline mlsize_t get_max_minor (void)
{
return
Bsize_wsize (Caml_state->minor_heap_wsz) / 100 * caml_custom_minor_ratio;
}

CAMLexport value caml_alloc_custom(const struct custom_operations * ops,
uintnat bsz,
mlsize_t mem,
mlsize_t max)
{
return alloc_custom_gen (ops, bsz, mem, max, mem, max);
mlsize_t max_major = max;
mlsize_t max_minor = max == 0 ? get_max_minor() : max;
return alloc_custom_gen (ops, bsz, mem, max_major, max_minor);
}

CAMLexport value caml_alloc_custom_mem(const struct custom_operations * ops,
uintnat bsz,
mlsize_t mem)
{

mlsize_t mem_minor =
mem < caml_custom_minor_max_bsz ? mem : caml_custom_minor_max_bsz;
mlsize_t max_major =
/* The major ratio is a percentage relative to the major heap size.
A complete GC cycle will be done every time 2/3 of that much memory
is allocated for blocks in the major heap. Assuming constant
allocation and deallocation rates, this means there are at most
[M/100 * major-heap-size] bytes of floating garbage at any time.
The reason for a factor of 2/3 (or 1.5) is, roughly speaking, because
the major GC takes 1.5 cycles (previous cycle + marking phase) before
it starts to deallocate dead blocks allocated during the previous cycle.
[heap_size / 150] is really [heap_size * (2/3) / 100] (but faster). */
caml_heap_size(Caml_state->shared_heap) / 150 * caml_custom_major_ratio;
mlsize_t max_minor =
Bsize_wsize (Caml_state->minor_heap_wsz) / 100 * caml_custom_minor_ratio;
value v = alloc_custom_gen (ops, bsz, mem, max_major, mem_minor, max_minor);
return v;
return alloc_custom_gen (ops, bsz, mem, 0, get_max_minor());
}

struct custom_operations_list {
Expand Down
22 changes: 21 additions & 1 deletion runtime/memory.c
Expand Up @@ -21,6 +21,7 @@
#include <stdarg.h>
#include <stddef.h>
#include "caml/config.h"
#include "caml/custom.h"
#include "caml/misc.h"
#include "caml/fail.h"
#include "caml/memory.h"
Expand Down Expand Up @@ -254,10 +255,16 @@ CAMLexport void caml_free_dependent_memory (mlsize_t nbytes)
this time.
Note that only [res/max] is relevant. The units (and kind of
resource) can change between calls to [caml_adjust_gc_speed].

If [max] = 0, then we use a number proportional to the major heap
size and [caml_custom_major_ratio]. In this case, [mem] should
be a number of bytes and the trade-off between GC work and space
overhead is under the control of the user through
[caml_custom_major_ratio].
*/
CAMLexport void caml_adjust_gc_speed (mlsize_t res, mlsize_t max)
{
if (max == 0) max = 1;
if (max == 0) max = caml_custom_get_max_major ();
if (res > max) res = max;
Caml_state->extra_heap_resources += (double) res / (double) max;
if (Caml_state->extra_heap_resources > 0.2){
Expand All @@ -266,6 +273,19 @@ CAMLexport void caml_adjust_gc_speed (mlsize_t res, mlsize_t max)
}
}

/* This function is analogous to [caml_adjust_gc_speed]. When the
accumulated sum of [res/max] values reaches 1, a minor GC is
triggered.
*/
CAMLexport void caml_adjust_minor_gc_speed (mlsize_t res, mlsize_t max)
{
if (max == 0) max = 1;
Caml_state->extra_heap_resources_minor += (double) res / (double) max;
if (Caml_state->extra_heap_resources_minor > 1.0) {
caml_request_minor_gc ();
}
}

/* You must use [caml_intialize] to store the initial value in a field of a
block, unless you are sure the value is not a young block, in which case a
plain assignment would do.
Expand Down
8 changes: 3 additions & 5 deletions stdlib/gc.mli
Expand Up @@ -210,13 +210,11 @@ type control =

custom_minor_max_size : int;
(** Maximum amount of out-of-heap memory for each custom value
allocated in the minor heap. When a custom value is allocated
on the minor heap and holds more than this many bytes, only
this value is counted against [custom_minor_ratio] and the
rest is directly counted against [custom_major_ratio].
allocated in the minor heap. Custom values that hold more
than this many bytes are allocated on the major heap.
Note: this only applies to values allocated with
[caml_alloc_custom_mem] (e.g. bigarrays).
Default: 8192 bytes.
Default: 70000 bytes.
@since 4.08 *)
}
(** The GC parameters are given as a [control] record. Note that
Expand Down
2 changes: 2 additions & 0 deletions testsuite/tests/lib-runtime-events/test.ml
Expand Up @@ -7,6 +7,7 @@ external start_runtime_events : unit -> unit = "start_runtime_events"
external get_event_counts : unit -> (int * int) = "get_event_counts"

let () =
Gc.full_major ();
start_runtime_events ();
for a = 0 to 2 do
ignore(Sys.opaque_identity(ref 42));
Expand All @@ -21,4 +22,5 @@ let () =
Gc.compact ();
Runtime_events.pause ()
done;
let (minors, majors) = get_event_counts () in
Printf.printf "minors: %d, majors: %d\n" minors majors
2 changes: 1 addition & 1 deletion testsuite/tests/lib-runtime-events/test.reference
@@ -1,2 +1,2 @@
minors: 9, majors: 0
minors: 9, majors: 0
minors: 18, majors: 0
1 change: 1 addition & 0 deletions testsuite/tests/lib-runtime-events/test_caml.ml
Expand Up @@ -78,6 +78,7 @@ let majors_per_epoch = 50
let conses_per_major = 10

let () =
Gc.full_major ();
let list_ref = ref [] in
start ();
let cursor = create_cursor None in
Expand Down
48 changes: 33 additions & 15 deletions testsuite/tests/lib-runtime-events/test_instrumented.ml
Expand Up @@ -8,13 +8,12 @@
open Runtime_events

let list_ref = ref []
let total_sizes = ref 0
let total_blocks = ref 0
let total_minors = ref 0
let lost_event_words = ref 0

let alloc domain_id ts sizes =
let size_accum = Array.fold_left (fun x y -> x + y) 0 sizes in
total_sizes := !total_sizes + size_accum
let alloc domain_id ts counts =
total_blocks := Array.fold_left ( + ) !total_blocks counts

let runtime_end domain_id ts phase =
match phase with
Expand All @@ -26,15 +25,34 @@ let runtime_end domain_id ts phase =
let lost_events domain_id words =
lost_event_words := !lost_event_words + words

let callbacks = Callbacks.create ~runtime_end ~alloc ~lost_events ()

let reset cursor =
ignore (read_poll cursor callbacks None);
total_blocks := 0;
total_minors := 0

let loop n cursor =
Gc.full_major ();
reset cursor;
let minors_before = Gc.((quick_stat ()).minor_collections) in
for a = 1 to n do
list_ref := (Sys.opaque_identity(ref 42)) :: !list_ref
done;
Gc.full_major ();
ignore(read_poll cursor callbacks None);
let minors_after = Gc.((quick_stat ()).minor_collections) in
minors_after - minors_before

let () =
Gc.full_major ();
start ();
let cursor = create_cursor None in
for a = 0 to 1_000_000 do
list_ref := (Sys.opaque_identity(ref 42)) :: !list_ref
done;
Gc.full_major ();
let callbacks = Callbacks.create ~runtime_end ~alloc ~lost_events () in
ignore(read_poll cursor callbacks None);
Printf.printf "lost_event_words: %d, total_sizes: %d, total_minors: %d\n"
!lost_event_words !total_sizes !total_minors
start ();
let cursor = create_cursor None in
let self_minors_base = loop 0 cursor in
let blocks_base = !total_blocks in
let minors_base = !total_minors in
let self_minors = loop 1_000_000 cursor - self_minors_base in
let blocks = !total_blocks in
let minors = !total_minors in
Printf.printf "lost_event_words: %d, total_blocks: %d, diff_minors: %d\n"
!lost_event_words (blocks - blocks_base)
(minors - minors_base - self_minors)
@@ -1 +1 @@
lost_event_words: 0, total_sizes: 2000004, total_minors: 31
lost_event_words: 0, total_blocks: 2000000, diff_minors: 0