Skip to content

Commit aeffdbe

Browse files
committed
Auto resetting single word barrier
1 parent 4fdeee6 commit aeffdbe

File tree

2 files changed

+31
-21
lines changed

2 files changed

+31
-21
lines changed

lib/barrier.ml

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,26 @@
1-
type t = { counter : int Atomic.t; total : int }
1+
(** This barrier is designed to take a single cache line (or word) and to return
2+
with the participating domains synchronized as precisely as possible. *)
3+
4+
type t = int Atomic.t
5+
6+
(* Width, in bits, of the low field of the packed barrier state: just under
   half of the native int width, so that a second field of similar width
   fits above it in the same integer. *)
let bits = (Sys.int_size - 1) / 2
7+
(* Mask selecting the low [bits] bits of the state. *)
let mask = (1 lsl bits) - 1
8+
(* Adding [one] to the state increments the field stored above the low
   [bits] bits by 1 without touching the low field. *)
let one = 1 lsl bits
29

310
let make total =
4-
{ counter = Atomic.make 0 |> Multicore_magic.copy_as_padded; total }
5-
|> Multicore_magic.copy_as_padded
6-
7-
let await { counter; total } =
8-
if Atomic.get counter = total then
9-
Atomic.compare_and_set counter total 0 |> ignore;
10-
Atomic.incr counter;
11-
while Atomic.get counter < total do
11+
if total <= 0 || mask < total then invalid_arg "Barrier: out of bounds";
12+
Atomic.make total |> Multicore_magic.copy_as_padded
13+
14+
(* [await t] busy-waits, calling [Domain.cpu_relax] between polls, until
   every participant of the barrier has called [await]. The wait has two
   phases: an arrival phase and a release phase. When the release phase
   completes the state equals its initial value again, so the same barrier
   can be awaited repeatedly. *)
let await t =
15+
(* Arrival: bump the upper (arrival count) field. [fetch_and_add] returns
   the state as it was before this increment. *)
let state = Atomic.fetch_and_add t one in
16+
(* The low [bits] bits carry the participant count that [make] stored. *)
let total = state land mask in
17+
(* If [total - 1] arrivals were already recorded, this caller is the last
   one. It overwrites the state with [total - (total lsl bits)], which is
   negative because [make] rejects [total <= 0]. *)
if state lsr bits = total - 1 then Atomic.set t (total - (total lsl bits));
18+
19+
(* Spin while the state is still positive, i.e. until the last arrival has
   stored the negative value above. *)
while 0 < Atomic.get t do
20+
Domain.cpu_relax ()
21+
done;
22+
23+
(* Release: every participant adds [one] again. The state stays negative
   until all [total] additions have happened, at which point it is exactly
   [total] once more (zero arrivals recorded). *)
Atomic.fetch_and_add t one |> ignore;
24+
(* Spin until the final release increment makes the state non-negative. *)
while Atomic.get t < 0 do
1225
Domain.cpu_relax ()
1326
done

lib/times.ml

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,7 @@ type t = { inverted : bool; times_per_domain : float array array; runs : int }
33
let record ~budgetf ~n_domains ?(ensure_multi_domain = true)
44
?(domain_local_await = `Busy_wait) ?(n_warmups = 3) ?(n_runs_min = 7)
55
?(before = Fun.id) ~init ~work ?(after = Fun.id) () =
6-
let barrier_before = Barrier.make n_domains in
7-
let barrier_init = Barrier.make n_domains in
8-
let barrier_work = Barrier.make n_domains in
9-
let barrier_after = Barrier.make n_domains in
6+
let barrier = Barrier.make n_domains in
107
let results =
118
Array.init n_domains @@ fun _ ->
129
Stack.create () |> Multicore_magic.copy_as_padded
@@ -54,20 +51,20 @@ let record ~budgetf ~n_domains ?(ensure_multi_domain = true)
5451
let main domain_i =
5552
let benchmark () =
5653
for _ = 1 to n_warmups do
57-
Barrier.await barrier_before;
54+
Barrier.await barrier;
5855
if domain_i = 0 then begin
5956
before ();
6057
Gc.major ()
6158
end;
62-
Barrier.await barrier_init;
59+
Barrier.await barrier;
6360
let state = init domain_i in
64-
Barrier.await barrier_work;
61+
Barrier.await barrier;
6562
work domain_i state;
66-
Barrier.await barrier_after;
63+
Barrier.await barrier;
6764
if domain_i = 0 then after ()
6865
done;
6966
while !runs < n_runs_min || not !budget_used do
70-
Barrier.await barrier_before;
67+
Barrier.await barrier;
7168
if domain_i = 0 then begin
7269
Multicore_magic.fenceless_set start_earliest Mtime.Span.zero;
7370
before ();
@@ -83,9 +80,9 @@ let record ~budgetf ~n_domains ?(ensure_multi_domain = true)
8380
incr runs;
8481
Gc.major ()
8582
end;
86-
Barrier.await barrier_init;
83+
Barrier.await barrier;
8784
let state = init domain_i in
88-
Barrier.await barrier_work;
85+
Barrier.await barrier;
8986
if Multicore_magic.fenceless_get start_earliest == Mtime.Span.zero then begin
9087
let start_current = Mtime_clock.elapsed () in
9188
if Multicore_magic.fenceless_get start_earliest == Mtime.Span.zero
@@ -95,7 +92,7 @@ let record ~budgetf ~n_domains ?(ensure_multi_domain = true)
9592
end;
9693
work domain_i state;
9794
let stop_current = Mtime_clock.elapsed () in
98-
Barrier.await barrier_after;
95+
Barrier.await barrier;
9996
if domain_i = 0 then after ();
10097
Stack.push
10198
(Mtime.Span.to_float_ns

0 commit comments

Comments
 (0)