Skip to content

Commit f462038

Browse files
stall detection demo: fix bugs, simplify things
1 parent 300bfe0 commit f462038

File tree

1 file changed

+31
-52
lines changed

1 file changed

+31
-52
lines changed

test/stall_detection/selfdetector.ml

Lines changed: 31 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -2,61 +2,40 @@ let () = Runtime_events.start ()
22

33
let rec stall d =
44
let open Lwt.Syntax in
5+
Printf.printf "stalling for %dns\n" (int_of_float (d *. 1_000_000_000.));
6+
flush stdout;
57
Unix.sleepf d;
68
let* () = Lwt.pause () in
7-
stall (d+.0.01)
9+
stall (1.5*.(d+.0.01))
810

9-
let last_lap = ref 0L
10-
let started = ref false
11+
(* set to maxint to avoid the first hit being a false positive *)
12+
let last_lap = ref Int64.max_int
1113

14+
let cursor = Runtime_events.create_cursor None
15+
let cb =
16+
Runtime_events.Callbacks.create ()
17+
|> Runtime_events.Callbacks.add_user_event
18+
Runtime_events.Type.unit
19+
(fun _ t e () ->
20+
match Runtime_events.User.tag e with
21+
| Lwt_main.Scheduler_lap ->
22+
let delta = Int64.sub (Runtime_events.Timestamp.to_int64 t) !last_lap in
23+
last_lap := Runtime_events.Timestamp.to_int64 t;
24+
if delta > 1_000_000_000L (* 1 second *) then begin
25+
Printf.printf "ALARM: stall 1s+ CRASHING\n"; flush stdout;
26+
exit 0
27+
end;
28+
if delta > 500_000_000L (* 0.5 second *) then begin
29+
Printf.printf "ALARM: stall .5s+\n"; flush stdout
30+
end
31+
| _ -> ())
32+
let rec detect () =
33+
Unix.sleepf 0.01;
34+
let _ : int = Runtime_events.read_poll cursor cb None in
35+
detect ()
1236

13-
let detect () =
14-
let cursor = Runtime_events.create_cursor None in
15-
let cb_stall =
16-
Runtime_events.Callbacks.create ()
17-
|> Runtime_events.Callbacks.add_user_event
18-
Runtime_events.Type.unit
19-
(fun _ t e () ->
20-
match Runtime_events.User.tag e with
21-
| Lwt_main.Scheduler_lap ->
22-
let delta = Int64.sub (Runtime_events.Timestamp.to_int64 t) !last_lap in
23-
if delta > 1_000_000_000L (* 1 second *) then begin
24-
Printf.printf "ALARM: stall 1s+ CRASHING\n"; flush stdout;
25-
exit 0
26-
end;
27-
if delta > 500_000_000L (* 0.5 second *) then begin
28-
Printf.printf "ALARM: stall .5s+\n"; flush stdout
29-
end
30-
| _ -> ())
31-
in
32-
let rec detect_stall () =
33-
Unix.sleepf 0.01;
34-
let _ : int = Runtime_events.read_poll cursor cb_stall None in
35-
detect_stall ()
36-
in
37-
let cb_pre_start =
38-
Runtime_events.Callbacks.create ()
39-
|> Runtime_events.Callbacks.add_user_event
40-
Runtime_events.Type.span
41-
(fun _ t e ev ->
42-
match Runtime_events.User.tag e, ev with
43-
| Lwt_main.Scheduler_call, Runtime_events.Type.Begin ->
44-
last_lap := Runtime_events.Timestamp.to_int64 t;
45-
started := true
46-
| Lwt_main.Scheduler_call, Runtime_events.Type.End ->
47-
failwith "NO"
48-
| _ -> failwith "NOPE")
49-
in
50-
let rec detect_start () =
51-
if not !started then begin
52-
Unix.sleepf 0.001;
53-
let _ : int = Runtime_events.read_poll cursor cb_pre_start None in
54-
detect_start ()
55-
end else
56-
detect_stall ()
57-
in
58-
detect_start ()
37+
let _ : _ Domain.t = Domain.spawn detect
5938

60-
let _ = Domain.spawn (fun () -> detect ())
61-
62-
let () = Lwt_main.run (stall 0.)
39+
let () =
40+
Printf.printf "start\n"; flush stdout;
41+
Lwt_main.run (stall 0.)

0 commit comments

Comments
 (0)