Skip to content

Commit

Permalink
Merge pull request #500 from talex5/qlen
Browse files Browse the repository at this point in the history
ci: show queue lengths for resource pools in web UI
  • Loading branch information
talex5 committed Mar 14, 2017
2 parents fd382cc + 73aae4e commit 26ca6bc
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 21 deletions.
45 changes: 28 additions & 17 deletions ci/src/cI_monitored_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ end
type t = {
label: string;
capacity: int;
mutable qlen : int;
mutable active: int;
pool: unit Lwt_pool.t;
mutable users : ((CI_s.job_id * string option) * CI_live_log.t option) list;
Expand All @@ -39,7 +40,7 @@ let registered_pools = ref String.Map.empty

let create label capacity =
let pool = Lwt_pool.create capacity Lwt.return in
let t = { label; capacity; active = 0; pool; users = [] } in
let t = { label; capacity; qlen = 0; active = 0; pool; users = [] } in
assert (not (String.Map.mem label !registered_pools));
registered_pools := String.Map.add label t !registered_pools;
Prometheus.Gauge.set (Metrics.capacity label) (float_of_int capacity);
Expand All @@ -53,24 +54,33 @@ let rec remove_first msg = function
let use ?log t ~reason fn =
let qlen = Metrics.qlen t.label in
Prometheus.Gauge.inc_one qlen;
t.qlen <- t.qlen + 1;
let dec = lazy (
Prometheus.Gauge.dec_one qlen;
t.qlen <- t.qlen - 1;
) in
let start_wait = Unix.gettimeofday () in
Lwt_pool.use t.pool
(fun v ->
Prometheus.Gauge.dec_one qlen;
let stop_wait = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.wait_time t.label) (stop_wait -. start_wait);
t.active <- t.active + 1;
t.users <- (reason, log) :: t.users;
Prometheus.Gauge.track_inprogress (Metrics.resources_in_use t.label) @@ fun () ->
Lwt.finalize
(fun () -> fn v)
(fun () ->
let stop_use = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.use_time t.label) (stop_use -. stop_wait);
t.active <- t.active - 1;
t.users <- remove_first reason t.users;
Lwt.return_unit)
Lwt.finalize
(fun () ->
Lwt_pool.use t.pool
(fun v ->
Lazy.force dec;
let stop_wait = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.wait_time t.label) (stop_wait -. start_wait);
t.active <- t.active + 1;
t.users <- (reason, log) :: t.users;
Prometheus.Gauge.track_inprogress (Metrics.resources_in_use t.label) @@ fun () ->
Lwt.finalize
(fun () -> fn v)
(fun () ->
let stop_use = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.use_time t.label) (stop_use -. stop_wait);
t.active <- t.active - 1;
t.users <- remove_first reason t.users;
Lwt.return_unit)
)
)
(fun () -> Lazy.force dec; Lwt.return ())

let use t ?log ?label job_id fn =
let reason = (job_id, label) in
Expand All @@ -82,5 +92,6 @@ let use t ?log ?label job_id fn =

let active t = t.active
let capacity t = t.capacity
let qlen t = t.qlen
let pools () = !registered_pools
let users t = t.users
2 changes: 2 additions & 0 deletions ci/src/cI_monitored_pool.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ val active : t -> int

val capacity : t -> int

val qlen : t -> int

val users : t -> ((CI_s.job_id * string option) * CI_live_log.t option) list
(** [users t] is the list of reasons why resources are being used, one per resource, and (optionally) its
log (through which it may be possible to cancel the job). *)
13 changes: 9 additions & 4 deletions ci/src/cI_web_templates.ml
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,15 @@ let html_of_user ~csrf_token ((job, label), log) =

let resource_pools ~csrf_token =
let items =
let open CI_monitored_pool in
String.Map.bindings (pools ()) |> List.map (fun (name, pool) ->
let used = Fmt.strf "%d / %d" (active pool) (capacity pool) in
let uses = users pool |> List.map (html_of_user ~csrf_token) |> List.concat in
String.Map.bindings (CI_monitored_pool.pools ()) |> List.map (fun (name, pool) ->
let active = CI_monitored_pool.active pool in
let capacity = CI_monitored_pool.capacity pool in
let qlen = CI_monitored_pool.qlen pool in
let used =
if active < capacity then Fmt.strf "%d / %d" active capacity
else Fmt.strf "%d / %d [%d queued]" active capacity qlen
in
let uses = CI_monitored_pool.users pool |> List.map (html_of_user ~csrf_token) |> List.concat in
tr [th [pcdata name]; td (pcdata used :: uses)];
)
in
Expand Down

0 comments on commit 26ca6bc

Please sign in to comment.