Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: show queue lengths for resource pools in web UI #500

Merged
merged 1 commit into from
Mar 14, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 28 additions & 17 deletions ci/src/cI_monitored_pool.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ end
type t = {
label: string;
capacity: int;
mutable qlen : int;
mutable active: int;
pool: unit Lwt_pool.t;
mutable users : ((CI_s.job_id * string option) * CI_live_log.t option) list;
Expand All @@ -39,7 +40,7 @@ let registered_pools = ref String.Map.empty

let create label capacity =
let pool = Lwt_pool.create capacity Lwt.return in
let t = { label; capacity; active = 0; pool; users = [] } in
let t = { label; capacity; qlen = 0; active = 0; pool; users = [] } in
assert (not (String.Map.mem label !registered_pools));
registered_pools := String.Map.add label t !registered_pools;
Prometheus.Gauge.set (Metrics.capacity label) (float_of_int capacity);
Expand All @@ -53,24 +54,33 @@ let rec remove_first msg = function
let use ?log t ~reason fn =
let qlen = Metrics.qlen t.label in
Prometheus.Gauge.inc_one qlen;
t.qlen <- t.qlen + 1;
let dec = lazy (
Prometheus.Gauge.dec_one qlen;
t.qlen <- t.qlen - 1;
) in
let start_wait = Unix.gettimeofday () in
Lwt_pool.use t.pool
(fun v ->
Prometheus.Gauge.dec_one qlen;
let stop_wait = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.wait_time t.label) (stop_wait -. start_wait);
t.active <- t.active + 1;
t.users <- (reason, log) :: t.users;
Prometheus.Gauge.track_inprogress (Metrics.resources_in_use t.label) @@ fun () ->
Lwt.finalize
(fun () -> fn v)
(fun () ->
let stop_use = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.use_time t.label) (stop_use -. stop_wait);
t.active <- t.active - 1;
t.users <- remove_first reason t.users;
Lwt.return_unit)
Lwt.finalize
(fun () ->
Lwt_pool.use t.pool
(fun v ->
Lazy.force dec;
let stop_wait = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.wait_time t.label) (stop_wait -. start_wait);
t.active <- t.active + 1;
t.users <- (reason, log) :: t.users;
Prometheus.Gauge.track_inprogress (Metrics.resources_in_use t.label) @@ fun () ->
Lwt.finalize
(fun () -> fn v)
(fun () ->
let stop_use = Unix.gettimeofday () in
Prometheus.Summary.observe (Metrics.use_time t.label) (stop_use -. stop_wait);
t.active <- t.active - 1;
t.users <- remove_first reason t.users;
Lwt.return_unit)
)
)
(fun () -> Lazy.force dec; Lwt.return ())

let use t ?log ?label job_id fn =
let reason = (job_id, label) in
Expand All @@ -82,5 +92,6 @@ let use t ?log ?label job_id fn =

let active t = t.active
let capacity t = t.capacity
let qlen t = t.qlen
let pools () = !registered_pools
let users t = t.users
2 changes: 2 additions & 0 deletions ci/src/cI_monitored_pool.mli
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ val active : t -> int

val capacity : t -> int

val qlen : t -> int

val users : t -> ((CI_s.job_id * string option) * CI_live_log.t option) list
(** [users t] is the list of reasons why resources are being used, one per resource, and (optionally) its
log (through which it may be possible to cancel the job). *)
13 changes: 9 additions & 4 deletions ci/src/cI_web_templates.ml
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,15 @@ let html_of_user ~csrf_token ((job, label), log) =

let resource_pools ~csrf_token =
let items =
let open CI_monitored_pool in
String.Map.bindings (pools ()) |> List.map (fun (name, pool) ->
let used = Fmt.strf "%d / %d" (active pool) (capacity pool) in
let uses = users pool |> List.map (html_of_user ~csrf_token) |> List.concat in
String.Map.bindings (CI_monitored_pool.pools ()) |> List.map (fun (name, pool) ->
let active = CI_monitored_pool.active pool in
let capacity = CI_monitored_pool.capacity pool in
let qlen = CI_monitored_pool.qlen pool in
let used =
if active < capacity then Fmt.strf "%d / %d" active capacity
else Fmt.strf "%d / %d [%d queued]" active capacity qlen
in
let uses = CI_monitored_pool.users pool |> List.map (html_of_user ~csrf_token) |> List.concat in
tr [th [pcdata name]; td (pcdata used :: uses)];
)
in
Expand Down