From 56cf8a094fa6fd8966b8041de75f339057a5d4a3 Mon Sep 17 00:00:00 2001 From: Michael Davis Date: Thu, 8 May 2025 10:02:07 -0400 Subject: [PATCH] Add an 'is-in-service' health check wrapping `rabbit:is_serving/0` This is useful for a load balancer, for example, to be able to avoid sending new connections to a node which is running and has listeners bound to TCP ports but is being drained for maintenance. (cherry picked from commit 07fe6307c676be861c198a15883c8cca2c3bd8dd) --- .../priv/www/api/index.html | 13 ++++++ .../src/rabbit_mgmt_dispatcher.erl | 1 + ...bit_mgmt_wm_health_check_is_in_service.erl | 44 +++++++++++++++++++ .../rabbit_mgmt_http_health_checks_SUITE.erl | 15 ++++++- 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_is_in_service.erl diff --git a/deps/rabbitmq_management/priv/www/api/index.html b/deps/rabbitmq_management/priv/www/api/index.html index b319d4236e35..54015e0fc91e 100644 --- a/deps/rabbitmq_management/priv/www/api/index.html +++ b/deps/rabbitmq_management/priv/www/api/index.html @@ -1239,6 +1239,19 @@

Reference

+ + X + + + + /api/health/checks/is-in-service + + Responds with a 200 OK if the target node is booted, running, and ready to + serve clients, otherwise responds with a 503 Service Unavailable. If the + target node is being drained for maintenance then this check returns 503 + Service Unavailable. + + X diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl index 41ce78677ecb..ece7c1372666 100644 --- a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl +++ b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl @@ -207,6 +207,7 @@ dispatcher() -> {"/health/checks/quorum-queues-without-elected-leaders/all-vhosts/pattern/:pattern", rabbit_mgmt_wm_health_check_quorum_queues_without_elected_leaders_across_all_vhosts, []}, {"/health/checks/quorum-queues-without-elected-leaders/vhost/:vhost/pattern/:pattern", rabbit_mgmt_wm_health_check_quorum_queues_without_elected_leaders, []}, {"/health/checks/node-is-quorum-critical", rabbit_mgmt_wm_health_check_node_is_quorum_critical, []}, + {"/health/checks/is-in-service", rabbit_mgmt_wm_health_check_is_in_service, []}, {"/reset", rabbit_mgmt_wm_reset, []}, {"/reset/:node", rabbit_mgmt_wm_reset, []}, {"/rebalance/queues", rabbit_mgmt_wm_rebalance_queues, [{queues, all}]}, diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_is_in_service.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_is_in_service.erl new file mode 100644 index 000000000000..205a304a016a --- /dev/null +++ b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_is_in_service.erl @@ -0,0 +1,44 @@ +%% This Source Code Form is subject to the terms of the Mozilla Public +%% License, v. 2.0. If a copy of the MPL was not distributed with this +%% file, You can obtain one at https://mozilla.org/MPL/2.0/. +%% +%% Copyright (c) 2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. 
+%% + +-module(rabbit_mgmt_wm_health_check_is_in_service). + +-export([init/2]). +-export([to_json/2, content_types_provided/2]). +-export([variances/2]). + +-include("rabbit_mgmt.hrl"). +-include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl"). + +init(Req, _State) -> + Req1 = rabbit_mgmt_headers:set_no_cache_headers( + rabbit_mgmt_headers:set_common_permission_headers( + Req, ?MODULE), ?MODULE), + {cowboy_rest, Req1, #context{}}. + +variances(Req, Context) -> + {[<<"accept-encoding">>, <<"origin">>], Req, Context}. + +content_types_provided(ReqData, Context) -> + {rabbit_mgmt_util:responder_map(to_json), ReqData, Context}. + +to_json(ReqData, Context) -> + case rabbit:is_serving() of + true -> + rabbit_mgmt_util:reply(#{status => ok}, ReqData, Context); + false -> + Msg = "this rabbit node is not currently available to serve", + failure(Msg, ReqData, Context) + end. + +failure(Message, ReqData, Context) -> + Body = #{ + status => failed, + reason => rabbit_data_coercion:to_binary(Message) + }, + {Response, ReqData1, Context1} = rabbit_mgmt_util:reply(Body, ReqData, Context), + {stop, cowboy_req:reply(?HEALTH_CHECK_FAILURE_STATUS, #{}, Response, ReqData1), Context1}. diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl index 7b755b862fad..975e6f6ee409 100644 --- a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl +++ b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl @@ -50,7 +50,8 @@ all_tests() -> [ metadata_store_initialized_with_data_test, protocol_listener_test, port_listener_test, - certificate_expiration_test + certificate_expiration_test, + is_in_service_test ]. %% ------------------------------------------------------------------- @@ -457,6 +458,18 @@ certificate_expiration_test(Config) -> passed. 
+is_in_service_test(Config) -> + Path = "/health/checks/is-in-service", + Check0 = http_get(Config, Path, ?OK), + ?assertEqual(<<"ok">>, maps:get(status, Check0)), + + true = rabbit_ct_broker_helpers:mark_as_being_drained(Config, 0), + Body0 = http_get_failed(Config, Path), + ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body0)), + true = rabbit_ct_broker_helpers:unmark_as_being_drained(Config, 0), + + passed. + http_get_failed(Config, Path) -> {ok, {{_, Code, _}, _, ResBody}} = req(Config, get, Path, [auth_header("guest", "guest")]), ?assertEqual(Code, ?HEALTH_CHECK_FAILURE_STATUS),