From e72d9110803e2c5899e91fe622b0abb62079f49d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 26 Feb 2025 10:38:22 +0100 Subject: [PATCH] rabbit_peer_discovery: Compute start time once ... and cache it. [Why] It happens at least in CI that the computed start time varies by a few seconds. I think this comes from the Erlang time offset which might be adjusted over time. This affects peer discovery's sorting of RabbitMQ nodes which uses that start time to determine the oldest node. When the start time of a node changes, it could be considered the seed node to join by some nodes but ignored by the other nodes, leading to troubles with cluster formation. --- deps/rabbit/src/rabbit_peer_discovery.erl | 30 +++++----- .../src/rabbit_boot_state.erl | 58 ++++++++++++++++++- .../src/rabbit_prelaunch.erl | 1 + 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/deps/rabbit/src/rabbit_peer_discovery.erl b/deps/rabbit/src/rabbit_peer_discovery.erl index 40a97b472d5d..9872e8d380dd 100644 --- a/deps/rabbit/src/rabbit_peer_discovery.erl +++ b/deps/rabbit/src/rabbit_peer_discovery.erl @@ -637,8 +637,7 @@ query_node_props2([{Node, Members} | Rest], NodesAndProps, FromNode) -> ["Peer discovery: temporary hidden node '~ts' " "queries properties from node '~ts'", [node(), Node]], FromNode), - StartTime = get_node_start_time( - Node, microsecond, FromNode), + StartTime = get_node_start_time(Node, FromNode), IsReady = is_node_db_ready(Node, FromNode), NodeAndProps = {Node, Members, StartTime, IsReady}, NodesAndProps1 = [NodeAndProps | NodesAndProps], @@ -666,9 +665,8 @@ query_node_props2([], NodesAndProps, _FromNode) -> ?assert(length(NodesAndProps1) =< length(nodes(hidden))), NodesAndProps1. --spec get_node_start_time(Node, Unit, FromNode) -> StartTime when +-spec get_node_start_time(Node, FromNode) -> StartTime when Node :: node(), - Unit :: erlang:time_unit(), FromNode :: node(), StartTime :: non_neg_integer(). %% @doc Returns the start time of the given `Node' in `Unit'. @@ -689,15 +687,21 @@ query_node_props2([], NodesAndProps, _FromNode) -> %% %% @private -get_node_start_time(Node, Unit, FromNode) -> - NativeStartTime = erpc_call( - Node, erlang, system_info, [start_time], FromNode), - TimeOffset = erpc_call(Node, erlang, time_offset, [], FromNode), - SystemStartTime = NativeStartTime + TimeOffset, - StartTime = erpc_call( - Node, erlang, convert_time_unit, - [SystemStartTime, native, Unit], FromNode), - StartTime. +get_node_start_time(Node, FromNode) -> + try + erpc_call(Node,rabbit_boot_state, get_start_time, [], FromNode) + catch + error:{exception, _, _} -> + NativeStartTime = erpc_call( + Node, erlang, system_info, [start_time], + FromNode), + TimeOffset = erpc_call(Node, erlang, time_offset, [], FromNode), + SystemStartTime = NativeStartTime + TimeOffset, + StartTime = erpc_call( + Node, erlang, convert_time_unit, + [SystemStartTime, native, microsecond], FromNode), + StartTime + end. -spec is_node_db_ready(Node, FromNode) -> IsReady when Node :: node(), diff --git a/deps/rabbitmq_prelaunch/src/rabbit_boot_state.erl b/deps/rabbitmq_prelaunch/src/rabbit_boot_state.erl index 8dfe8e252811..649e0403a425 100644 --- a/deps/rabbitmq_prelaunch/src/rabbit_boot_state.erl +++ b/deps/rabbitmq_prelaunch/src/rabbit_boot_state.erl @@ -17,9 +17,12 @@ set/1, wait_for/2, has_reached/1, - has_reached_and_is_active/1]). + has_reached_and_is_active/1, + get_start_time/0, + record_start_time/0]). -define(PT_KEY_BOOT_STATE, {?MODULE, boot_state}). +-define(PT_KEY_START_TIME, {?MODULE, start_time}). -type boot_state() :: stopped | booting | @@ -95,3 +98,56 @@ has_reached_and_is_active(TargetBootState) -> andalso not has_reached(CurrentBootState, stopping) end. + +-spec get_start_time() -> StartTime when + StartTime :: integer(). +%% @doc Returns the start time of the Erlang VM. +%% +%% This time was recorded by {@link record_start_time/0} as early as possible +%% and is immutable. + +get_start_time() -> + persistent_term:get(?PT_KEY_START_TIME). + +-spec record_start_time() -> ok. +%% @doc Records the start time of the Erlang VM. +%% +%% The time is expressed in microseconds since Epoch. It can be compared to +%% other non-native times. This is used by the Peer Discovery subsystem to +%% sort nodes and select a seed node if the peer discovery backend did not +%% select one. +%% +%% This time is recorded once. Calling this function multiple times won't +%% overwrite the value. + +record_start_time() -> + Key = ?PT_KEY_START_TIME, + try + %% Check if the start time was recorded. + _ = persistent_term:get(Key), + ok + catch + error:badarg -> + %% The start time was not recorded yet. Acquire a lock and check + %% again in case another process got the lock first and recorded + %% the start time. + Node = node(), + LockId = {?PT_KEY_START_TIME, self()}, + true = global:set_lock(LockId, [Node]), + try + _ = persistent_term:get(Key), + ok + catch + error:badarg -> + %% We are really the first to get the lock and we can + %% record the start time. + NativeStartTime = erlang:system_info(start_time), + TimeOffset = erlang:time_offset(), + SystemStartTime = NativeStartTime + TimeOffset, + StartTime = erlang:convert_time_unit( + SystemStartTime, native, microsecond), + persistent_term:put(Key, StartTime) + after + global:del_lock(LockId, [Node]) + end + end. diff --git a/deps/rabbitmq_prelaunch/src/rabbit_prelaunch.erl b/deps/rabbitmq_prelaunch/src/rabbit_prelaunch.erl index e9d9d66d0e91..832ecd1cc1a0 100644 --- a/deps/rabbitmq_prelaunch/src/rabbit_prelaunch.erl +++ b/deps/rabbitmq_prelaunch/src/rabbit_prelaunch.erl @@ -48,6 +48,7 @@ run_prelaunch_first_phase() -> do_run() -> %% Indicate RabbitMQ is booting. clear_stop_reason(), + rabbit_boot_state:record_start_time(), rabbit_boot_state:set(booting), %% Configure dbg if requested.