From 1f1a13521b5c26904673faac1384ad28199c2fdf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jean-S=C3=A9bastien=20P=C3=A9dron?= Date: Wed, 12 Feb 2025 17:13:24 +0100 Subject: [PATCH] Skip peer discovery clustering tests if multiple Khepri machine versions ... are being used at the same time. [Why] Depending on which node clusters with which, a node running an older version of the Khepri Ra machine may not be able to apply Ra commands and could be stuck. There is no real solution and this is clearly an unsupported scenario. An old node won't always be able to join a newer cluster. [How] In the testsuites, we skip clustering tests if we detect that multiple Khepri Ra machine versions are being used. --- .../src/rabbit_ct_broker_helpers.erl | 9 +++++- .../test/system_SUITE.erl | 24 ++++++++++++++-- .../test/system_SUITE.erl | 28 ++++++++++++++++--- 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl index 00eb0262ef72..6edff885905d 100644 --- a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl +++ b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl @@ -173,7 +173,8 @@ user/1, configured_metadata_store/1, - await_metadata_store_consistent/2 + await_metadata_store_consistent/2, + do_nodes_run_same_ra_machine_version/2 ]). %% Internal functions exported to be used by rpc:call/4. @@ -1174,6 +1175,12 @@ ra_last_applied(ServerId) -> #{last_applied := LastApplied} = ra:key_metrics(ServerId), LastApplied. +do_nodes_run_same_ra_machine_version(Config, RaMachineMod) -> + [MacVer1 | MacVerN] = MacVers = rpc_all(Config, RaMachineMod, version, []), + ct:pal("Ra machine versions of ~s: ~0p", [RaMachineMod, MacVers]), + is_integer(MacVer1) andalso + lists:all(fun(MacVer) -> MacVer =:= MacVer1 end, MacVerN). 
+ rewrite_node_config_file(Config, Node) -> NodeConfig = get_node_config(Config, Node), I = if diff --git a/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl b/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl index a39e2bc7bf9e..417e3b773d04 100644 --- a/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl +++ b/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl @@ -83,9 +83,27 @@ init_per_testcase(Testcase, Config) case Config3 of _ when is_list(Config3) -> try - _ = rabbit_ct_broker_helpers:rpc_all( - Config3, rabbit_peer_discovery_backend, api_version, []), - Config3 + SameMacVer = ( + rabbit_ct_broker_helpers: + do_nodes_run_same_ra_machine_version( + Config3, khepri_machine)), + case SameMacVer of + true -> + _ = rabbit_ct_broker_helpers:rpc_all( + Config3, + rabbit_peer_discovery_backend, api_version, []), + Config3; + false -> + Config5 = rabbit_ct_helpers:run_steps( + Config3, + rabbit_ct_client_helpers:teardown_steps() + ++ + rabbit_ct_broker_helpers:teardown_steps()), + rabbit_ct_helpers:testcase_finished(Config5, Testcase), + {skip, + "Nodes are using different Khepri Ra machine " + "versions; clustering will likely fail"} + end catch error:{exception, undef, [{rabbit_peer_discovery_backend, api_version, _, _} diff --git a/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl b/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl index 7531f3bd92eb..1dfef24b0d06 100644 --- a/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl +++ b/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl @@ -92,9 +92,27 @@ init_per_testcase(Testcase, Config) case Config3 of _ when is_list(Config3) -> try - _ = rabbit_ct_broker_helpers:rpc_all( - Config3, rabbit_peer_discovery_backend, api_version, []), - Config3 + SameMacVer = ( + rabbit_ct_broker_helpers: + do_nodes_run_same_ra_machine_version( + Config3, khepri_machine)), + case SameMacVer of + true -> + _ = rabbit_ct_broker_helpers:rpc_all( + Config3, + 
rabbit_peer_discovery_backend, api_version, []), + Config3; + false -> + Config5 = rabbit_ct_helpers:run_steps( + Config3, + rabbit_ct_client_helpers:teardown_steps() + ++ + rabbit_ct_broker_helpers:teardown_steps()), + rabbit_ct_helpers:testcase_finished(Config5, Testcase), + {skip, + "Nodes are using different Khepri Ra machine " + "versions; clustering will likely fail"} + end catch error:{exception, undef, [{rabbit_peer_discovery_backend, api_version, _, _} @@ -239,7 +257,9 @@ wait_for_etcd(EtcdEndpoints) -> Timeout = 60000, rabbit_ct_helpers:await_condition( fun() -> - case eetcd:open(test, EtcdEndpoints) of + Ret = eetcd:open(test, EtcdEndpoints), + ct:pal("Ret = ~p", [Ret]), + case Ret of {ok, _Pid} -> true; _ -> false end