diff --git a/.github/workflows/test-make-target.yaml b/.github/workflows/test-make-target.yaml index bce876e24b44..ebf88b3ae108 100644 --- a/.github/workflows/test-make-target.yaml +++ b/.github/workflows/test-make-target.yaml @@ -82,6 +82,7 @@ jobs: - name: RUN TESTS if: inputs.plugin != 'rabbitmq_cli' run: | + sudo netstat -ntp make -C deps/${{ inputs.plugin }} ${{ inputs.make_target }} RABBITMQ_METADATA_STORE=${{ inputs.metadata_store }} # rabbitmq_cli needs a correct broker version for two of its tests. diff --git a/deps/rabbit/Makefile b/deps/rabbit/Makefile index c8da33210061..5d6ac4f6183a 100644 --- a/deps/rabbit/Makefile +++ b/deps/rabbit/Makefile @@ -241,10 +241,10 @@ define ct_master.erl peer:call(Pid2, net_kernel, set_net_ticktime, [5]), peer:call(Pid3, net_kernel, set_net_ticktime, [5]), peer:call(Pid4, net_kernel, set_net_ticktime, [5]), - peer:call(Pid1, persistent_term, put, [rabbit_ct_tcp_port_base, 23000]), - peer:call(Pid2, persistent_term, put, [rabbit_ct_tcp_port_base, 25000]), - peer:call(Pid3, persistent_term, put, [rabbit_ct_tcp_port_base, 27000]), - peer:call(Pid4, persistent_term, put, [rabbit_ct_tcp_port_base, 29000]), + peer:call(Pid1, persistent_term, put, [rabbit_ct_tcp_port_base, 16000]), + peer:call(Pid2, persistent_term, put, [rabbit_ct_tcp_port_base, 20000]), + peer:call(Pid3, persistent_term, put, [rabbit_ct_tcp_port_base, 24000]), + peer:call(Pid4, persistent_term, put, [rabbit_ct_tcp_port_base, 28000]), [{[_], {ok, Results}}] = ct_master_fork:run("$1"), peer:stop(Pid4), peer:stop(Pid3), diff --git a/deps/rabbit/docs/rabbitmqctl.8 b/deps/rabbit/docs/rabbitmqctl.8 index fd7b5f31ef60..a61fc9348999 100644 --- a/deps/rabbit/docs/rabbitmqctl.8 +++ b/deps/rabbit/docs/rabbitmqctl.8 @@ -346,7 +346,7 @@ next time it is started: .sp .Dl rabbitmqctl force_boot .\" ------------------------------------------------------------------ -.It Cm force_reset +.It Cm force_reset Em (deprecated) .Pp Forcefully returns a RabbitMQ node to its virgin state. .Pp @@ -359,6 +359,13 @@ management database state and cluster configuration. It should only be used as a last resort if the database or cluster configuration has been corrupted. .Pp +The +.Cm force_reset +command is +.Sy deprecated . +It remains available when the Mnesia metadata store is used. +It is unsupported with the Khepri metadata store. +.Pp For .Cm reset and diff --git a/deps/rabbit/src/rabbit_db.erl b/deps/rabbit/src/rabbit_db.erl index a506c91259a2..2bf52b3a01c8 100644 --- a/deps/rabbit/src/rabbit_db.erl +++ b/deps/rabbit/src/rabbit_db.erl @@ -163,11 +163,13 @@ force_reset_using_mnesia() -> #{domain => ?RMQLOG_DOMAIN_DB}), rabbit_mnesia:force_reset(). +-spec force_reset_using_khepri() -> no_return(). + force_reset_using_khepri() -> - ?LOG_DEBUG( - "DB: resetting node forcefully (using Khepri)", + ?LOG_ERROR( + "DB: resetting node forcefully is unsupported with Khepri", #{domain => ?RMQLOG_DOMAIN_DB}), - rabbit_khepri:force_reset(). + throw({error, "Forced reset is unsupported with Khepri"}). -spec force_load_on_next_boot() -> Ret when Ret :: ok. diff --git a/deps/rabbit/src/rabbit_khepri.erl b/deps/rabbit/src/rabbit_khepri.erl index a370914a3a40..5ad603665adb 100644 --- a/deps/rabbit/src/rabbit_khepri.erl +++ b/deps/rabbit/src/rabbit_khepri.erl @@ -168,8 +168,7 @@ -export([check_cluster_consistency/0, check_cluster_consistency/2, node_info/0]). --export([reset/0, - force_reset/0]). +-export([reset/0]). -export([cluster_status_from_khepri/0, cli_cluster_status/0]). @@ -601,23 +600,6 @@ reset() -> %% @private -force_reset() -> - case rabbit:is_running() of - false -> - ok = khepri:stop(?RA_CLUSTER_NAME), - DataDir = maps:get(data_dir, ra_system:fetch(?RA_SYSTEM)), - ok = rabbit_ra_systems:ensure_ra_system_stopped(?RA_SYSTEM), - ok = rabbit_file:recursive_delete( - filelib:wildcard(DataDir ++ "/*")), - - _ = file:delete(rabbit_guid:filename()), - ok; - true -> - throw({error, rabbitmq_unexpectedly_running}) - end. - -%% @private - force_shrink_member_to_current_member() -> ok = ra_server_proc:force_shrink_members_to_current_member( {?RA_CLUSTER_NAME, node()}). diff --git a/deps/rabbit/test/amqp_auth_SUITE.erl b/deps/rabbit/test/amqp_auth_SUITE.erl index f3cbdbf7d996..f272f6ce22b7 100644 --- a/deps/rabbit/test/amqp_auth_SUITE.erl +++ b/deps/rabbit/test/amqp_auth_SUITE.erl @@ -112,12 +112,17 @@ init_per_group(Group, Config0) -> Config1, rabbit_ct_broker_helpers:setup_steps() ++ rabbit_ct_client_helpers:setup_steps()), - Vhost = <<"test vhost">>, - User = <<"test user">>, - ok = rabbit_ct_broker_helpers:add_vhost(Config, Vhost), - ok = rabbit_ct_broker_helpers:add_user(Config, User), - [{test_vhost, Vhost}, - {test_user, User}] ++ Config. + case Config of + _ when is_list(Config) -> + Vhost = <<"test vhost">>, + User = <<"test user">>, + ok = rabbit_ct_broker_helpers:add_vhost(Config, Vhost), + ok = rabbit_ct_broker_helpers:add_user(Config, User), + [{test_vhost, Vhost}, + {test_user, User}] ++ Config; + {skip, _} = Skip -> + Skip + end. end_per_group(_Group, Config) -> ok = rabbit_ct_broker_helpers:delete_user(Config, ?config(test_user, Config)), diff --git a/deps/rabbit/test/clustering_management_SUITE.erl b/deps/rabbit/test/clustering_management_SUITE.erl index b3ebd74eb080..2fc9be09fe54 100644 --- a/deps/rabbit/test/clustering_management_SUITE.erl +++ b/deps/rabbit/test/clustering_management_SUITE.erl @@ -76,7 +76,6 @@ groups() -> status_with_alarm, pid_file_and_await_node_startup_in_khepri, await_running_count_in_khepri, - start_with_invalid_schema_in_path, persistent_cluster_id, stop_start_cluster_node, restart_cluster_node, @@ -331,7 +330,7 @@ restart_cluster_node(Config) -> assert_clustered([Rabbit, Hare]). join_and_part_cluster_in_khepri(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), @@ -441,38 +440,38 @@ join_to_start_interval(Config) -> assert_clustered([Rabbit, Hare]). join_cluster_in_minority(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), - stop_join_start(Config, Bunny, Rabbit), + stop_join_start(Config, Rabbit, Bunny), assert_clustered([Rabbit, Bunny]), - ok = rabbit_ct_broker_helpers:stop_node(Config, Bunny), + ok = rabbit_ct_broker_helpers:stop_node(Config, Rabbit), ok = stop_app(Config, Hare), - ?assertEqual(ok, join_cluster(Config, Hare, Rabbit, false)), + ?assertEqual(ok, join_cluster(Config, Hare, Bunny, false)), - ok = rabbit_ct_broker_helpers:start_node(Config, Bunny), + ok = rabbit_ct_broker_helpers:start_node(Config, Rabbit), ?assertEqual(ok, join_cluster(Config, Hare, Rabbit, false)), ?assertEqual(ok, start_app(Config, Hare)), assert_clustered([Rabbit, Bunny, Hare]). join_cluster_with_rabbit_stopped(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), - stop_join_start(Config, Bunny, Rabbit), + stop_join_start(Config, Rabbit, Bunny), assert_clustered([Rabbit, Bunny]), - ok = stop_app(Config, Bunny), + ok = stop_app(Config, Rabbit), ok = stop_app(Config, Hare), - ?assertEqual(ok, join_cluster(Config, Hare, Rabbit, false)), + ?assertEqual(ok, join_cluster(Config, Hare, Bunny, false)), - ok = start_app(Config, Bunny), + ok = start_app(Config, Rabbit), ?assertEqual(ok, join_cluster(Config, Hare, Rabbit, false)), ?assertEqual(ok, start_app(Config, Hare)), @@ -947,22 +946,11 @@ force_reset_node_in_khepri(Config) -> stop_join_start(Config, Rabbit, Hare), stop_app(Config, Rabbit), - ok = force_reset(Config, Rabbit), - assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, [Hare]), - %% Khepri is stopped, so it won't report anything. - assert_status({[Rabbit], [], [Rabbit], [Rabbit], []}, [Rabbit]), - %% Hare thinks that Rabbit is still clustered - assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Hare]}, - [Hare]), - ok = start_app(Config, Rabbit), - assert_not_clustered(Rabbit), - %% We can rejoin Rabbit and Hare. Unlike with Mnesia, we try to solve the - %% inconsistency instead of returning an error. - ok = stop_app(Config, Rabbit), - ?assertEqual(ok, join_cluster(Config, Rabbit, Hare, false)), - ok = start_app(Config, Rabbit), - assert_cluster_status({[Rabbit, Hare], [Rabbit, Hare], [Rabbit, Hare]}, - [Rabbit, Hare]). + {error, 69, Msg} = force_reset(Config, Rabbit), + ?assertEqual( + match, + re:run( + Msg, "Forced reset is unsupported with Khepri", [{capture, none}])). status_with_alarm(Config) -> [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, @@ -1124,7 +1112,7 @@ await_running_count_in_khepri(Config) -> await_running_count, [5, 1000])). start_nodes_in_reverse_order(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), @@ -1147,7 +1135,7 @@ start_nodes_in_reverse_order(Config) -> %% Test booting nodes in the wrong order for Mnesia. Interesting... start_nodes_in_stop_order(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), @@ -1172,7 +1160,7 @@ start_nodes_in_stop_order(Config) -> end. start_nodes_in_stop_order_in_khepri(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), @@ -1195,7 +1183,7 @@ start_nodes_in_stop_order_in_khepri(Config) -> %% TODO test force_boot with Khepri involved start_nodes_in_stop_order_with_force_boot(Config) -> - [Rabbit, Hare, Bunny] = cluster_members(Config), + [Rabbit, Bunny, Hare] = cluster_members(Config), assert_not_clustered(Rabbit), assert_not_clustered(Hare), assert_not_clustered(Bunny), diff --git a/deps/rabbit/test/rabbit_stream_queue_SUITE.erl b/deps/rabbit/test/rabbit_stream_queue_SUITE.erl index 2111e8e51cbf..3183a8783ebf 100644 --- a/deps/rabbit/test/rabbit_stream_queue_SUITE.erl +++ b/deps/rabbit/test/rabbit_stream_queue_SUITE.erl @@ -1563,13 +1563,13 @@ format(Config) -> case length(Nodes) of 3 -> [_, Server2, Server3] = Nodes, - ok = rabbit_control_helper:command(stop_app, Server2), ok = rabbit_control_helper:command(stop_app, Server3), + ok = rabbit_control_helper:command(stop_app, Server2), Fmt2 = rabbit_ct_broker_helpers:rpc(Config, Server, rabbit_stream_queue, ?FUNCTION_NAME, [QRecord, #{}]), - ok = rabbit_control_helper:command(start_app, Server2), ok = rabbit_control_helper:command(start_app, Server3), + ok = rabbit_control_helper:command(start_app, Server2), ?assertEqual(stream, proplists:get_value(type, Fmt2)), ?assertEqual(minority, proplists:get_value(state, Fmt2)), ?assertEqual(Server, proplists:get_value(leader, Fmt2)), @@ -2741,7 +2741,7 @@ retry_if_coordinator_unavailable(Config, Server, Cmd, Retry) -> case re:run(Msg, ".*coordinator_unavailable.*", [{capture, none}]) of match -> ct:pal("Attempt to execute command ~p failed, coordinator unavailable", [Cmd]), - retry_if_coordinator_unavailable(Config, Ch, Cmd, Retry - 1); + retry_if_coordinator_unavailable(Config, Server, Cmd, Retry - 1); _ -> exit(Error) end diff --git a/deps/rabbit/test/unit_credit_flow_SUITE.erl b/deps/rabbit/test/unit_credit_flow_SUITE.erl index 189d0287290d..bdc3a0679b85 100644 --- a/deps/rabbit/test/unit_credit_flow_SUITE.erl +++ b/deps/rabbit/test/unit_credit_flow_SUITE.erl @@ -11,6 +11,9 @@ -compile(export_all). +suite() -> + [{timetrap, {minutes, 3}}]. + all() -> [ {group, sequential_tests} diff --git a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl index 87fee4f5ae1d..ef1de028a9e2 100644 --- a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl +++ b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl @@ -55,7 +55,6 @@ kill_node_after/3, reset_node/2, - force_reset_node/2, forget_cluster_node/3, forget_cluster_node/4, @@ -174,7 +173,8 @@ user/1, configured_metadata_store/1, - await_metadata_store_consistent/2 + await_metadata_store_consistent/2, + do_nodes_run_same_ra_machine_version/2 ]). %% Internal functions exported to be used by rpc:call/4. @@ -1071,6 +1071,12 @@ ra_last_applied(ServerId) -> #{last_applied := LastApplied} = ra:key_metrics(ServerId), LastApplied. +do_nodes_run_same_ra_machine_version(Config, RaMachineMod) -> + [MacVer1 | MacVerN] = MacVers = rpc_all(Config, RaMachineMod, version, []), + ct:pal("Ra machine versions of ~s: ~0p", [RaMachineMod, MacVers]), + is_integer(MacVer1) andalso + lists:all(fun(MacVer) -> MacVer =:= MacVer1 end, MacVerN). + rewrite_node_config_file(Config, Node) -> NodeConfig = get_node_config(Config, Node), I = if @@ -2055,10 +2061,6 @@ reset_node(Config, Node) -> Name = get_node_config(Config, Node, nodename), rabbit_control_helper:command(reset, Name). -force_reset_node(Config, Node) -> - Name = get_node_config(Config, Node, nodename), - rabbit_control_helper:command(force_reset, Name). - forget_cluster_node(Config, Node, NodeToForget) -> forget_cluster_node(Config, Node, NodeToForget, []). forget_cluster_node(Config, Node, NodeToForget, Opts) -> diff --git a/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl b/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl index 194d6b2e4132..044860906269 100644 --- a/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl +++ b/deps/rabbitmq_peer_discovery_consul/test/system_SUITE.erl @@ -81,9 +81,27 @@ init_per_testcase(Testcase, Config) case Config3 of _ when is_list(Config3) -> try - _ = rabbit_ct_broker_helpers:rpc_all( - Config3, rabbit_peer_discovery_backend, api_version, []), - Config3 + SameMacVer = ( + rabbit_ct_broker_helpers: + do_nodes_run_same_ra_machine_version( + Config3, khepri_machine)), + case SameMacVer of + true -> + _ = rabbit_ct_broker_helpers:rpc_all( + Config3, + rabbit_peer_discovery_backend, api_version, []), + Config3; + false -> + Config5 = rabbit_ct_helpers:run_steps( + Config3, + rabbit_ct_client_helpers:teardown_steps() + ++ + rabbit_ct_broker_helpers:teardown_steps()), + rabbit_ct_helpers:testcase_finished(Config5, Testcase), + {skip, + "Nodes are using different Khepri Ra machine " + "versions; clustering will likely fail"} + end catch error:{exception, undef, [{rabbit_peer_discovery_backend, api_version, _, _} diff --git a/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl b/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl index 2f7c0bcda85e..3d68526c25a4 100644 --- a/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl +++ b/deps/rabbitmq_peer_discovery_etcd/test/system_SUITE.erl @@ -90,9 +90,27 @@ init_per_testcase(Testcase, Config) case Config3 of _ when is_list(Config3) -> try - _ = rabbit_ct_broker_helpers:rpc_all( - Config3, rabbit_peer_discovery_backend, api_version, []), - Config3 + SameMacVer = ( + rabbit_ct_broker_helpers: + do_nodes_run_same_ra_machine_version( + Config3, khepri_machine)), + case SameMacVer of + true -> + _ = rabbit_ct_broker_helpers:rpc_all( + Config3, + rabbit_peer_discovery_backend, api_version, []), + Config3; + false -> + Config5 = rabbit_ct_helpers:run_steps( + Config3, + rabbit_ct_client_helpers:teardown_steps() + ++ + rabbit_ct_broker_helpers:teardown_steps()), + rabbit_ct_helpers:testcase_finished(Config5, Testcase), + {skip, + "Nodes are using different Khepri Ra machine " + "versions; clustering will likely fail"} + end catch error:{exception, undef, [{rabbit_peer_discovery_backend, api_version, _, _} @@ -237,7 +255,9 @@ wait_for_etcd(EtcdEndpoints) -> Timeout = 60000, rabbit_ct_helpers:await_condition( fun() -> - case eetcd:open(test, EtcdEndpoints) of + Ret = eetcd:open(test, EtcdEndpoints), + ct:pal("Ret = ~p", [Ret]), + case Ret of {ok, _Pid} -> true; _ -> false end