From b1cfedc71209a9fe91016b90107ac2e3639eab43 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Mon, 23 Feb 2015 16:10:39 +0000 Subject: [PATCH 1/3] Fix clearer timeout_waiting_for_tables error message ...and replace {boot_step, _, _} wrapping with an extra parameter which might make it harder to make similar mistakes in future. --- src/rabbit.erl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 664da2068892..3bfd4428986c 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -335,9 +335,9 @@ start_it(StartFun) -> end catch throw:{could_not_start, _App, _Reason}=Err -> - boot_error(Err, not_available); + boot_error(Err, wrapper, not_available); _:Reason -> - boot_error(Reason, erlang:get_stacktrace()) + boot_error(Reason, wrapper, erlang:get_stacktrace()) after unlink(Marker), Marker ! stop, @@ -539,10 +539,9 @@ run_step(StepName, Attributes, AttributeName) -> apply(M,F,A) of ok -> ok; - {error, Reason} -> boot_error({boot_step, StepName, Reason}, - not_available) + {error, Reason} -> boot_error(Reason, StepName, not_available) catch - _:Reason -> boot_error({boot_step, StepName, Reason}, + _:Reason -> boot_error(Reason, StepName, erlang:get_stacktrace()) end || {M,F,A} <- MFAs], ok @@ -607,9 +606,10 @@ sort_boot_steps(UnsortedSteps) -> end. -ifdef(use_specs). --spec(boot_error/2 :: (term(), not_available | [tuple()]) -> no_return()). +-spec(boot_error/3 :: (term(), atom(), not_available | [tuple()]) + -> no_return()). -endif. -boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> +boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Step, _Stacktrace) -> AllNodes = rabbit_mnesia:cluster_nodes(all), {Err, Nodes} = case AllNodes -- [node()] of @@ -622,10 +622,11 @@ boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Stacktrace) -> end, basic_boot_error(Term, Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); -boot_error(Reason, Stacktrace) -> - Fmt = "Error description:~n ~p~n~n" ++ +boot_error(Reason, Step, Stacktrace) -> + Fmt = "Boot step:~n ~p~n~n" + "Error description:~n ~p~n~n" "Log files (may contain more information):~n ~s~n ~s~n~n", - Args = [Reason, log_location(kernel), log_location(sasl)], + Args = [Step, Reason, log_location(kernel), log_location(sasl)], boot_error(Reason, Fmt, Args, Stacktrace). -ifdef(use_specs). From cfc289b72cfc2619b4694fe13fa100c64d328018 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 26 Feb 2015 14:48:28 +0000 Subject: [PATCH 2/3] Detangle boot error handling, step 1 We want to clean up basic_boot_error/3, but these call sites make it harder. Really, trying to produce nicely formatted errors here is a waste of time, these errors can only be caused when adding boot steps. Normal users should never see them. So don't complicate things by handling them specially. --- src/rabbit.erl | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 3bfd4428986c..7e3522466975 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -580,29 +580,13 @@ sort_boot_steps(UnsortedSteps) -> {_App, StepName, Attributes} <- SortedSteps, {mfa, {M,F,A}} <- Attributes, not erlang:function_exported(M, F, length(A))] of - [] -> SortedSteps; - MissingFunctions -> basic_boot_error( - {missing_functions, MissingFunctions}, - "Boot step functions not exported: ~p~n", - [MissingFunctions]) + [] -> SortedSteps; + MissingFns -> exit({boot_functions_not_exported, MissingFns}) end; {error, {vertex, duplicate, StepName}} -> - basic_boot_error({duplicate_boot_step, StepName}, - "Duplicate boot step name: ~w~n", [StepName]); + exit({duplicate_boot_step, StepName}); {error, {edge, Reason, From, To}} -> - basic_boot_error( - {invalid_boot_step_dependency, From, To}, - "Could not add boot step dependency of ~w on ~w:~n~s", - [To, From, - case Reason of - {bad_vertex, V} -> - io_lib:format("Boot step not registered: ~w~n", [V]); - {bad_edge, [First | Rest]} -> - [io_lib:format("Cyclic dependency: ~w", [First]), - [io_lib:format(" depends on ~w", [Next]) || - Next <- Rest], - io_lib:format(" depends on ~w~n", [First])] - end]) + exit({invalid_boot_step_dependency, From, To, Reason}) end. -ifdef(use_specs). From db0aaf821c7f41810bd7f2a452205548633355b4 Mon Sep 17 00:00:00 2001 From: Simon MacMullen Date: Thu, 26 Feb 2015 14:54:58 +0000 Subject: [PATCH 3/3] Detangle boot error handling, step 2 We used to have a weird situation where an error inside a boot step would cause us to go through error logging twice. The exception would get caught in run_step/3, then go through boot_error() etc, and basic_boot_error/3 would exit again, leading to more stuff being logged as an application start failure. So to fix that, let's exit with a special atom which prevents further logging. Also rename functions to be a bit more meaningful. --- src/rabbit.erl | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/rabbit.erl b/src/rabbit.erl index 7e3522466975..de91f8a39d7c 100644 --- a/src/rabbit.erl +++ b/src/rabbit.erl @@ -334,7 +334,11 @@ start_it(StartFun) -> false -> StartFun() end catch - throw:{could_not_start, _App, _Reason}=Err -> + throw:{could_not_start, rabbit, + boot_failure_already_logged} -> + throw(boot_failure_already_logged), + ok; + throw:{could_not_start, _App, _Reason} = Err -> boot_error(Err, wrapper, not_available); _:Reason -> boot_error(Reason, wrapper, erlang:get_stacktrace()) @@ -387,7 +391,7 @@ stop_apps(Apps) -> ok. handle_app_error(Term) -> - fun(App, {bad_return, {_MFA, {'EXIT', {ExitReason, _}}}}) -> + fun(App, {bad_return, {_MFA, {'EXIT', ExitReason}}}) -> throw({Term, App, ExitReason}); (App, Reason) -> throw({Term, App, Reason}) @@ -593,7 +597,7 @@ sort_boot_steps(UnsortedSteps) -> -spec(boot_error/3 :: (term(), atom(), not_available | [tuple()]) -> no_return()). -endif. -boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Step, _Stacktrace) -> +boot_error({error, {timeout_waiting_for_tables, _}}, _Step, _Stacktrace) -> AllNodes = rabbit_mnesia:cluster_nodes(all), {Err, Nodes} = case AllNodes -- [node()] of @@ -604,30 +608,34 @@ boot_error(Term={error, {timeout_waiting_for_tables, _}}, _Step, _Stacktrace) -> "Timeout contacting cluster nodes: ~p.~n", [Ns]), Ns} end, - basic_boot_error(Term, - Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); + log_boot_error_and_exit( + Err ++ rabbit_nodes:diagnostics(Nodes) ++ "~n~n", []); boot_error(Reason, Step, Stacktrace) -> - Fmt = "Boot step:~n ~p~n~n" + Fmt0 = case Step of + wrapper -> ""; + _ -> rabbit_misc:format("Boot step:~n ~p~n~n", [Step]) + end, + Fmt = Fmt0 ++ "Error description:~n ~p~n~n" "Log files (may contain more information):~n ~s~n ~s~n~n", - Args = [Step, Reason, log_location(kernel), log_location(sasl)], - boot_error(Reason, Fmt, Args, Stacktrace). + Args = [Reason, log_location(kernel), log_location(sasl)], + boot_error1(Fmt, Args, Stacktrace). -ifdef(use_specs). --spec(boot_error/4 :: (term(), string(), [any()], not_available | [tuple()]) +-spec(boot_error1/3 :: (string(), [any()], not_available | [tuple()]) -> no_return()). -endif. -boot_error(Reason, Fmt, Args, not_available) -> - basic_boot_error(Reason, Fmt, Args); -boot_error(Reason, Fmt, Args, Stacktrace) -> - basic_boot_error(Reason, Fmt ++ "Stack trace:~n ~p~n~n", - Args ++ [Stacktrace]). +boot_error1(Fmt, Args, not_available) -> + log_boot_error_and_exit(Fmt, Args); +boot_error1(Fmt, Args, Stacktrace) -> + log_boot_error_and_exit(Fmt ++ "Stack trace:~n ~p~n~n", + Args ++ [Stacktrace]). -basic_boot_error(Reason, Format, Args) -> +log_boot_error_and_exit(Format, Args) -> io:format("~n~nBOOT FAILED~n===========~n~n" ++ Format, Args), rabbit_log:info(Format, Args), timer:sleep(1000), - exit({?MODULE, failure_during_boot, Reason}). + exit(boot_failure_already_logged). %%--------------------------------------------------------------------------- %% boot step functions