Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

config: enable crash_loop_limit by default #13431

Merged
merged 2 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/v/config/node_config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,10 @@ node_config::node_config() noexcept
*this,
"crash_loop_limit",
"Maximum consecutive crashes (unclean shutdowns) allowed after which "
"operator intervention is needed to startup the broker.",
"operator intervention is needed to startup the broker. Limit is not "
"enforced in developer mode.",
{.visibility = visibility::user},
std::nullopt)
5)
, upgrade_override_checks(
*this,
"upgrade_override_checks",
Expand Down
19 changes: 14 additions & 5 deletions src/v/redpanda/application.cc
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ void application::check_environment() {
/// tracker file. This prevents on-disk state from piling up with
/// each unclean run and creating more state to recover on the next run.
void application::check_for_crash_loop() {
if (config::node().developer_mode()) {
// Crash loop tracking has value only in long-running clusters
// that can potentially accumulate state across restarts.
return;
}
auto file_path = config::node().data_directory().path
/ crash_loop_tracker_file;
std::optional<crash_tracker_metadata> maybe_crash_md;
Expand Down Expand Up @@ -835,11 +840,15 @@ void application::schedule_crash_tracker_file_cleanup() {
// next run.
// We emplace it in the front to make it the last task to run.
_deferred.emplace_front([&] {
auto file = config::node().data_directory().path
/ crash_loop_tracker_file;
ss::remove_file(file.string()).get();
ss::sync_directory(config::node().data_directory().as_sstring()).get();
vlog(_log.debug, "Deleted crash loop tracker file: {}", file);
auto file = (config::node().data_directory().path
/ crash_loop_tracker_file)
.string();
if (ss::file_exists(file).get()) {
ss::remove_file(file).get();
ss::sync_directory(config::node().data_directory().as_sstring())
.get();
vlog(_log.debug, "Deleted crash loop tracker file: {}", file);
}
});
}

Expand Down
3 changes: 2 additions & 1 deletion tests/rptest/tests/crash_loop_checks_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def __init__(self, test_context):
num_brokers=1,
extra_node_conf={
"crash_loop_limit":
CrashLoopChecksTest.CRASH_LOOP_LIMIT
CrashLoopChecksTest.CRASH_LOOP_LIMIT,
"developer_mode": False
})

def remove_crash_loop_tracker_file(self, broker):
Expand Down
Loading