Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 46 additions & 12 deletions diagnostic_aggregator/src/aggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,9 @@ void Aggregator::publishData()
diag_toplevel_state.name = "toplevel_state";
diag_toplevel_state.level = DiagnosticStatus::STALE;
int max_level = -1;
int min_level = 255;
uint8_t max_level_without_stale = 0;
int non_ok_status_depth = 0;
std::shared_ptr<DiagnosticStatus> msg_to_report;

std::vector<std::shared_ptr<DiagnosticStatus>> processed;
{
Expand All @@ -239,39 +241,71 @@ void Aggregator::publishData()
}
for (const auto & msg : processed) {
diag_array.status.push_back(*msg);
const auto depth = std::count(msg->name.begin(), msg->name.end(), '/');

if (msg->level > max_level) {
max_level = msg->level;
non_ok_status_depth = depth;
msg_to_report = msg;
}
if (msg->level < min_level) {
min_level = msg->level;
if (msg->level == max_level && depth > non_ok_status_depth) {
// On non okay diagnostics also copy the deepest message to toplevel state
non_ok_status_depth = depth;
msg_to_report = msg;
}
if (
msg->level > max_level_without_stale &&
msg->level != diagnostic_msgs::msg::DiagnosticStatus::STALE)
{
max_level_without_stale = msg->level;
}
}

std::vector<std::shared_ptr<DiagnosticStatus>> processed_other =
other_analyzer_->report();
for (const auto & msg : processed_other) {
diag_array.status.push_back(*msg);
const auto depth = std::count(msg->name.begin(), msg->name.end(), '/');

if (msg->level > max_level) {
max_level = msg->level;
non_ok_status_depth = depth;
msg_to_report = msg;
}
if (msg->level == max_level && depth > non_ok_status_depth) {
// On non okay diagnostics also copy the deepest message to toplevel state
non_ok_status_depth = depth;
msg_to_report = msg;
}
if (msg->level < min_level) {
min_level = msg->level;
if (
msg->level > max_level_without_stale &&
msg->level != diagnostic_msgs::msg::DiagnosticStatus::STALE)
{
max_level_without_stale = msg->level;
}
}

// When a non-ok item was found, surface the offender via message/hardware_id/values
// but keep name stable as "toplevel_state" to avoid breaking downstream consumers
if (max_level > DiagnosticStatus::OK && msg_to_report) {
diag_toplevel_state.message = msg_to_report->name + ": " + msg_to_report->message;
diag_toplevel_state.hardware_id = msg_to_report->hardware_id;
diag_toplevel_state.values = msg_to_report->values;
}

diag_array.header.stamp = clock_->now();
agg_pub_->publish(diag_array);

diag_toplevel_state.level = max_level;
if (max_level < 0 ||
(max_level > DiagnosticStatus::ERROR && min_level <= DiagnosticStatus::ERROR))
{
// Top level is error if we got no diagnostic level or
// have stale items but not all are stale
diag_toplevel_state.level = DiagnosticStatus::ERROR;
if (max_level_without_stale > DiagnosticStatus::OK) {
diag_toplevel_state.level = max_level_without_stale;
} else if (max_level == diagnostic_msgs::msg::DiagnosticStatus::STALE) {
diag_toplevel_state.level = diagnostic_msgs::msg::DiagnosticStatus::STALE;
} else if (max_level < 0) {
diag_toplevel_state.level = diagnostic_msgs::msg::DiagnosticStatus::STALE;
} else {
diag_toplevel_state.level = DiagnosticStatus::OK;
}

last_top_level_state_ = diag_toplevel_state.level;

toplevel_state_pub_->publish(diag_toplevel_state);
Expand Down
17 changes: 11 additions & 6 deletions diagnostic_aggregator/src/analyzer_group.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ std::vector<std::shared_ptr<diagnostic_msgs::msg::DiagnosticStatus>> AnalyzerGro
return output;
}

bool all_stale = true;
uint8_t max_level_without_stale = 0;

for (auto j = 0u; j < analyzers_.size(); ++j) {
std::string path = analyzers_[j]->getPath();
Expand All @@ -317,17 +317,22 @@ std::vector<std::shared_ptr<diagnostic_msgs::msg::DiagnosticStatus>> AnalyzerGro
kv.key = nice_name;
kv.value = processed[i]->message;

all_stale = all_stale &&
(processed[i]->level == diagnostic_msgs::msg::DiagnosticStatus::STALE);
if (processed[i]->level != diagnostic_msgs::msg::DiagnosticStatus::STALE) {
max_level_without_stale = max(max_level_without_stale, processed[i]->level);
}
header_status->level = max(header_status->level, processed[i]->level);
header_status->values.push_back(kv);
}
}
}

// Report stale as errors unless all stale
if (header_status->level == diagnostic_msgs::msg::DiagnosticStatus::STALE && !all_stale) {
header_status->level = diagnostic_msgs::msg::DiagnosticStatus::ERROR;
// WARN/ERROR always beats STALE; if only STALE present, report STALE
if (max_level_without_stale > diagnostic_msgs::msg::DiagnosticStatus::OK) {
header_status->level = max_level_without_stale;
} else if (header_status->level == diagnostic_msgs::msg::DiagnosticStatus::STALE) {
header_status->level = diagnostic_msgs::msg::DiagnosticStatus::STALE;
} else {
header_status->level = diagnostic_msgs::msg::DiagnosticStatus::OK;
}

header_status->message = valToMsg(header_status->level);
Expand Down
113 changes: 102 additions & 11 deletions diagnostic_aggregator/test/test_discard_behavior.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
# POSSIBILITY OF SUCH DAMAGE.
#
# DESCRIPTION
# This test ensures that a parent AnalyzerGroup does not roll up an ERROR state when a
# This test ensures that a parent AnalyzerGroup does *not* roll up an ERROR state when a
# GenericAnalyzer child block is marked with discard_stale: true and either of these
# conditions are met:
#
Expand Down Expand Up @@ -86,6 +86,7 @@
# A status value of 'None' means that the state is never sent (it's missing).
TEST_METADATA = [
# CASE 1: both 'foo' and 'bar' are marked discard_stale := true
# --> the aggregator should roll up to OK, because the stale statuses are discarded.
TestMetadata(
foo_discard=True,
foo_status=None,
Expand All @@ -107,7 +108,16 @@
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.STALE,
bar_discard=True,
bar_status=None,
agg_expected=DiagnosticStatus.OK,
),
# CASE 2: both 'foo' and 'bar' are marked discard_stale := false
# --> the aggregator should roll up to STALE, as long as they aggregated are either
# missing or stale, because the stale statuses are *not* discarded.
TestMetadata(
foo_discard=False,
foo_status=None,
Expand All @@ -129,38 +139,119 @@
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.STALE,
),
# CASE 3: one of 'foo' or 'bar' are marked discard_stale := true
TestMetadata(
foo_discard=False,
foo_status=DiagnosticStatus.STALE,
bar_discard=False,
bar_status=None,
agg_expected=DiagnosticStatus.STALE,
),
# CASE 3: one of 'foo' or 'bar' are marked discard_stale := true and the corresponding status
# is missing or stale while the other is OK.
# --> the aggregator should roll up to OK, because the stale status is discarded.
TestMetadata(
foo_discard=True,
foo_status=None,
bar_discard=False,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.STALE,
bar_discard=False,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.OK,
bar_discard=False,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=False,
foo_status=DiagnosticStatus.OK,
bar_discard=True,
bar_status=None,
agg_expected=DiagnosticStatus.ERROR, # <-- This is the case we are testing for.
# if one of the children is *not* marked discard_stale := true and
# there are no statuses, then the parent should roll up to ERROR.
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=False,
foo_status=DiagnosticStatus.OK,
bar_discard=True,
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.OK,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.OK,
bar_discard=False,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK,
),
# CASE 4: one of 'foo' or 'bar' are marked discard_stale := true and the corresponding status
# is missing or stale while the other is missing or stale.
# --> the aggregator should roll up to STALE
TestMetadata(
foo_discard=True,
foo_status=None,
bar_discard=False,
bar_status=None,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.STALE,
bar_discard=False,
bar_status=None,
agg_expected=DiagnosticStatus.ERROR,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=True,
foo_status=None,
bar_discard=False,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK, # <-- This is the case we are testing for.
# but if a child is marked discard_stale := true and there are no statuses,
# the parent should roll up to OK.
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.STALE,
bar_discard=False,
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.STALE,
),
# CASE 5: one of 'foo' or 'bar' are marked discard_stale := true and the corresponding status
# is OK while the other is missing or stale.
# --> the aggregator should roll up to STALE, because it has a higher severity than OK.
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.OK,
bar_discard=False,
bar_status=None,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=True,
foo_status=DiagnosticStatus.OK,
bar_discard=False,
bar_status=DiagnosticStatus.STALE,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=False,
foo_status=None,
bar_discard=True,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.STALE,
),
TestMetadata(
foo_discard=False,
foo_status=DiagnosticStatus.STALE,
bar_discard=True,
bar_status=DiagnosticStatus.OK,
agg_expected=DiagnosticStatus.STALE,
),
]

Expand Down
Loading