Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Return non-zero code on logexporter failures #95563

Merged
merged 1 commit into from Oct 23, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions cluster/log-dump/log-dump.sh
Expand Up @@ -63,6 +63,14 @@ readonly windows_node_otherfiles="C:\\Windows\\MEMORY.dmp"
# file descriptors for large clusters.
readonly max_dump_processes=25

# Failure flag for the logexporter path: stays 0 unless a significant failure
# happens while creating or running logexporter, in which case it is set to 1.
logexporter_failed=0

# Minimum percentage of nodes that have to be logexported successfully; when
# fewer succeed, the process exits with a non-zero exit code. The value comes
# from LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE and falls back to 0 when that is
# unset or empty.
log_dump_expected_success_percentage="${LOG_DUMP_EXPECTED_SUCCESS_PERCENTAGE:-0}"
readonly log_dump_expected_success_percentage

# TODO: Get rid of all the sourcing of bash dependencies eventually.
function setup() {
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/../..
Expand Down Expand Up @@ -546,6 +554,7 @@ function dump_nodes_with_logexporter() {
echo 'Failed to create logexporter daemonset.. falling back to logdump through SSH'
"${KUBECTL}" delete namespace "${logexporter_namespace}" || true
dump_nodes "${NODE_NAMES[@]}"
logexporter_failed=1
return
fi

Expand Down Expand Up @@ -595,6 +604,7 @@ function dump_nodes_with_logexporter() {
echo 'Final attempt to list marker files failed.. falling back to logdump through SSH'
"${KUBECTL}" delete namespace "${logexporter_namespace}" || true
dump_nodes "${NODE_NAMES[@]}"
logexporter_failed=1
return
fi
sleep 2
Expand All @@ -611,6 +621,11 @@ function dump_nodes_with_logexporter() {
done
fi

# If the percentage of nodes that got logexported is below the expected
# success percentage, mark the logexporter run as failed.
if [[ $(((${#NODE_NAMES[@]} - ${#failed_nodes[@]}) * 100)) -lt $((${#NODE_NAMES[@]} * log_dump_expected_success_percentage )) ]]; then
logexporter_failed=1
fi

# Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
"${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
"${KUBECTL}" delete namespace "${logexporter_namespace}" || true
Expand Down Expand Up @@ -675,6 +690,9 @@ function main() {
fi

detect_node_failures
if [[ ${logexporter_failed} -ne 0 && ${log_dump_expected_success_percentage} -gt 0 ]]; then
return 1
fi
}

main