Skip to content
This repository has been archived by the owner on Sep 12, 2023. It is now read-only.

Fix exitCode check #190

Merged
merged 1 commit into from Jun 9, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 1 addition & 34 deletions pkg/util/train/train_util.go
Expand Up @@ -16,38 +16,5 @@
package train

// IsRetryableExitCode reports whether a container exit code should be treated
// as a retryable failure.
//
// Exit codes of 128 and above are considered retryable. A container that is
// terminated by fatal signal n exits with 128+n, for example:
//
//	130 (128+2):  terminated by Control-C
//	137 (128+9):  received SIGKILL
//	138 (128+10): SIGUSR1, the code users may exit with to request a retry
//	143 (128+15): received SIGTERM
//
// Such terminations are usually caused by transient issues (e.g. the VM was
// rescheduled). Exit codes below 128 (such as 1 general errors, 2 misuse of
// shell builtins, 126 command cannot execute, 127 command not found) are
// treated as permanent errors, as are negative values.
//
// More info can be found in:
// http://tldp.org/LDP/abs/html/exitcodes.html,
// https://stackoverflow.com/questions/31297616/what-is-the-authoritative-list-of-docker-run-exit-codes
func IsRetryableExitCode(exitCode int32) bool {
	return exitCode >= 128
}