Skip to content

Commit

Permalink
scripts/lbnl_ps.nhc: Improve LSF support
Browse files Browse the repository at this point in the history
Based on a couple of changes suggested by @SMark-Black in his PR #53, add
another command to look for when auto-detecting LSF, and add support for
the LSF `res` daemon to the `check_ps_userproc_lineage()` check.  Also
move the setting of `$RM_DAEMON_MATCH` to inside the check -- that's
the only thing in the entire file that actually requires a
resource manager!
  • Loading branch information
mej committed Dec 29, 2018
1 parent 88a5fab commit 9e8d0dc
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 30 deletions.
6 changes: 3 additions & 3 deletions nhc
Expand Up @@ -449,12 +449,12 @@ function nhcmain_find_rm() {
elif type -a -p -f -P scontrol >&/dev/null ; then
NHC_RM="slurm"
return 0
elif type -a -p -f -P badmin >&/dev/null ; then
NHC_RM="lsf"
return 0
elif type -a -p -f -P qselect >&/dev/null ; then
NHC_RM="sge"
return 0
elif type -a -p -f -P badmin >&/dev/null || type -a -p -f -P sbatchd >&/dev/null ; then
NHC_RM="lsf"
return 0
fi

if [[ -z "$NHC_RM" ]]; then
Expand Down
63 changes: 36 additions & 27 deletions scripts/lbnl_ps.nhc
Expand Up @@ -34,29 +34,7 @@ function nhc_ps_gather_data() {

# We need this for user authorization checks. Try to read system defaults.
[[ -z "$MAX_SYS_UID" ]] && nhc_common_get_max_sys_uid
MAX_SYS_UID=${MAX_SYS_UID:-99}

# Set default for $RM_DAEMON_MATCH based on configured resource manager.
if [[ "$NHC_RM" == "pbs" ]]; then
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bpbs_mom\b/}"
elif [[ "$NHC_RM" == "slurm" ]]; then
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bslurmstepd\b/}"
elif [[ "$NHC_RM" == "sge" ]]; then
# If you limit this to execd, you lose when it's been restarted,
# and the shepherd is detached. Even if execd is safe because of
# system uids, it can spawn mail commands as the job owner, at
# least. (The shepherd process name is normally
# sge_shepherd-<jobnum>, but maybe not if you change shepherd_cmd.)
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bsge_(execd|shepherd)\b/}"
else
dbg "Unsupported RM detected in ${FUNCNAME}(): \"$NHC_RM\""
fi
if [[ ! "test" =~ \btest\b ]]; then
# Workaround for lack of \b support in regexp library.
RM_DAEMON_MATCH="${RM_DAEMON_MATCH//#\\b/(^|[^A-Za-z0-9])}"
RM_DAEMON_MATCH="${RM_DAEMON_MATCH//%\\b/(\$|[^A-Za-z0-9])}"
fi


# Create array $LINES[] by splitting "ps" output on newlines.
IFS=$'\n'
LINES=( $(COLUMNS=1024 ps axo 'user:32,uid,pid,ppid,pcpu,pmem,rss,vsz,bsdtime,args') )
Expand Down Expand Up @@ -115,6 +93,9 @@ function nhc_ps_pid_lineage() {
nhc_ps_gather_data
fi

if [[ -z "$PROCNAME" ]]; then
return 1
fi
while [[ -n "$THIS_PID" && $THIS_PID -ne 1 ]]; do
THIS_PID=${PS_PPID[$THIS_PID]}
mcheck "${PS_ARGS[$THIS_PID]}" "$PROCNAME" && return 0
Expand Down Expand Up @@ -647,8 +628,8 @@ function check_ps_unauth_users() {
u) IGNORE_UID="$IGNORE_UID $OPTARG" ; dbg "Ignoring UID $OPTARG" ;;
n) IGNORE_USER="$IGNORE_USER $OPTARG" ; dbg "Ignoring User $OPTARG" ;;
p) IGNORE_PID="$IGNORE_PID $OPTARG" ; dbg "Ignoring PID $OPTARG" ;;
:) die 1 "$CHECK: Option -$OPTARG requires an argument." ; return 1 ;;
\?) die 1 "$CHECK: Invalid option: -$OPTARG" ; return 1 ;;
:) die 1 "$FUNCNAME: Option -$OPTARG requires an argument." ; return 1 ;;
\?) die 1 "$FUNCNAME: Invalid option: -$OPTARG" ; return 1 ;;
esac
done
shift $((OPTIND-1))
Expand Down Expand Up @@ -715,8 +696,8 @@ function check_ps_userproc_lineage() {
u) IGNORE_UID="$IGNORE_UID $OPTARG" ; dbg "Ignoring UID $OPTARG" ;;
n) IGNORE_USER="$IGNORE_USER $OPTARG" ; dbg "Ignoring User $OPTARG" ;;
p) IGNORE_PID="$IGNORE_PID $OPTARG" ; dbg "Ignoring PID $OPTARG" ;;
:) die 1 "$CHECK: Option -$OPTARG requires an argument." ; return 1 ;;
\?) die 1 "$CHECK: Invalid option: -$OPTARG" ; return 1 ;;
:) die 1 "$FUNCNAME: Option -$OPTARG requires an argument." ; return 1 ;;
\?) die 1 "$FUNCNAME: Invalid option: -$OPTARG" ; return 1 ;;
esac
done
shift $((OPTIND-1))
Expand All @@ -734,6 +715,34 @@ function check_ps_userproc_lineage() {
nhc_common_load_passwd
fi

if [[ -z "$NHC_RM" || "$NHC_RM" == "none" ]]; then
die 1 "$FUNCNAME: This check requires a supported resource manager (e.g., Slurm, TORQUE, SGE, LSF)"
return 1
elif [[ -z "$RM_DAEMON_MATCH" ]]; then
# Set default for $RM_DAEMON_MATCH based on configured resource manager.
if [[ "$NHC_RM" == "pbs" ]]; then
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bpbs_mom\b/}"
elif [[ "$NHC_RM" == "slurm" ]]; then
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bslurmstepd\b/}"
elif [[ "$NHC_RM" == "sge" ]]; then
# If you limit this to execd, you lose when it's been restarted,
# and the shepherd is detached. Even if execd is safe because of
# system uids, it can spawn mail commands as the job owner, at
# least. (The shepherd process name is normally
# sge_shepherd-<jobnum>, but maybe not if you change shepherd_cmd.)
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bsge_(execd|shepherd)\b/}"
elif [[ "$NHC_RM" == "lsf" ]]; then
RM_DAEMON_MATCH="${RM_DAEMON_MATCH:-/\bres\b/}"
else
dbg "Unsupported RM detected in ${FUNCNAME}(): \"$NHC_RM\""
fi
if [[ ! "test" =~ \btest\b ]]; then
# Workaround for lack of \b support in regexp library.
RM_DAEMON_MATCH="${RM_DAEMON_MATCH//#\\b/(^|[^A-Za-z0-9])}"
RM_DAEMON_MATCH="${RM_DAEMON_MATCH//%\\b/(\$|[^A-Za-z0-9])}"
fi
fi

for ((i=0; i < ${#PS_PROCS[*]}; i++)); do
THIS_PID=${PS_PROCS[$i]}
THIS_UID="${PS_UID[$THIS_PID]}"
Expand Down

0 comments on commit 9e8d0dc

Please sign in to comment.