Skip to content

Commit

Permalink
Update hardware raid to account for unconfigured drives
Browse files Browse the repository at this point in the history
  • Loading branch information
solarkennedy committed Apr 3, 2012
1 parent 25e4a6f commit 2f5fa2b
Show file tree
Hide file tree
Showing 7 changed files with 300 additions and 3 deletions.
8 changes: 5 additions & 3 deletions check_hardware_raid
Original file line number Diff line number Diff line change
Expand Up @@ -287,9 +287,11 @@ sub check_megacli {

foreach my $dev (@devs) {
push(@status, sprintf "Dev%02d (%s): %s", $dev->{dev}, $dev->{name}, $dev->{state});
if ($dev->{state} ne 'Online, Spun Up') {
# TODO: process other statuses
$status = $ERRORS{CRITICAL};
if ($dev->{state} ne 'Online, Spun Up' and $dev->{state} ne 'Unconfigured(good), Spun Up') {
# TODO: process other statuses
$status = $ERRORS{CRITICAL};
#print $dev->{state};
#print "\n";
}
}

Expand Down
99 changes: 99 additions & 0 deletions check_nx_lock_sanity
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
# Gets a list of locks and compares them against what NX thinks it should have.
# Reports back if there is a difference.

# Check for the plugin directory where utils.sh lives. When both directories
# exist the lib64 one wins, because it is assigned last.
[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins


# Load states and strings (the STATE_* values used by the rest of the script)
if [ -x "$UTILPATH"/utils.sh ]; then
    . "$UTILPATH"/utils.sh
else
    echo "ERROR: Cannot find utils.sh"
    # A bare `exit` would return the status of the echo above (0), which a
    # Nagios-style caller interprets as OK. The STATE_* variables come from
    # utils.sh and are therefore unavailable here, so use the literal
    # UNKNOWN code.
    exit 3
fi

# Our field separator is the newline, not a space or tab. Needed when we
# iterate through the output of nxserver --list one line at a time.
IFS="
"

STATE=$STATE_OK

# Every scratch file below is created under $TEMPFOLDER. If mktemp fails the
# variable is empty and those paths would resolve relative to "/", so stop
# here instead of carrying on with a bogus directory.
TEMPFOLDER=$(mktemp -d)
if [ -z "$TEMPFOLDER" ] || [ ! -d "$TEMPFOLDER" ]; then
    echo "ERROR: Cannot create temporary folder"
    exit ${STATE_UNKNOWN:-3}
fi

# If we are load balancing, check all defined nodes, localhost otherwise.
# node.conf is expected to set LOAD_BALANCE_SERVERS when load balancing is on.
source /etc/nxserver/node.conf
if [ -z "$LOAD_BALANCE_SERVERS" ]; then
    NXTARGETS="127.0.0.1"
else
    NXTARGETS=$LOAD_BALANCE_SERVERS
fi

# Collect what NX thinks the locks should be.
# The grep keeps only lines that contain at least three dots; presumably these
# are the session rows, which start with the node address (verify against the
# actual nxserver --list output).
for line in $(sudo /usr/bin/nxserver --list | grep "^.*\..*\..*\..*")
do
    # Column 1 is used as the node name, column 2 as the display number.
    # The user and session columns are not needed by this check, so they are
    # no longer extracted (this also avoids overwriting $USER).
    NXTARGET=$(echo "$line" | awk '{print $1}')
    DISPLAY=$(echo "$line" | awk '{print $2}')

    # Collect all the displays that should exist on the target
    echo "$DISPLAY" >> "$TEMPFOLDER/$NXTARGET-goodlocks.txt"
done

# If there is no one logged in, we have to touch this file to be empty
touch "$TEMPFOLDER/127.0.0.1-goodlocks.txt"

# Go through the servers and see what locks there are
for NXTARGET in $(echo $NXTARGETS | tr " " "\n" | sort -u)
do
    xlocks_file="$TEMPFOLDER/$NXTARGET-existing_x_locks.txt"
    x11sockets_file="$TEMPFOLDER/$NXTARGET-existing_x11_sockets.txt"
    goodlocks_file="$TEMPFOLDER/$NXTARGET-goodlocks.txt"

    # Get all the existing locks on that system. Lines containing "NRPE" are
    # dropped before sorting.
    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c get_X_locks | grep -v "NRPE" | sort > "$xlocks_file"
    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c get_X11_sockets | grep -v "NRPE" | sort > "$x11sockets_file"

    # A node that currently has no sessions never had a goodlocks file
    # written for it. Make sure an (empty) one exists so that sort and diff
    # compare against "no locks expected" instead of failing on a missing file.
    touch "$goodlocks_file"

    # Sort the locks we have that we know are good
    sort "$goodlocks_file" -o "$goodlocks_file"

    # Diff once and keep the output; $? here is diff's own exit status.
    lock_diff=$(diff "$xlocks_file" "$goodlocks_file")
    if [ $? -ne 0 ]; then
        STATE=$STATE_CRITICAL
        echo "Difference in X lock (/tmp/.Xbla) files for $NXTARGET:"
        echo "$lock_diff" | grep -e "<" -e ">"
    fi

    socket_diff=$(diff "$x11sockets_file" "$goodlocks_file")
    if [ $? -ne 0 ]; then
        STATE=$STATE_CRITICAL
        echo "Difference in X socket files (/tmp/.X11-unix) for $NXTARGET:"
        echo "$socket_diff" | grep -e "<" -e ">"
    fi
done

# Check NX locks, outside the for loop because they are always on localhost
/usr/lib64/nagios/plugins/check_nrpe -u -H localhost -c get_NX_locks | grep -v "NRPE" | sort > "$TEMPFOLDER/all-existing_nx_locks.txt"

# Merge the expected displays of every node. Handing the files straight to
# sort means the glob is expanded before the redirection creates
# all-goodlocks.txt, so the output file cannot end up as one of its own inputs
# (its name matches the *-goodlocks.txt pattern).
sort "$TEMPFOLDER"/*-goodlocks.txt > "$TEMPFOLDER/all-goodlocks.txt"

# Diff once and keep the output; $? here is diff's own exit status.
nx_diff=$(diff "$TEMPFOLDER/all-existing_nx_locks.txt" "$TEMPFOLDER/all-goodlocks.txt")
if [ $? -ne 0 ]; then
    STATE=$STATE_CRITICAL
    echo "Difference in NX lock files (/tmp/.nXbla-unix) for localhost:"
    echo "$nx_diff" | grep -e "<" -e ">"
fi


if [ $STATE -eq $STATE_OK ]; then
    echo All Locks match on the nodes
else
    echo "There are lock/socket differences. "
    echo "< Means that the lock/socket is on the node, and shouldn't be"
    echo "> Means nx thinks the lock/socket should be there, but isn't"
fi

# Remove the scratch directory so that repeated check runs do not pile up
# temporary folders. The guards make sure an empty or unexpected $TEMPFOLDER
# can never reach rm -rf.
if [ -n "$TEMPFOLDER" ] && [ "$TEMPFOLDER" != "/" ] && [ -d "$TEMPFOLDER" ]; then
    rm -rf -- "$TEMPFOLDER"
fi

exit $STATE


47 changes: 47 additions & 0 deletions check_nx_session_sanity
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
#
# Nrpe check to ensure that the nx user's session is still valid, i.e. has gnome-session, gnome-panel and current, valid /tmp/.X<displayID>-lock file

# Check for the plugin directory where utils.sh lives. When both directories
# exist the lib64 one wins, because it is assigned last.
[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins


# Load states and strings (the STATE_* values used by the rest of the script)
if [ -x "$UTILPATH"/utils.sh ]; then
    . "$UTILPATH"/utils.sh
else
    echo "ERROR: Cannot find utils.sh"
    # A bare `exit` would return the status of the echo above (0), which a
    # Nagios-style caller interprets as OK. The STATE_* variables come from
    # utils.sh and are therefore unavailable here, so use the literal
    # UNKNOWN code.
    exit 3
fi


# Split words on newlines only, so each row of nxserver --list stays intact.
IFS=$'\n'

STATE=$STATE_OK

# Print whitespace-separated column $2 of the row given as $1.
nx_list_field () {
    echo $1 | awk -v col="$2" '{print $col}'
}

# Walk every row of the session list that contains at least three dots.
for line in $(sudo /usr/bin/nxserver --list | grep "^.*\..*\..*\..*")
do
    NXTARGET=$(nx_list_field "$line" 1)
    DISPLAY=$(nx_list_field "$line" 2)
    USER=$(nx_list_field "$line" 3)
    SESSION=$(nx_list_field "$line" 5)

    # Work out whether this is a desktop session (with a window manager to
    # health-check) or a portal-launched nx application; the value of the
    # type= entry in the running-session record is handed on to the node check.
    sessiontype=$(sudo cat /var/lib/nxserver/db/running/sessionId\{$SESSION\} \
        | tr "," "\n" \
        | grep type= \
        | awk -F= '{print $2}')

    # Run the per-session check on the node; remember its status if it failed.
    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c check_nx_single_session -a $USER $DISPLAY $SESSION $sessiontype
    node_rc=$?
    [ $node_rc -eq 0 ] || STATE=$node_rc
done

# One summary line, then hand the collected state back to the caller.
summary="Some NX sessions borked"
[ $STATE -eq $STATE_OK ] && summary="All NX sessions appear sane"
echo "$summary"

exit $STATE


134 changes: 134 additions & 0 deletions check_nx_single_session
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/bin/bash
#
# Nrpe check to ensure that the nx user's session is still valid, i.e. has gnome-session, gnome-panel and current, valid /tmp/.X<displayID>-lock file
# Called by check_nx_session_sanity and runs on actual nx node

# Check for the plugin directory where utils.sh lives. When both directories
# exist the lib64 one wins, because it is assigned last.
[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins


# Load states and strings (the STATE_* values used by the rest of the script)
if [ -x "$UTILPATH"/utils.sh ]; then
    . "$UTILPATH"/utils.sh
else
    echo "ERROR: Cannot find utils.sh"
    # A bare `exit` would return the status of the echo above (0), which a
    # Nagios-style caller interprets as OK. The STATE_* variables come from
    # utils.sh and are therefore unavailable here, so use the literal
    # UNKNOWN code.
    exit 3
fi


# Split words on newlines only from here on.
IFS=$'\n'

STATE=$STATE_OK

# Exactly four positional arguments are expected; anything else is a usage
# error reported as UNKNOWN.
case "$#" in
    4)
        ;;
    *)
        echo "Usage: check_nx_single_session <userID> <displayID> <nxSessionID> <sessiontype>. Called with $*"
        exit $STATE_UNKNOWN
        ;;
esac

# Name the positional arguments for the checks below.
USER=$1
NXDISPLAY=$2
SESSION=$3
TYPE=$4

checkGNOME () {
    # Checks that the GNOME desktop processes exist for one nxagent session.
    #   $1 - ID handed to `ps -g`; the caller passes the process group ID of
    #        the nxagent process so the lookup is tied to that session.
    # Reads USER, NXDISPLAY, SESSION and HOSTNAME for its messages.
    # Returns STATE_OK when both gnome-session and gnome-panel are found,
    # STATE_CRITICAL (with a one-line explanation on stdout) otherwise.

    pgid=$1

    if [ -z "$pgid" ]; then
        echo "checkGnome() needs argument of process group ID"
        return $STATE_CRITICAL
    fi

    # gnome-session must show up; lines mentioning dbus-launch do not count.
    if ! ps h -g $pgid -o args | grep -v dbus-launch | grep -q /usr/bin/gnome-session; then
        echo No gnome-session for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
        return $STATE_CRITICAL
    fi

    # gnome-panel must show up in the same process listing.
    if ! ps h -g $pgid -o args | grep -q gnome-panel; then
        echo No gnome-panel for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
        return $STATE_CRITICAL
    fi

    return $STATE_OK
}

checkKDE () {
    # Checks that the KDE session manager exists for one nxagent session.
    #   $1 - ID handed to `ps -g`; the caller passes the process group ID of
    #        the nxagent process so the lookup is tied to that session.
    # Reads USER, NXDISPLAY, SESSION and HOSTNAME for its messages.
    # Returns STATE_OK when a "kwrapper4 ksmserver" command line is found,
    # STATE_CRITICAL (with a one-line explanation on stdout) otherwise.

    pgid=$1

    if [ -z "$pgid" ]; then
        echo "checkKDE() needs argument of process group ID"
        return $STATE_CRITICAL
    fi

    if ! ps h -g $pgid -o args | grep -q "kwrapper4 ksmserver"; then
        echo No kwrapper4 ksmserver for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
        return $STATE_CRITICAL
    fi

    return $STATE_OK
}

# Verify that X lock file exists and is owned by user
# This is correlated to the specific nxagent session by the use of $NXDISPLAY
/usr/bin/stat /tmp/.X${NXDISPLAY}-lock | grep Uid: | egrep -q "/ *$USER\)"

if [ "$?" != "0" ]; then
    echo No X lockfile for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
    # Not fatal on its own: keep going so the process checks still run.
    STATE=$STATE_CRITICAL
fi


# Verify that nxagent process is running for particular user, display and session ID
ps -f -u $USER | grep nxagent | grep :$NXDISPLAY | grep -q $SESSION

if [ "$?" != "0" ]; then
    echo No nxagent for $USER/$NXDISPLAY/$SESSION on $HOSTNAME as reported by nxserver --list
    exit $STATE_CRITICAL
else
    # grab the process group ID of the nxagent process
    pgid=$(ps -u $USER -o pgid,comm,args | grep nxagent | grep :$NXDISPLAY | grep $SESSION | awk '{print $1}')

    # Don't even continue to check for gnome or kde processes if nxagent isn't running (need nxagent's pgid for those checks).
    # The expansion is quoted so the test stays well-formed even if more than
    # one line matched (IFS is a newline, so an unquoted multi-line value
    # would be split into several words).
    if [ -z "$pgid" ]; then
        echo Cannot find process group ID for nxagent with args $USER $NXDISPLAY $SESSION on $HOSTNAME
        exit $STATE_CRITICAL
    fi
fi

#
# Determine whether GNOME or KDE
#
if [ "$TYPE" == "unix-gnome" ]; then
    # only change STATE if it's not STATE_OK, our default
    checkGNOME $pgid || STATE=$?
elif [ "$TYPE" == "unix-kde" ]; then
    # only change STATE if it's not STATE_OK, our default.
    # This must be $? (the status of checkKDE), not a literal "?".
    checkKDE $pgid || STATE=$?
elif [ "$TYPE" == "unix-application" ]; then
    # Application sessions have no window manager to check.
    /bin/true
else
    # Unrecognised types are only reported; STATE is left as it is.
    echo The window manager type was not passed as an argument or Im not programmed to look it: $TYPE
fi


# nrpe requires *some* output
echo -n " "

exit $STATE


5 changes: 5 additions & 0 deletions get_NX_locks
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

# Lists the NX lock files in /tmp for sanity checking.
# For every .nX*-lock entry, prints the part of the name between the leading
# ".nX" and the first "-", one per line, in the sort order of the file names.
cd /tmp
ls .nX*-lock | sort | sed -e 's/^\.nX//' -e 's/-.*//'
5 changes: 5 additions & 0 deletions get_X11_sockets
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Reports back a list of X Sockets for sanity checking.
# Prints, one per line, the four characters following the leading "X" of every
# entry in /tmp/.X11-unix whose name is exactly "X" plus four characters.

# Without this guard a failed cd would leave us in whatever directory we were
# started from, and that directory would be listed instead. A missing socket
# directory simply means there are no sockets to report.
cd /tmp/.X11-unix/ || exit 0
ls | sort | grep "^X....$" | sed 's/^X//g'

5 changes: 5 additions & 0 deletions get_X_locks
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Lists the X lock files in /tmp for sanity checking.
# Keeps the .X*-lock entries matching the pattern ".X....-lock" and prints the
# part of each name between the leading ".X" and the first "-", one per line.
cd /tmp
ls .X*-lock | sort | grep ".X....-lock" | sed -e 's/^\.X//' -e 's/-.*//'

0 comments on commit 2f5fa2b

Please sign in to comment.