Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Update hardware raid to account for unconfigured drives

  • Loading branch information...
commit 2f5fa2b69327cb7896834c35715776b022175166 1 parent 25e4a6f
@solarkennedy authored
View
8 check_hardware_raid
@@ -287,9 +287,11 @@ sub check_megacli {
foreach my $dev (@devs) {
push(@status, sprintf "Dev%02d (%s): %s", $dev->{dev}, $dev->{name}, $dev->{state});
- if ($dev->{state} ne 'Online, Spun Up') {
- # TODO: process other statuses
- $status = $ERRORS{CRITICAL};
+ if ($dev->{state} ne 'Online, Spun Up' and $dev->{state} ne 'Unconfigured(good), Spun Up') {
+ # TODO: process other statuses
+ $status = $ERRORS{CRITICAL};
+ #print $dev->{state};
+ #print "\n";
}
}
View
99 check_nx_lock_sanity
@@ -0,0 +1,99 @@
+#!/bin/bash
+# Gets a list of locks and compares them against what NX thinks it should have
+# Reports back if there is a difference
+
+# Locate the Nagios plugin directory where utils.sh lives. When both
+# locations exist the lib64 one wins because it is assigned last.
+[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
+[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins
+
+
+# load states and strings
+if [ -x "$UTILPATH"/utils.sh ]; then
+    . "$UTILPATH"/utils.sh
+else
+    echo "ERROR: Cannot find utils.sh"
+    # utils.sh could not be sourced, so $STATE_UNKNOWN is not defined here:
+    # use the literal Nagios UNKNOWN code. A bare "exit" would return the
+    # status of the echo above (0) and the failure would be reported as OK.
+    exit 3
+fi
+
+# Our field separator is the newline, not a space or tab. Needed when we iterate through nxserver --list
+IFS="
+"
+
+STATE=$STATE_OK
+
+# Scratch directory that receives every intermediate lock list written below.
+TEMPFOLDER=`mktemp -d`
+if [ -z "$TEMPFOLDER" ] || [ ! -d "$TEMPFOLDER" ]; then
+    # Without a scratch directory every later "$TEMPFOLDER/..." path would
+    # resolve directly under "/", so give up instead of carrying on.
+    echo "ERROR: Cannot create temporary directory"
+    exit $STATE_UNKNOWN
+fi
+
+# If we are load balancing, check all defined nodes, localhost otherwise
+source /etc/nxserver/node.conf
+if [ -z "$LOAD_BALANCE_SERVERS" ] ; then
+    NXTARGETS="127.0.0.1"
+else
+    NXTARGETS=$LOAD_BALANCE_SERVERS
+fi
+
+# Collect what NX thinks the locks should be
+for line in `sudo /usr/bin/nxserver --list| grep "^.*\..*\..*\..*"`
+do
+ NXTARGET=`echo $line | awk '{print $1}'`
+ DISPLAY=`echo $line | awk '{print $2}'`
+ USER=`echo $line | awk '{print $3}'`
+ SESSION=`echo $line | awk '{print $5}'`
+
+ #Collect all the displays that should exist on the target
+ echo "$DISPLAY" >> $TEMPFOLDER/$NXTARGET-goodlocks.txt
+
+done
+
+# If there is no one logged in, we have to touch this file to be empty
+touch $TEMPFOLDER/127.0.0.1-goodlocks.txt
+
+# Go through the servers and see what locks there are
+for NXTARGET in `echo $NXTARGETS | tr " " "\n" | sort -u`
+do
+    goodlocks="$TEMPFOLDER/$NXTARGET-goodlocks.txt"
+    xlocks="$TEMPFOLDER/$NXTARGET-existing_x_locks.txt"
+    xsockets="$TEMPFOLDER/$NXTARGET-existing_x11_sockets.txt"
+
+    # A node without any session never got a goodlocks file from the
+    # collection step. Create an empty one so sort/diff compare against
+    # "no locks expected" instead of failing on a missing file and raising
+    # a false CRITICAL.
+    touch "$goodlocks"
+
+    # Get all the existing locks on that system
+    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c get_X_locks | grep -v "NRPE" | sort > "$xlocks"
+    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c get_X11_sockets | grep -v "NRPE" | sort > "$xsockets"
+
+
+    # Sort the locks we have that we know are good
+    sort "$goodlocks" -o "$goodlocks"
+
+    # Diff it: any difference between what exists and what NX expects is CRITICAL
+    if ! diff "$xlocks" "$goodlocks" > /dev/null; then
+        STATE=$STATE_CRITICAL
+        echo "Difference in X lock (/tmp/.Xbla) files for $NXTARGET:"
+        # Only show the "<" / ">" lines, not diff's position markers
+        diff "$xlocks" "$goodlocks" | grep -e "<" -e ">"
+    fi
+    if ! diff "$xsockets" "$goodlocks" > /dev/null; then
+        STATE=$STATE_CRITICAL
+        echo "Difference in X socket files (/tmp/.X11-unix) for $NXTARGET:"
+        diff "$xsockets" "$goodlocks" | grep -e "<" -e ">"
+    fi
+done
+
+# The NX lock files are always on localhost, so they are compared once here
+# instead of once per node inside the loop. The expected side is the
+# concatenation of every per-node goodlocks list.
+/usr/lib64/nagios/plugins/check_nrpe -u -H localhost -c get_NX_locks | grep -v "NRPE" | sort > $TEMPFOLDER/all-existing_nx_locks.txt
+cat $TEMPFOLDER/*-goodlocks.txt | sort > $TEMPFOLDER/all-goodlocks.txt
+if ! diff $TEMPFOLDER/all-existing_nx_locks.txt $TEMPFOLDER/all-goodlocks.txt > /dev/null
+then
+    STATE=$STATE_CRITICAL
+    echo "Difference in NX lock files (/tmp/.nXbla-unix) for localhost:"
+    # Print only the differing entries themselves
+    diff $TEMPFOLDER/all-existing_nx_locks.txt $TEMPFOLDER/all-goodlocks.txt | grep -e "<" -e ">"
+fi
+
+
+if [ $STATE -eq $STATE_OK ];then
+ echo All Locks match on the nodes
+else
+ echo "There are lock/socket differences. "
+ echo "< Means that the lock/socket is on the node, and shouldn't be"
+ echo "> Means nx thinks the lock/socket should be there, but isn't"
+fi
+
+# Don't feel safe to do this yet
+#rm -rf $TEMPFOLDER
+
+exit $STATE
+
+
View
47 check_nx_session_sanity
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Nrpe check to ensure that the nx user's session is still valid, i.e. has gnome-session, gnome-panel and current, valid /tmp/.X<displayID>-lock file
+
+# Locate the Nagios plugin directory where utils.sh lives. When both
+# locations exist the lib64 one wins because it is assigned last.
+[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
+[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins
+
+
+# load states and strings
+if [ -x "$UTILPATH"/utils.sh ]; then
+    . "$UTILPATH"/utils.sh
+else
+    echo "ERROR: Cannot find utils.sh"
+    # utils.sh could not be sourced, so $STATE_UNKNOWN is not defined here:
+    # use the literal Nagios UNKNOWN code. A bare "exit" would return the
+    # status of the echo above (0) and the failure would be reported as OK.
+    exit 3
+fi
+
+
+# Split command output on newlines only, so that every row printed by
+# nxserver --list becomes exactly one loop item.
+IFS=$'\n'
+
+STATE=$STATE_OK
+
+for line in $(sudo /usr/bin/nxserver --list| grep "^.*\..*\..*\..*")
+do
+    # Columns 1, 2, 3 and 5 of the row: node, display, user and session ID
+    NXTARGET=$(echo $line | awk '{print $1}')
+    DISPLAY=$(echo $line | awk '{print $2}')
+    USER=$(echo $line | awk '{print $3}')
+    SESSION=$(echo $line | awk '{print $5}')
+
+    # Determine whether this session is a desktop session (with a window
+    # manager to health-check) or a portal-launched nx application; the
+    # "type=" entry of the running-session record is passed on to the node.
+    sessiontype=$(sudo cat /var/lib/nxserver/db/running/sessionId\{$SESSION\} |tr "," "\n"|grep type=|awk -F= '{print $2}')
+
+    # Run the per-session check on the node; any non-zero result becomes
+    # the state of this check (the most recent failure wins).
+    /usr/lib64/nagios/plugins/check_nrpe -u -H $NXTARGET -c check_nx_single_session -a $USER $DISPLAY $SESSION $sessiontype
+    rc=$?
+    if [ $rc -ne 0 ]; then
+        STATE=$rc
+    fi
+done
+
+if test $STATE -eq $STATE_OK
+then
+    echo "All NX sessions appear sane"
+else
+    echo "Some NX sessions borked"
+fi
+
+exit $STATE
+
+
View
134 check_nx_single_session
@@ -0,0 +1,134 @@
+#!/bin/bash
+#
+# Nrpe check to ensure that the nx user's session is still valid, i.e. has gnome-session, gnome-panel and current, valid /tmp/.X<displayID>-lock file
+# Called by check_nx_session_sanity and runs on actual nx node
+
+# Locate the Nagios plugin directory where utils.sh lives. When both
+# locations exist the lib64 one wins because it is assigned last.
+[ -d /usr/lib/nagios/plugins ] && UTILPATH=/usr/lib/nagios/plugins
+[ -d /usr/lib64/nagios/plugins ] && UTILPATH=/usr/lib64/nagios/plugins
+
+
+# load states and strings
+if [ -x "$UTILPATH"/utils.sh ]; then
+    . "$UTILPATH"/utils.sh
+else
+    echo "ERROR: Cannot find utils.sh"
+    # utils.sh could not be sourced, so $STATE_UNKNOWN is not defined here:
+    # use the literal Nagios UNKNOWN code. A bare "exit" would return the
+    # status of the echo above (0) and the failure would be reported as OK.
+    exit 3
+fi
+
+
+IFS="
+"
+
+STATE=$STATE_OK
+
+if [ "$#" != "4" ];then
+ echo "Usage: check_nx_single_session <userID> <displayID> <nxSessionID> <sessiontype>. Called with $*"
+ exit $STATE_UNKNOWN
+fi
+
+USER=$1
+NXDISPLAY=$2
+SESSION=$3
+TYPE=$4
+
+checkGNOME () {
+    # Health check for a GNOME desktop session.
+    #   $1 - process group ID of the session's nxagent; the process group is
+    #        what ties the desktop processes to one specific nxagent session.
+    # Returns $STATE_OK when both gnome-session and gnome-panel are found in
+    # that process group, $STATE_CRITICAL otherwise (with a message on stdout).
+
+    pgid=$1
+
+    if [ -z "$pgid" ]; then
+        echo "checkGnome() needs argument of process group ID"
+        return $STATE_CRITICAL
+    fi
+
+    # gnome-session must be running (dbus-launch lines are ignored)
+    if ! ps h -g $pgid -o args | grep -v dbus-launch | grep -q /usr/bin/gnome-session
+    then
+        echo No gnome-session for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
+        return $STATE_CRITICAL
+    fi
+
+    # gnome-panel must be running in the same process group
+    if ! ps h -g $pgid -o args | grep -q gnome-panel
+    then
+        echo No gnome-panel for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
+        return $STATE_CRITICAL
+    fi
+
+    return $STATE_OK
+}
+
+checkKDE () {
+    # Health check for a KDE desktop session.
+    #   $1 - process group ID of the session's nxagent; the process group is
+    #        what ties the desktop processes to one specific nxagent session.
+    # Returns $STATE_OK when "kwrapper4 ksmserver" is found in that process
+    # group, $STATE_CRITICAL otherwise (with a message on stdout).
+
+    pgid=$1
+
+    if [ -z "$pgid" ]; then
+        echo "checkKDE() needs argument of process group ID"
+        return $STATE_CRITICAL
+    fi
+
+    if ! ps h -g $pgid -o args | grep -q "kwrapper4 ksmserver"
+    then
+        echo No kwrapper4 ksmserver for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
+        return $STATE_CRITICAL
+    fi
+
+    return $STATE_OK
+}
+
+# Verify that X lock file exists and is owned by user
+# This is correlated to the specific nxagent session by the use of $NXDISPLAY
+/usr/bin/stat /tmp/.X${NXDISPLAY}-lock | grep Uid: | egrep -q "/ *$USER\)"
+
+if [ "$?" != "0" ];then
+ echo No X lockfile for $USER/$NXDISPLAY/$SESSION on $HOSTNAME
+ STATE=$STATE_CRITICAL
+fi
+
+
+# Verify that nxagent process is running for particular user, display and session ID
+ps -f -u $USER | grep nxagent | grep :$NXDISPLAY |grep -q $SESSION
+
+if [ "$?" != "0" ]; then
+ echo No nxagent for $USER/$NXDISPLAY/$SESSION on $HOSTNAME as reported by nxserver --list
+ exit $STATE_CRITICAL
+else
+ # grab the process group ID of the nxagent process
+ pgid=`ps -u $USER -o pgid,comm,args | grep nxagent | grep :$NXDISPLAY |grep $SESSION | awk '{print $1}'`
+
+ # Don't even continue to check for gnome or kde processes if nxagent isn't running (need nxagent's pgid for those checks)
+ if [ ! $pgid ];then
+ echo Cannot find process group ID for nxagent with args $USER $NXDISPLAY $SESSION on $HOSTNAME
+ exit $STATE_CRITICAL
+ fi
+fi
+
+#
+# Determine whether GNOME or KDE and run the matching desktop health check.
+# "unix-application" sessions have no window manager to check.
+#
+if [ "$TYPE" == "unix-gnome" ];then
+    # only change STATE if it's not STATE_OK, our default
+    checkGNOME $pgid || STATE=$?
+elif [ "$TYPE" == "unix-kde" ];then
+    # only change STATE if it's not STATE_OK, our default
+    # ($? carries checkKDE's return code; a bare "?" would store the literal
+    # character and break the numeric "exit $STATE" below)
+    checkKDE $pgid || STATE=$?
+elif [ "$TYPE" == "unix-application" ];then
+    /bin/true
+else
+    # Unrecognised type: only reported, STATE is left as it is
+    echo The window manager type was not passed as an argument or Im not programmed to look it: $TYPE
+fi
+
+
+# nrpe requires *some* output
+echo -n " "
+
+exit $STATE
+
+
View
5 get_NX_locks
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+# Prints, sorted, the part of every /tmp/.nX*-lock file name that sits
+# between the ".nX" prefix and the first "-"; used for lock sanity checking.
+cd /tmp
+ls .nX*-lock | sort | sed -e 's/^\.nX//' | cut -d "-" -f 1
View
5 get_X11_sockets
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Prints, sorted, the entries of /tmp/.X11-unix named "X" plus exactly four
+# characters, with the leading "X" removed; used for socket sanity checking.
+cd /tmp/.X11-unix/
+ls | sort | grep '^X....$' | sed -e 's/^X//'
+
View
5 get_X_locks
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Reports back a list of Xlocks for sanity checking
+cd /tmp
+ls .X*-lock | sort | grep ".X....-lock" | sed 's/^\.X//g' | cut -f 1 -d "-"
+
Please sign in to comment.
Something went wrong with that request. Please try again.