#!/bin/bash
#########################################################################
# Script: check_equallogic #
# Author: Claudio Kuenzler www.claudiokuenzler.com #
# Purpose: Monitor Dell Equallogic with Nagios #
# Description: Checks Dell Equallogic via SNMP. #
# Can be used to query status and performance info #
# Tested on: Check www.claudiokuenzler.com/ithowtos/ #
# nagios_check_equallogic.php for compatibility matrix #
# History: #
# 20091109 Started Script programming checks: #
# health, disk, raid, uptime, ps, info #
# 20091112 Added ethif, conn #
# 20091118 Added diskusage #
# 20091119 Bugfix on Outputs (removed Pipes) #
# 20091121 Public Release #
# 20091204 Bugfix (removed IP addresses) #
# 20091206 Bugfix (removed SNMP community names) #
# 20091222 Fixed raid, ps, health and diskusage checks when multiple #
# member devices exists. Mathias Sundman <mathias@openvpn.se> #
# 20100112 Successful tests on PS5000XV - thanks to Scott Sawin #
# 20100209 Compatibility matrix now on website (see Tested on above) #
# 20100416 Beta Testing for rewritten ethif check #
# 20100420 Corrected ethif output, finished new ethif check #
# 20100526 Using proper order of snmpwalk command, thanks Roland Ripoll #
# 20100531 Added perfdata for diskusage and connections, thx to Benoit #
# 20100630 Corrected perfdata output (+added thresholds), thx Christian #
# 20100809 Fixed conn type -> total of all eql members of group #
# 20101026 /bin/sh back to /bin/bash (Ubuntu problems with /bin/dash) #
# 20101026 Bugfix snmpwalk (vqe instead of vq), thanks Fabio Panigatti #
# 20101102 Added fan #
# 20101202 Added volumes (checks utilization of all volumes) #
#########################################################################
# Usage: ./check_equallogic -H host -C community -t type [-w warning] [-c critical]
#########################################################################
# Usage text shown for --help or when the plugin is called without arguments.
# It is printed with "echo -e", so the literal \n sequences become line breaks
# in addition to the real newlines inside the string.
# Fixes: spelling of "ethernet"; the "raid" type was listed twice.
help="check_equallogic (c) 2009-2010 Claudio Kuenzler (published under GPL licence)\n
Usage: ./check_equallogic -H host -C community -t type [-w warning] [-c critical]\n
Options:\n-H Hostname\n-C SNMP-Community name (at least read-only)\n-t Type to check, see list below\n-w Warning Threshold\n-c Critical Threshold\n
Requirements: snmpwalk, awk, grep, wc\n
types:\nconn -> Checks number of ISCSI connections (if no thresholds are given, outputs information)
disk -> Checks Status of all disks
diskusage -> Checks the actual usage of the defined raid (if no thresholds are given, outputs information)
ethif -> Checks ethernet interfaces (if no thresholds are given, outputs information)
fan -> Status of Fans
health -> Overall health status of Equallogic device
info -> Shows some Information
ps -> Checks Power Supply/Supplies
raid -> Checks RAID status
uptime -> Shows uptime
volumes -> Checks utilization of all ISCSI volumes (if no thresholds are given, outputs information)"
# Search path used for every external command the plugin runs.
PATH=/usr/local/bin:/usr/bin:/bin

# Exit codes handed back to Nagios, one per plugin state.
STATE_OK=0        # status is OK
STATE_WARNING=1   # status is Warning
STATE_CRITICAL=2  # status is Critical
STATE_UNKNOWN=3   # status is Unknown
# Verify that every tool in the list below can be found before any check runs.
# "command -v" is a shell builtin, so the lookup itself does not depend on an
# external "which" binary, and nothing is executed from its output.
for cmd in snmpwalk awk grep wc [
do
  if ! command -v "${cmd}" >/dev/null 2>&1
  then
    echo "UNKNOWN: ${cmd} does not exist, please check if command exists and PATH is correct"
    exit ${STATE_UNKNOWN}
  fi
done
# Check for people who need help - aren't we all nice ;-)
#########################################################################
# Print the usage text and stop when --help is the first argument or when
# the plugin was started without any arguments at all.
if [[ "${1}" == "--help" || "${#}" == "0" ]]
then
  echo -e "${help}"
  exit 1
fi
# Get user-given variables
#########################################################################
#######################################
# Parse the command line options into the global variables used by the checks.
# Globals:   host, community, type, warning, critical (written)
#            STATE_UNKNOWN (read)
# Arguments: the plugin's command line arguments
# Outputs:   an error message on stdout for an unknown option or a
#            non-numeric threshold
# Returns:   exits 1 on an unknown option, exits STATE_UNKNOWN when -w or -c
#            is not a non-negative integer; otherwise returns normally
#######################################
parse_options() {
  local Input
  local int_re='^[0-9]+$'
  # getopts keeps its position in OPTIND; start from the first argument.
  OPTIND=1
  while getopts "H:C:t:w:c:" Input
  do
    case ${Input} in
      H) host=${OPTARG} ;;
      C) community=${OPTARG} ;;
      t) type=${OPTARG} ;;
      w) warning=${OPTARG} ;;
      c) critical=${OPTARG} ;;
      *) echo "Wrong option given. Please use options -H for host, -C for SNMP-Community, -t for type, -w for warning and -c for critical"
         exit 1
         ;;
    esac
  done
  # The checks compare thresholds with integer tests; a value such as "80%"
  # would make those tests fail and the check would silently report OK.
  if [[ -n "${warning}" && ! "${warning}" =~ ${int_re} ]] \
    || [[ -n "${critical}" && ! "${critical}" =~ ${int_re} ]]
  then
    echo "UNKNOWN: warning and critical thresholds must be non-negative integers"
    exit ${STATE_UNKNOWN}
  fi
}
parse_options "$@"
# Check Different Types
#########################################################################
# Every check type lives in its own function; the dispatcher at the end of
# this section calls the one selected with -t. Each function prints one status
# line and exits with a Nagios state. A function that finds nothing to report
# returns, and execution continues after the dispatcher.

#######################################
# Run snmpwalk (SNMP v2c) against the configured device.
# Globals:   community, host (read)
# Arguments: $1 - output format handed to -O (v, vq or vqe)
#            $2 - OID to walk
# Outputs:   the snmpwalk output
#######################################
eql_walk() {
  snmpwalk -v 2c -O "$1" -c "${community}" "${host}" "$2"
}

#######################################
# Compare a value with the optional thresholds. A threshold that was not
# given is ignored, so -w and -c may be used independently.
# Globals:   warning, critical (read)
# Arguments: $1 - integer value to rate
# Outputs:   CRITICAL, WARNING or OK
#######################################
eql_threshold_state() {
  if [ -n "${critical}" ] && [ "$1" -ge "${critical}" ]; then
    echo "CRITICAL"
  elif [ -n "${warning}" ] && [ "$1" -ge "${warning}" ]; then
    echo "WARNING"
  else
    echo "OK"
  fi
}

# health: overall health status of all members; the worst state wins.
check_health() {
  local s s_crit=0 s_warn=0 s_ok=0 s_unknown=0
  for s in $(eql_walk vqe 1.3.6.1.4.1.12740.2.1.5.1.1); do
    case "${s}" in
      3) s_crit=$((s_crit + 1)) ;;
      2) s_warn=$((s_warn + 1)) ;;
      1) s_ok=$((s_ok + 1)) ;;
      0) s_unknown=$((s_unknown + 1)) ;;
    esac
  done
  if [ "${s_crit}" -gt 0 ]; then echo "OVERALL HEALTH CRITICAL"; exit "${STATE_CRITICAL}"; fi
  if [ "${s_warn}" -gt 0 ]; then echo "OVERALL HEALTH WARNING"; exit "${STATE_WARNING}"; fi
  if [ "${s_unknown}" -gt 0 ]; then echo "OVERALL HEALTH UNKNOWN"; exit "${STATE_UNKNOWN}"; fi
  if [ "${s_ok}" -gt 0 ]; then echo "OVERALL HEALTH OK"; exit "${STATE_OK}"; fi
}

# disk: status of all disks. The table is walked once; -O vqe prints the
# status numerically, like the other status checks in this script.
check_disk() {
  local s ok=0 spare=0 failed=0 off=0 altsig=0 toosmall=0 histfailures=0 unsupported=0
  local sumcritical sumwarning
  for s in $(eql_walk vqe 1.3.6.1.4.1.12740.3.1.1.1.8); do
    case "${s}" in
      1) ok=$((ok + 1)) ;;
      2) spare=$((spare + 1)) ;;
      3) failed=$((failed + 1)) ;;
      4) off=$((off + 1)) ;;
      5) altsig=$((altsig + 1)) ;;
      6) toosmall=$((toosmall + 1)) ;;
      7) histfailures=$((histfailures + 1)) ;;
      8) unsupported=$((unsupported + 1)) ;;
    esac
  done
  sumcritical=$((failed + toosmall + histfailures + unsupported))
  sumwarning=$((off + altsig))
  if [ "${sumcritical}" -gt 0 ]; then
    echo "DISK CRITICAL ${sumcritical} disk(s) in critical state"; exit "${STATE_CRITICAL}"
  elif [ "${sumwarning}" -gt 0 ]; then
    echo "DISK WARNING ${sumwarning} disk(s) in warning state"; exit "${STATE_WARNING}"
  else
    echo "DISK OK ${ok} disks OK ${spare} disks spare"; exit "${STATE_OK}"
  fi
}

# diskusage: used space summed over all members, as percent of the total.
check_diskusage() {
  local value totalstorage=0 usedstorage=0
  local usedpercent totalstorage_perfdata usedstorage_perfdata summary state
  for value in $(eql_walk vq 1.3.6.1.4.1.12740.2.1.10.1.1); do
    totalstorage=$((totalstorage + value))
  done
  for value in $(eql_walk vq 1.3.6.1.4.1.12740.2.1.10.1.2); do
    usedstorage=$((usedstorage + value))
  done
  # Without a total the percentage below would divide by zero.
  if [ "${totalstorage}" -le 0 ]; then
    echo "DISKUSAGE UNKNOWN No storage size received via SNMP"
    exit "${STATE_UNKNOWN}"
  fi
  usedpercent=$(((usedstorage * 100) / totalstorage))
  totalstorage_perfdata=$((totalstorage * 1024 * 1024))
  usedstorage_perfdata=$((usedstorage * 1024 * 1024))
  summary="Total ${totalstorage} Used ${usedstorage} (${usedpercent}%) | 'space used'=${usedstorage_perfdata}; 'total space'=${totalstorage_perfdata}"
  if [ -z "${warning}" ] && [ -z "${critical}" ]; then
    echo "${summary}"; exit "${STATE_OK}"
  fi
  state=$(eql_threshold_state "${usedpercent}")
  echo "DISKUSAGE ${state} ${summary}"
  case "${state}" in
    CRITICAL) exit "${STATE_CRITICAL}" ;;
    WARNING) exit "${STATE_WARNING}" ;;
    *) exit "${STATE_OK}" ;;
  esac
}

# raid: RAID status of all members; states are reported by severity.
check_raid() {
  local s s1=0 s2=0 s3=0 s4=0 s5=0 s6=0 s7=0
  for s in $(eql_walk vqe 1.3.6.1.4.1.12740.2.1.13.1.1); do
    case "${s}" in
      1) s1=$((s1 + 1)) ;;
      2) s2=$((s2 + 1)) ;;
      3) s3=$((s3 + 1)) ;;
      4) s4=$((s4 + 1)) ;;
      5) s5=$((s5 + 1)) ;;
      6) s6=$((s6 + 1)) ;;
      7) s7=$((s7 + 1)) ;;
    esac
  done
  if [ "${s6}" -gt 0 ]; then echo "RAID CATASTROPHIC LOSS"; exit "${STATE_CRITICAL}"; fi
  if [ "${s5}" -gt 0 ]; then echo "RAID FAILED"; exit "${STATE_CRITICAL}"; fi
  if [ "${s2}" -gt 0 ]; then echo "RAID DEGRADED"; exit "${STATE_WARNING}"; fi
  if [ "${s4}" -gt 0 ]; then echo "RAID RECONSTRUCTING"; exit "${STATE_WARNING}"; fi
  if [ "${s3}" -gt 0 ]; then echo "RAID VERIFYING"; exit "${STATE_WARNING}"; fi
  if [ "${s7}" -gt 0 ]; then echo "RAID EXPANDING"; exit "${STATE_WARNING}"; fi
  if [ "${s1}" -gt 0 ]; then echo "RAID OK"; exit "${STATE_OK}"; fi
}

# uptime: prints the value returned for the uptime OID, always OK.
check_uptime() {
  local uptimestatus
  uptimestatus=$(eql_walk v 1.3.6.1.2.1.1.3.0)
  echo "${uptimestatus}"
  exit "${STATE_OK}"
}

# ps: state of all power supplies.
check_ps() {
  local s s1=0 s2=0 s3=0 ps_count=0
  for s in $(eql_walk vqe 1.3.6.1.4.1.12740.2.1.8.1.3); do
    case "${s}" in
      1) s1=$((s1 + 1)) ;;
      2) s2=$((s2 + 1)) ;;
      3) s3=$((s3 + 1)) ;;
    esac
    ps_count=$((ps_count + 1))
  done
  if [ "${s3}" -gt 0 ]; then echo "${s3} of ${ps_count} PSU(s): FAILED"; exit "${STATE_CRITICAL}"; fi
  if [ "${s2}" -gt 0 ]; then echo "${s2} of ${ps_count} PSU(s): NO AC POWER"; exit "${STATE_CRITICAL}"; fi
  if [ "${s1}" -gt 0 ]; then echo "${s1} of ${ps_count} PSU(s): OK"; exit "${STATE_OK}"; fi
}

# info: model, serial number, controller count, firmware and disk count.
check_info() {
  local modelnumber serialnumber controllernumber disknumber
  local fwlist firmware1 firmware2 firmware
  modelnumber=$(eql_walk vq 1.3.6.1.4.1.12740.2.1.11.1.1)
  serialnumber=$(eql_walk vq 1.3.6.1.4.1.12740.2.1.11.1.2)
  controllernumber=$(eql_walk vq 1.3.6.1.4.1.12740.2.1.11.1.3)
  disknumber=$(eql_walk vq 1.3.6.1.4.1.12740.2.1.11.1.4)
  # One walk; the 4th space-separated field of each row is the version.
  fwlist=$(eql_walk vq 1.3.6.1.4.1.12740.4.1.1.1.4.1 | cut -d " " -f 4)
  firmware1=$(awk 'NR==1' <<<"${fwlist}")
  firmware2=$(awk 'NR==2' <<<"${fwlist}")
  # With a single row there is no second firmware to differ from.
  if [ -z "${firmware2}" ] || [ "${firmware1}" = "${firmware2}" ]; then
    firmware=${firmware1}
  else
    firmware="Firmware differ!"
  fi
  echo "Equallogic Model ${modelnumber} Serial No ${serialnumber} ${controllernumber} controller(s) running FW ${firmware} ${disknumber} disks"
  exit "${STATE_OK}"
}

# ethif: number of interfaces up/down, rated against the thresholds.
check_ethif() {
  local names_out adminstatus operstatus ethnumber ethinfo="" idx=0
  local ethup ethdown contractive
  local -a ethnames ethmacs contrstatus
  names_out=$(eql_walk vq 1.3.6.1.2.1.2.2.1.2)
  # Word splitting is intended: one array element per returned value.
  ethnames=(${names_out})
  ethmacs=($(eql_walk vq 1.3.6.1.2.1.2.2.1.6))
  contrstatus=($(eql_walk vq 1.3.6.1.4.1.12740.4.1.1.1.9))
  ethnumber=$(grep -c eth <<<"${names_out}")
  # "name (mac)" for each eth interface, for any number of interfaces.
  while [ "${idx}" -lt "${ethnumber}" ]; do
    ethinfo="${ethinfo:+${ethinfo}, }${ethnames[${idx}]} (${ethmacs[${idx}]})"
    idx=$((idx + 1))
  done
  adminstatus=$(eql_walk vq 1.3.6.1.2.1.2.2.1.7)
  operstatus=$(eql_walk vq 1.3.6.1.2.1.2.2.1.8)
  # NOTE(review): as before, the counts of OID ...2.2.1.7 are used when the
  # first controller status value is lower than the second, otherwise those
  # of ...2.2.1.8 - confirm this mapping is intended.
  if [ -n "${contrstatus[0]}" ] && [ -n "${contrstatus[1]}" ] \
    && [ "${contrstatus[0]}" -lt "${contrstatus[1]}" ]; then
    ethup=$(grep -c up <<<"${adminstatus}")
    ethdown=$(grep -c down <<<"${adminstatus}")
    contractive=0
  else
    ethup=$(grep -c up <<<"${operstatus}")
    ethdown=$(grep -c down <<<"${operstatus}")
    contractive=1
  fi
  if [ -n "${warning}" ] || [ -n "${critical}" ]; then
    case "$(eql_threshold_state "${ethdown}")" in
      WARNING) echo "INTERFACES WARNING Total ${ethdown} interfaces down"; exit "${STATE_WARNING}" ;;
      CRITICAL) echo "INTERFACES CRITICAL Total ${ethdown} interfaces down"; exit "${STATE_CRITICAL}" ;;
    esac
  fi
  echo "INTERFACES OK ${ethup} interfaces up, Controller ${contractive} is active, $ethinfo"
  exit "${STATE_OK}"
}

# conn: total number of ISCSI connections over all members.
check_conn() {
  local line connections=0
  for line in $(eql_walk vq 1.3.6.1.4.1.12740.2.1.12.1.1); do
    connections=$((connections + line))
  done
  if [ -z "${warning}" ] && [ -z "${critical}" ]; then
    echo "${connections} ISCSI Connections | connections=${connections}"; exit "${STATE_OK}"
  fi
  case "$(eql_threshold_state "${connections}")" in
    WARNING)
      echo "CONNECTIONS WARNING ${connections} ISCSI Connections (Threshold: ${warning}) | connections=${connections};${warning};${critical}"
      exit "${STATE_WARNING}"
      ;;
    CRITICAL)
      echo "CONNECTIONS CRITICAL ${connections} ISCSI Connections (Threshold: ${critical}) | connections=${connections};${warning};${critical}"
      exit "${STATE_CRITICAL}"
      ;;
    *)
      echo "CONNECTIONS OK ${connections} ISCSI Connections | connections=${connections};${warning};${critical}"
      exit "${STATE_OK}"
      ;;
  esac
}

# fan: names of the fans in critical or warning state.
check_fan() {
  local -a fannames=() fanfinalcrit=() fanfinalwarn=()
  local line idx=0 fanunknown=0
  # Read names line by line so a name with spaces stays one element and the
  # row numbers of both walks stay aligned.
  while IFS= read -r line; do
    fannames+=("${line}")
  done < <(eql_walk vqe .1.3.6.1.4.1.12740.2.1.7.1.2)
  # Row N of the status walk belongs to row N of the name walk.
  while IFS= read -r line; do
    case "${line}" in
      3) fanfinalcrit+=("${fannames[${idx}]}") ;;
      2) fanfinalwarn+=("${fannames[${idx}]}") ;;
      0) fanunknown=$((fanunknown + 1)) ;;
    esac
    idx=$((idx + 1))
  done < <(eql_walk vqe .1.3.6.1.4.1.12740.2.1.7.1.4)
  if [ "${#fanfinalcrit[@]}" -gt 0 ]; then
    echo "CRITICAL Fans: ${fanfinalcrit[*]}"; exit "${STATE_CRITICAL}"
  elif [ "${#fanfinalwarn[@]}" -gt 0 ]; then
    echo "WARNING Fans: ${fanfinalwarn[*]}"; exit "${STATE_WARNING}"
  elif [ "${fanunknown}" -gt 0 ]; then
    echo "UNKNOWN Check Fans, an unknown error occured"; exit "${STATE_UNKNOWN}"
  else
    echo "All Fans OK"; exit "${STATE_OK}"
  fi
}

# volumes: utilization in percent of every volume with a size above zero.
check_volumes() {
  local -a volumenames=() volumeavailspace=() volumeusedspace=()
  local -a volumewarning=() volumecritical=() volumeok=() volumefinaloutput=()
  local line idx pct entry
  local int_re='^[0-9]+$'
  while IFS= read -r line; do
    volumenames+=("${line}")
  done < <(eql_walk vqe 1.3.6.1.4.1.12740.5.1.7.1.1.4)
  while IFS= read -r line; do
    volumeavailspace+=("${line}")
  done < <(eql_walk vqe 1.3.6.1.4.1.12740.5.1.7.1.1.8)
  while IFS= read -r line; do
    volumeusedspace+=("${line}")
  done < <(eql_walk vqe 1.3.6.1.4.1.12740.5.1.7.7.1.13)
  # Walk every row; rows with size 0 (the special Equallogic volumes) and
  # rows without usable numbers are skipped instead of being divided by.
  for ((idx = 0; idx < ${#volumenames[@]}; idx++)); do
    [ -n "${volumenames[${idx}]}" ] || continue
    [[ "${volumeavailspace[${idx}]}" =~ ${int_re} ]] || continue
    [[ "${volumeusedspace[${idx}]}" =~ ${int_re} ]] || continue
    [ "${volumeavailspace[${idx}]}" -gt 0 ] || continue
    pct=$((${volumeusedspace[${idx}]} * 100 / ${volumeavailspace[${idx}]}))
    entry="${volumenames[${idx}]}: ${pct}% used "
    volumefinaloutput+=("${entry}")
    case "$(eql_threshold_state "${pct}")" in
      CRITICAL) volumecritical+=("${entry}") ;;
      WARNING) volumewarning+=("${entry}") ;;
      *) volumeok+=("${entry}") ;;
    esac
  done
  if [ -z "${warning}" ] && [ -z "${critical}" ]; then
    echo "${volumefinaloutput[*]}"
    exit "${STATE_OK}"
  fi
  if [ "${#volumecritical[@]}" -ge 1 ]; then
    echo "CRITICAL ${volumecritical[*]}"; exit "${STATE_CRITICAL}"
  elif [ "${#volumewarning[@]}" -gt 0 ]; then
    echo "WARNING ${volumewarning[*]}"; exit "${STATE_WARNING}"
  else
    echo "OK ${volumeok[*]}"; exit "${STATE_OK}"
  fi
}

# Dispatcher: run the check selected with -t. An unknown type matches nothing.
case "${type}" in
  health) check_health ;;
  disk) check_disk ;;
  diskusage) check_diskusage ;;
  raid) check_raid ;;
  uptime) check_uptime ;;
  ps) check_ps ;;
  info) check_info ;;
  ethif) check_ethif ;;
  conn) check_conn ;;
  fan) check_fan ;;
  volumes) check_volumes ;;
esac
# Last resort: execution only gets here when nothing above has already
# terminated the script, so report the UNKNOWN state.
printf '%s\n' "UNKNOWN: should never reach this part"
exit ${STATE_UNKNOWN}