You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
--- /tmp/check_rhcs 2012-12-21 15:19:18.750913010 +0000
+++ /usr/lib64/nagios/plugins/check_rhcs 2012-12-21 15:28:59.380975106 +0000
@@ -68,15 +68,14 @@
return clusterElement.attributes['name'].value
-def getLocalNodeState(dom):
+def getLocalNodeState(dom, hostname):
"""
Get the state of the local node
"""
- hostname = socket.gethostname()
nodesList = dom.getElementsByTagName('node')
nodeState = {}
+
- print hostname
for node in nodesList:
if node.attributes['name'].value == hostname:
nodeState['name'] = node.attributes['name'].value
@@ -91,12 +90,11 @@
return nodeState
-def getServiceState(dom, service):
+def getServiceState(dom, service, hostname):
"""
Get the state of the named service
"""
groupList = dom.getElementsByTagName('group')
- hostname = socket.gethostname()
serviceState = {}
for group in groupList:
if group.attributes['name'].value in (service,"service:"+service,"vm:"+service):
@@ -109,15 +107,18 @@
def main():
try:
- opts, args = getopt.getopt(sys.argv[1:], 's:cZh', ['service=', 'cluster', 'supsended', 'help'])
+ opts, args = getopt.getopt(sys.argv[1:], 's:cZhn:', ['service=', 'cluster', 'supsended', 'help', 'nodename'])
except getopt.GetoptError:
usage()
sys.exit(2)
check_suspend = False
+ nodeName = None
for o, a in opts:
if o in ('-c', '--cluster'):
typeCheck = 'cluster'
+ if o in ('-n', '--nodename'):
+ nodeName = a
if o in ('-s', '--service'):
typeCheck = 'service'
serviceName = a
@@ -147,7 +148,7 @@
# Now we find the status of the local node from clustat.
# We only care about the local state since this way we can tie the alert to the host.
- nodeStates = getLocalNodeState(dom)
+ nodeStates = getLocalNodeState(dom, nodeName or socket.gethostname())
if nodeStates['state'] != "1":
print "WARNING: Local node state is offline!"
sys.exit(1)
@@ -159,7 +160,7 @@
sys.exit(0)
elif typeCheck == 'service':
- serviceState = getServiceState(dom, serviceName)
+ serviceState = getServiceState(dom, serviceName, nodeName or socket.gethostname())
if serviceState['state'] != 'started':
print "CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state"
sys.exit(2)
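Sometimes the node names in cluster.conf differ from the machine's hostname, so the check cannot find the local node. This patch adds a -n/--nodename option to override the name used for the lookup; when it is not given, socket.gethostname() is still used.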
The text was updated successfully, but these errors were encountered: