Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

check_rhcs uses socket.gethostname(), must be able to specify alternative #2

Open
tomas-edwardsson opened this issue Dec 21, 2012 · 1 comment

Comments

@tomas-edwardsson
Copy link
Contributor

Sometimes the node names in cluster.conf are different from the hostname.

@tomas-edwardsson
Copy link
Contributor Author

Patch

--- /tmp/check_rhcs 2012-12-21 15:19:18.750913010 +0000
+++ /usr/lib64/nagios/plugins/check_rhcs    2012-12-21 15:28:59.380975106 +0000
@@ -68,15 +68,14 @@
     return clusterElement.attributes['name'].value


-def getLocalNodeState(dom):
+def getLocalNodeState(dom, hostname):
     """
     Get the state of the local node
     """
-    hostname = socket.gethostname()
     nodesList = dom.getElementsByTagName('node')
     nodeState = {}
+

-    print hostname
     for node in nodesList:
         if node.attributes['name'].value == hostname:
             nodeState['name'] = node.attributes['name'].value
@@ -91,12 +90,11 @@
     return nodeState


-def getServiceState(dom, service):
+def getServiceState(dom, service, hostname):
     """ 
     Get the state of the named service
     """
     groupList = dom.getElementsByTagName('group')
-    hostname = socket.gethostname()
     serviceState = {}
     for group in groupList:
         if group.attributes['name'].value in (service,"service:"+service,"vm:"+service):
@@ -109,15 +107,18 @@

 def main():
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 's:cZh', ['service=', 'cluster', 'supsended', 'help'])
+        opts, args = getopt.getopt(sys.argv[1:], 's:cZhn:', ['service=', 'cluster', 'supsended', 'help', 'nodename'])
     except getopt.GetoptError:
         usage()
         sys.exit(2)

     check_suspend = False
+    nodeName = None
     for o, a in opts:
         if o in ('-c', '--cluster'):
             typeCheck = 'cluster'
+        if o in ('-n', '--nodename'):
+            nodeName = a
         if o in ('-s', '--service'):
             typeCheck = 'service'
             serviceName = a
@@ -147,7 +148,7 @@

         # Now we find the status of the local node from clustat.
         # We only care about the local state since this way we can tie the alert to the host.
-        nodeStates = getLocalNodeState(dom) 
+        nodeStates = getLocalNodeState(dom, nodeName or socket.gethostname()) 
         if nodeStates['state'] != "1":
             print "WARNING: Local node state is offline!"
             sys.exit(1)
@@ -159,7 +160,7 @@
             sys.exit(0)

     elif typeCheck == 'service':
-        serviceState = getServiceState(dom, serviceName)
+        serviceState = getServiceState(dom, serviceName, nodeName or socket.gethostname())
         if serviceState['state'] != 'started':
             print "CRITICAL: Service " + serviceName + " on " + serviceState['owner'] + " is in " + serviceState['state'] + " state"
             sys.exit(2)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant