Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 347 lines (280 sloc) 11.595 kb
1b7fe15 @mzupan first import. Connect working
mzupan authored
1 #!/usr/bin/env python
2
3 #
e3e46fc @mzupan added in the usage
mzupan authored
4 # A MongoDB Nagios check script
5 #
e1dc407 changed the argument parser
Frank Brandewiede authored
6 # Script idea taken from Mike Zupan's check_mongodb.py. Special thanks to Mike for fixing problems within minutes,
11756aa typo...
Frank Brandewiede authored
7 # also for being up all sorts of crazy hours ;-)
8 # And thanks to Travel IQ < http://www.travel-iq.com http://www.hotelauskunft.de > best flight and hotelsearch of the world.
1b7fe15 @mzupan first import. Connect working
mzupan authored
9 #
e1dc407 changed the argument parser
Frank Brandewiede authored
10 # Contributor of this fork
11756aa typo...
Frank Brandewiede authored
11 # - Frank Brandewiede <brande@travel-iq.com> <brande@bfiw.de> <brande@novolab.de>
e1dc407 changed the argument parser
Frank Brandewiede authored
12 #
13 #
14 # Last changes (11.10.2010): - changes argument parser and error handling
15 # - added check for replsets
c0c57c7 changed argument parser and error handling, added check for replsets,…
Frank Brandewiede authored
16 # (14.10.2010) - added flushing check from Mike Zupans
1b7fe15 @mzupan first import. Connect working
mzupan authored
17 #
18 #
e3e46fc @mzupan added in the usage
mzupan authored
19 # USAGE
20 #
21 # See the README.md
22 #
1b7fe15 @mzupan first import. Connect working
mzupan authored
23
24 import os
e1dc407 changed the argument parser
Frank Brandewiede authored
25 import re
1b7fe15 @mzupan first import. Connect working
mzupan authored
26 import sys
27 import getopt
28 import time
e1dc407 changed the argument parser
Frank Brandewiede authored
29 import optparse
30 import string
1b7fe15 @mzupan first import. Connect working
mzupan authored
31
32 try:
33 import pymongo
34 except:
35 print "need to install pymongo"
36 sys.exit(2)
37
def usage():
    """Print the command-line help text for this plugin to stdout.

    The synopsis line uses ``sys.argv[0]`` as the program name. The remaining
    lines describe every flag the option parser accepts (including ``-P``,
    which was previously missing from this text) and each action that can be
    passed to ``-A``.
    """
    # Empty strings produce the blank separator lines. Single-argument
    # print(...) gives the same output on Python 2 and Python 3.
    help_lines = [
        "",
        "%s -H host -P port -A action -W warning -C critical" % sys.argv[0],
        "",
        "Below are the following flags you can use",
        "",
        " -H : The hostname you want to connect to",
        " -P : The port mongodb is running on",
        " -A : The action you want to take",
        " - replication_lag : checks the replication lag",
        " - connections : checks the percentage of free connections",
        " - connect: can we connect to the mongodb server",
        " - memory: checks the resident memory used by mongodb in gigabytes",
        " - lock: checks percentage of lock time for the server",
        " - flushing: checks the average flush time the server",
        " - replset_state: State of the node within a replset configuration",
        " -W : The warning threshold we want to set",
        " -C : The critical threshold we want to set",
        "",
        "",
    ]
    for line in help_lines:
        print(line)
57
def _parse_threshold(text, default):
    """Turn a threshold option string into a number, or return *default*.

    Any value containing an ASCII letter falls back to *default*. Whole
    numbers come back as ``int``, fractional values such as ``"1.5"`` as
    ``float``, and anything else that cannot be parsed yields *default*.
    """
    if re.search('[a-zA-Z]', text):
        return default
    try:
        return int(text)
    except ValueError:
        pass
    try:
        return float(text)
    except ValueError:
        return default


def main(argv):
    """Parse the command line and run the requested MongoDB check.

    Parameters:
        argv: list of command-line arguments without the program name.

    With an empty ``argv`` the usage text is printed and the process exits
    with status 2. Otherwise the options are parsed from ``argv`` and the
    check selected by ``-A`` is called; any action name that is not
    recognised runs the plain connect check.
    """
    if len(argv) == 0:
        usage()
        sys.exit(2)

    p = optparse.OptionParser(conflict_handler="resolve", description=\
        "This Nagios plugin checks the health of mongodb. ")

    p.add_option('-H', '--host', action='store', type='string', dest='host', default='127.0.0.1', help=' -H : The hostname you want to connect to')
    p.add_option('-P', '--port', action='store', type='string', dest='port', default='27017', help=' -P : The port mongodb is runnung on')
    p.add_option('-W', '--warning', action='store', type='string', dest='warning', default='2', help=' -W : The warning threshold we want to set')
    p.add_option('-C', '--critical', action='store', type='string', dest='critical', default='5', help=' -C : The critical threshold we want to set')
    p.add_option('-A', '--action', action='store', type='string', dest='action', default='connect', help=' -A : The action you want to take')

    # Parse the list we were given instead of implicitly re-reading sys.argv,
    # so the argv parameter is actually honoured.
    options, _positional = p.parse_args(argv)

    host = options.host
    action = options.action

    # A port that is not a plain integer falls back to the default port
    # rather than aborting with a traceback.
    try:
        port = int(options.port)
    except ValueError:
        port = 27017

    warning = _parse_threshold(options.warning, 2)
    critical = _parse_threshold(options.critical, 5)

    if action == "connections":
        check_connections(host, port, warning, critical)
    elif action == "replication_lag":
        check_rep_lag(host, port, warning, critical)
    elif action == "replset_state":
        check_replset_state(host, port)
    elif action == "memory":
        check_memory(host, port, warning, critical)
    elif action == "lock":
        check_lock(host, port, warning, critical)
    elif action == "flushing":
        check_flushing(host, port, warning, critical)
    else:
        check_connect(host, port, warning, critical)
1b7fe15 @mzupan first import. Connect working
mzupan authored
114
115
0b3253a @mzupan added a connection free check
mzupan authored
def check_connect(host, port, warning, critical):
    """Check that a MongoDB connection can be opened and time how long it takes.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: elapsed seconds at or above which WARNING is reported.
        critical: elapsed seconds at or above which CRITICAL is reported;
            also passed to the driver as ``network_timeout``.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        start = time.time()
        pymongo.Connection(host, port, slave_okay=True, network_timeout=critical)

        # Elapsed time rounded to whole seconds before comparing.
        conn_time = round(time.time() - start, 0)

        # Test the more severe threshold first; with the warning test first
        # the critical branch could never be reached when critical >= warning.
        if conn_time >= critical:
            print("CRITICAL - Connection took %i seconds" % int(conn_time))
            sys.exit(2)
        elif conn_time >= warning:
            print("WARNING - Connection took %i seconds" % int(conn_time))
            sys.exit(1)

        print("OK - Connection accepted")
        sys.exit(0)
    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
0b3253a @mzupan added a connection free check
mzupan authored
136
180c06b added flushing check
Frank Brandewiede authored
137
0b3253a @mzupan added a connection free check
mzupan authored
def check_connections(host, port, warning, critical):
    """Check what percentage of the server's connection slots are in use.

    Reads ``connections.current`` and ``connections.available`` from the
    ``serverStatus`` command. ``available`` is treated as the number of
    still-unused slots, so the total is ``current + available`` and the
    reported percentage is ``current / total``.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: used-percentage at or above which WARNING is reported.
        critical: used-percentage at or above which CRITICAL is reported.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        # SON is looked up in two places; the second is the fallback when
        # the first attempt fails for any reason.
        try:
            data = con.admin.command(pymongo.son_manipulator.SON([('serverStatus', 1), ('repl', 1)]))
        except Exception:
            data = con.admin.command(pymongo.son.SON([('serverStatus', 1), ('repl', 1)]))

        current = float(data['connections']['current'])
        available = float(data['connections']['available'])
        total = current + available

        # Guard the division so an all-zero report cannot raise.
        if total:
            used_percent = int(current / total * 100)
        else:
            used_percent = 0

        if used_percent >= critical:
            print("CRITICAL - %i percent (%i of %i connections) used" % (used_percent, current, total))
            sys.exit(2)
        elif used_percent >= warning:
            print("WARNING - %i percent (%i of %i connections) used" % (used_percent, current, total))
            sys.exit(1)
        else:
            print("OK - %i percent (%i of %i connections) used" % (used_percent, current, total))
            sys.exit(0)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
164
17ebe04 @mzupan added a replication lag check
mzupan authored
def check_rep_lag(host, port, warning, critical):
    """Report the largest replication lag as seen from the master.

    If the ``ismaster`` command says this node is not the master, OK is
    printed and the process exits with status 0. Otherwise the ``ts`` time of
    the newest document in ``local.oplog.rs`` is compared with the
    ``syncedTo`` time of every document in ``local.slaves``; the largest
    difference (never below 0) is checked against the thresholds.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: lag at or above which WARNING is reported.
        critical: lag at or above which CRITICAL is reported.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        master_info = con.admin.command("ismaster", "1")
        if not master_info['ismaster']:
            print("OK - This is a slave.")
            sys.exit(0)

        # Newest oplog entry: reverse natural order, first document.
        newest_op = con.local['oplog.rs'].find_one(sort=[('$natural', -1)])
        master_time = newest_op['ts'].time

        lag = 0
        for member in con.local.slaves.find():
            behind = master_time - member['syncedTo'].time
            if behind > lag:
                lag = behind

        if lag >= critical:
            print("CRITICAL - Replication lag: %i" % lag)
            sys.exit(2)
        if lag >= warning:
            print("WARNING - Replication lag: %i" % lag)
            sys.exit(1)
        print("OK - Replication lag: %i" % lag)
        sys.exit(0)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
e1dc407 changed the argument parser
Frank Brandewiede authored
198
de3f28b fixed up the replication lag thanks to Sam Perman
Frank Brandewiede authored
199
2260578 @mzupan added memory checking
mzupan authored
def check_memory(host, port, warning, critical):
    """Check the resident memory reported by ``serverStatus``.

    ``mem.resident`` is divided by 1024 before being compared with the
    thresholds and printed with the ``GByte`` label.
    NOTE(review): this assumes the server reports ``mem.resident`` in MiB,
    as the MongoDB serverStatus documentation states - confirm for the
    server versions in use.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: value at or above which WARNING is reported.
        critical: value at or above which CRITICAL is reported.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        # SON is looked up in two places; the second is the fallback when
        # the first attempt fails for any reason.
        try:
            data = con.admin.command(pymongo.son_manipulator.SON([('serverStatus', 1)]))
        except Exception:
            data = con.admin.command(pymongo.son.SON([('serverStatus', 1)]))

        #
        # convert to gigs (1024 per step, not 1000)
        #
        mem = float(data['mem']['resident']) / 1024.0

        warning = float(warning)
        critical = float(critical)

        if mem >= critical:
            print("CRITICAL - Memory Usage: %f GByte" % mem)
            sys.exit(2)
        elif mem >= warning:
            print("WARNING - Memory Usage: %f GByte" % mem)
            sys.exit(1)
        else:
            print("OK - Memory Usage: %f GByte" % mem)
            sys.exit(0)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
ca4660f @mzupan added a lock percentage check
mzupan authored
231
232
def check_lock(host, port, warning, critical):
    """Check the share of time the global lock was held, as a percentage.

    Computes ``globalLock.lockTime / globalLock.totalTime * 100`` from the
    ``serverStatus`` command. The value was previously left as a 0..1 ratio
    while being reported and thresholded as a percentage.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: percentage at or above which WARNING is reported.
        critical: percentage at or above which CRITICAL is reported.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        # SON is looked up in two places; the second is the fallback when
        # the first attempt fails for any reason.
        try:
            data = con.admin.command(pymongo.son_manipulator.SON([('serverStatus', 1)]))
        except Exception:
            data = con.admin.command(pymongo.son.SON([('serverStatus', 1)]))

        lock_time = float(data['globalLock']['lockTime'])
        total_time = float(data['globalLock']['totalTime'])

        #
        # convert the ratio to a percentage; a zero total means no time has
        # been accounted yet, so report 0 instead of dividing by zero
        #
        if total_time:
            lock = lock_time / total_time * 100
        else:
            lock = 0.0

        warning = float(warning)
        critical = float(critical)

        if lock >= critical:
            print("CRITICAL - Lock Percentage: %s" % ("%.2f" % round(lock, 2)))
            sys.exit(2)
        elif lock >= warning:
            print("WARNING - Lock Percentage: %s" % ("%.2f" % round(lock, 2)))
            sys.exit(1)
        else:
            print("OK - Lock Percentage: %s" % ("%.2f" % round(lock, 2)))
            sys.exit(0)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
180c06b added flushing check
Frank Brandewiede authored
264
265
def check_flushing(host, port, warning, critical):
    """Check the average background flush time reported by ``serverStatus``.

    Reads ``backgroundFlushing.average_ms`` and compares it with the
    thresholds.

    Parameters:
        host, port: address passed to ``pymongo.Connection``.
        warning: average (ms) at or above which WARNING is reported.
        critical: average (ms) at or above which CRITICAL is reported.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        # SON is looked up in two places; the second is the fallback when
        # the first attempt fails. Catch Exception rather than everything so
        # SystemExit / KeyboardInterrupt are not swallowed.
        try:
            data = con.admin.command(pymongo.son_manipulator.SON([('serverStatus', 1)]))
        except Exception:
            data = con.admin.command(pymongo.son.SON([('serverStatus', 1)]))

        avg_flush = float(data['backgroundFlushing']['average_ms'])

        warning = float(warning)
        critical = float(critical)

        if avg_flush >= critical:
            print("CRITICAL - Avg Flush Time: %sms" % ("%.2f" % round(avg_flush, 2)))
            sys.exit(2)
        elif avg_flush >= warning:
            print("WARNING - Avg Flush Time: %sms" % ("%.2f" % round(avg_flush, 2)))
            sys.exit(1)
        else:
            print("OK - Avg Flush Time: %sms" % ("%.2f" % round(avg_flush, 2)))
            sys.exit(0)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
294
295
def check_replset_state(host, port):
    """Report this node's replica-set state from ``replSetGetStatus``.

    The integer in ``myState`` is mapped to a severity and a description;
    any state not in the table is reported as CRITICAL "Unknown state".

    Parameters:
        host, port: address passed to ``pymongo.Connection``.

    Always terminates the process: exit status 0 (OK), 1 (WARNING) or
    2 (CRITICAL, including a ``ConnectionFailure`` from the driver).
    """
    # state number -> (severity label, exit status, description)
    known_states = {
        8: ("CRITICAL", 2, "Down"),
        4: ("CRITICAL", 2, "Fatal error"),
        0: ("WARNING", 1, "Starting up, phase1"),
        3: ("WARNING", 1, "Recovering"),
        5: ("WARNING", 1, "Starting up, phase2"),
        1: ("OK", 0, "Primary"),
        2: ("OK", 0, "Secondary"),
        7: ("OK", 0, "Arbiter"),
    }

    try:
        con = pymongo.Connection(host, port, slave_okay=True)

        # SON is looked up in two places; the second is the fallback when
        # the first attempt fails for any reason.
        try:
            data = con.admin.command(pymongo.son_manipulator.SON([('replSetGetStatus', 1)]))
        except Exception:
            data = con.admin.command(pymongo.son.SON([('replSetGetStatus', 1)]))

        state = int(data['myState'])

        severity, exit_code, description = known_states.get(
            state, ("CRITICAL", 2, "Unknown state"))

        # Plain parentheses: the former "\(" / "\)" sequences are not escapes
        # in Python and printed literal backslashes.
        print("%s - State: %i (%s)" % (severity, state, description))
        sys.exit(exit_code)

    except pymongo.errors.ConnectionFailure:
        print("CRITICAL - Connection to MongoDB failed!")
        sys.exit(2)
339
ca4660f @mzupan added a lock percentage check
mzupan authored
340
341
1b7fe15 @mzupan first import. Connect working
mzupan authored
342 #
343 # main app
344 #
# Run the plugin only when executed as a script; the program name is
# stripped so main() receives just the user-supplied arguments.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)
Something went wrong with that request. Please try again.