From a5d038246a5bd4d9f9b35f742c10ab166d81031e Mon Sep 17 00:00:00 2001 From: Romain LE DISEZ Date: Tue, 27 Oct 2015 15:49:52 +0100 Subject: [PATCH 01/16] Fix initial_state mapping for hosts The initial_state "u" is declared twice, for UP and UNREACHABLE. Documentation says that UP state is "o". --- shinken/objects/host.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shinken/objects/host.py b/shinken/objects/host.py index 70e5c26be7..3e160034a0 100644 --- a/shinken/objects/host.py +++ b/shinken/objects/host.py @@ -643,7 +643,7 @@ class Host(SchedulingItem): def set_initial_state(self): mapping = { - "u": { + "o": { "state": "UP", "state_id": 0 }, From 67b1debccea049aff2aa3904725559abd28e11b3 Mon Sep 17 00:00:00 2001 From: KTI - Richard Clark Date: Fri, 20 Nov 2015 09:07:07 -0800 Subject: [PATCH 02/16] Documentation : Remove mention of shinken-admin from FAQ section --- .../troubleshooting-shinken.rst | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/doc/source/10_troubleshooting/troubleshooting-shinken.rst b/doc/source/10_troubleshooting/troubleshooting-shinken.rst index d3076a17e5..bc31f20b79 100644 --- a/doc/source/10_troubleshooting/troubleshooting-shinken.rst +++ b/doc/source/10_troubleshooting/troubleshooting-shinken.rst @@ -26,9 +26,9 @@ Frequently asked questions --------------------------- * :ref:`How to set my daemons in debug mode to review the logs? ` - * :ref:`I am getting an OSError read-only filesystem ` - * :ref:`I am getting an OSError [Errno 24] Too many open files ` - * :ref:`Notification emails have generic-host instead of host_name ` + * :ref:`I am getting an OSError read-only filesystem ` + * :ref:`I am getting an OSError [Errno 24] Too many open files ` + * :ref:`Notification emails have generic-host instead of host_name ` @@ -92,16 +92,6 @@ The log file will contain information on the Shinken process and any problems th .. _troubleshooting/troubleshooting-shinken#FAQ-2: -Changing the log level during runtime -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -shinken-admin is a command line script that can change the logging level of a running daemon. - -''linux-server# ./shinken-admin ...'' - - -.. _troubleshooting/troubleshooting-shinken#FAQ-3: - Changing the log level in the configuration ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -112,7 +102,7 @@ Possible values: DEBUG,INFO,WARNING,ERROR,CRITICAL Re-start the Shinken process. -.. _troubleshooting/troubleshooting-shinken#FAQ-4: +.. _troubleshooting/troubleshooting-shinken#FAQ-3: OSError read-only filesystem error ---------------------------------- @@ -125,7 +115,7 @@ Execute a 'mount' and verify if /tmp or /tmpfs is set to 'ro' (Read-only). As root modify your /etc/fstab to set the filesystem to read-write. -.. _troubleshooting/troubleshooting-shinken#FAQ-5: +.. _troubleshooting/troubleshooting-shinken#FAQ-4: OSError too many files open --------------------------- @@ -146,7 +136,7 @@ This typically changing a system wide file limit and potentially user specific f ulimit -n xxxxx now -.. _troubleshooting/troubleshooting-shinken#FAQ-6: +.. _troubleshooting/troubleshooting-shinken#FAQ-5: Notification emails have generic-host instead of host_name ---------------------------------------------------------- From ec772e1cd2cd7dac6d109ec95c5be24f1bbba814 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gab=C3=A8s=20Jean?= Date: Thu, 26 Nov 2015 11:09:07 +0100 Subject: [PATCH 03/16] Add: better logging for regenerator (hunting missing service after some times in livestatus) Add: first version for a online profiling system, not enabled currently --- shinken/daemon.py | 12 ++++- shinken/misc/regenerator.py | 99 +++++++++++++++++-------------------- shinken/profilermgr.py | 63 +++++++++++++++++++++++ 3 files changed, 118 insertions(+), 56 deletions(-) create mode 100644 shinken/profilermgr.py diff --git a/shinken/daemon.py b/shinken/daemon.py index c6fd78cca3..7944229aa6 100644 --- a/shinken/daemon.py +++ b/shinken/daemon.py @@ -55,7 +55,7 @@ from shinken.property import StringProp, BoolProp, PathProp, ConfigPathProp, IntegerProp,\ LogLevelProp from shinken.misc.common import setproctitle - +from shinken.profilermgr import profiler try: import pwd @@ -114,6 +114,14 @@ def ping(self): ping.need_lock = False ping.doc = doc + doc = 'Profiling data' + def profiling_data(self): + return profiler.output_stats() + profiling_data.need_lock = False + profiling_data.doc = doc + + + doc = 'Get the start time of the daemon' def get_start_time(self): return self.start_time @@ -682,6 +690,8 @@ def do_daemon_init_and_start(self, use_pyro=True, fake=False): self.http_thread.daemon = True self.http_thread.start() + #profiler.start() + # TODO: we do not use pyro anymore, change the function name.... def setup_pyro_daemon(self): diff --git a/shinken/misc/regenerator.py b/shinken/misc/regenerator.py index 0932e32803..9b8bafb2b4 100755 --- a/shinken/misc/regenerator.py +++ b/shinken/misc/regenerator.py @@ -42,6 +42,7 @@ from shinken.objects.receiverlink import ReceiverLink, ReceiverLinks from shinken.util import safe_print from shinken.message import Message +from shinken.log import logger # Class for a Regenerator. It will get broks, and "regenerate" real objects @@ -107,7 +108,7 @@ def load_from_scheduler(self, sched): self.manage_program_status_brok(b) # Now we will lie and directly map our objects :) - print "Regenerator::load_from_scheduler" + logger.debug("Regenerator::load_from_scheduler") self.hosts = c.hosts self.services = c.services self.notificationways = c.notificationways @@ -146,9 +147,17 @@ def manage_brok(self, brok): manage = getattr(self, 'manage_' + brok.type + '_brok', None) # If we can and want it, got for it :) if manage and self.want_brok(brok): + if brok.type not in ('service_next_schedule', 'host_next_schedule', 'service_check_result', + 'host_check_result', + 'update_service_status', 'update_host_status', 'update_poller_status', + 'update_broker_status', + 'update_receiver_status', + 'update_scheduler_status'): + logger.debug('REGEN: manage brok %s:%s' % (brok.type, brok.id)) return manage(brok) + def update_element(self, e, data): for prop in data: setattr(e, prop, data[prop]) @@ -159,16 +168,15 @@ def all_done_linking(self, inst_id): # In a scheduler we are already "linked" so we can skip this if self.in_scheduler_mode: - safe_print("Regenerator: We skip the all_done_linking phase " - "because we are in a scheduler") + logger.debug("Regenerator: We skip the all_done_linking phase because we are in a scheduler") return start = time.time() - safe_print("In ALL Done linking phase for instance", inst_id) + logger.debug("In ALL Done linking phase for instance %s" % inst_id) # check if the instance is really defined, so got ALL the # init phase if inst_id not in self.configs.keys(): - safe_print("Warning: the instance %d is not fully given, bailout" % inst_id) + logger.debug("Warning: the instance %d is not fully given, bailout" % inst_id) return # Try to load the in progress list and make them available for @@ -180,7 +188,7 @@ def all_done_linking(self, inst_id): inp_services = self.inp_services[inst_id] inp_servicegroups = self.inp_servicegroups[inst_id] except Exception, exp: - print "Warning all done: ", exp + logger.error("[Regen] Warning all done: %s" % exp) return # Link HOSTGROUPS with hosts @@ -349,7 +357,7 @@ def all_done_linking(self, inst_id): else: # else take the new one self.contactgroups.add_item(inpcg) - safe_print("ALL LINKING TIME" * 10, time.time() - start) + logger.debug("[Regen] ALL LINKING TIME %s" % (time.time() - start)) # clean old objects del self.inp_hosts[inst_id] @@ -442,6 +450,7 @@ def linkify_dict_srv_and_hosts(self, o, prop): new_v.append(h) setattr(o, prop, new_v) + def linkify_host_and_hosts(self, o, prop): v = getattr(o, prop) @@ -475,7 +484,7 @@ def before_after_hook(self, brok, obj): def manage_program_status_brok(self, b): data = b.data c_id = data['instance_id'] - safe_print("Regenerator: Creating config:", c_id) + logger.debug("[Regen] Creating config: %s" % c_id) # We get a real Conf object ,adn put our data c = Config() @@ -495,25 +504,25 @@ def manage_program_status_brok(self, b): # Clean the old "hard" objects # We should clean all previously added hosts and services - safe_print("Clean hosts/service of", c_id) + logger.debug("Clean hosts/service of %s" % c_id) to_del_h = [h for h in self.hosts if h.instance_id == c_id] to_del_srv = [s for s in self.services if s.instance_id == c_id] - safe_print("Cleaning host:%d srv:%d" % (len(to_del_h), len(to_del_srv))) + logger.debug("Cleaning host:%d srv:%d" % (len(to_del_h), len(to_del_srv))) # Clean hosts from hosts and hostgroups for h in to_del_h: - safe_print("Deleting", h.get_name()) + logger.debug("Deleting %s" % h.get_name()) del self.hosts[h.id] # Now clean all hostgroups too for hg in self.hostgroups: - safe_print("Cleaning hostgroup %s:%d" % (hg.get_name(), len(hg.members))) + logger.debug("Cleaning hostgroup %s:%d" % (hg.get_name(), len(hg.members))) # Exclude from members the hosts with this inst_id hg.members = [h for h in hg.members if h.instance_id != c_id] - safe_print("Len after", len(hg.members)) + logger.debug("Len after clean %s" % len(hg.members)) for s in to_del_srv: - safe_print("Deleting", s.get_full_name()) + logger.debug("Deleting %s" % s.get_full_name()) del self.services[s.id] # Now clean service groups @@ -531,9 +540,9 @@ def manage_initial_host_status_brok(self, b): try: inp_hosts = self.inp_hosts[inst_id] except Exception, exp: # not good. we will cry in theprogram update - print "Not good!", exp + logger.error("[Regen] host_check_result:: Not good! %s" % exp) return - # safe_print("Creating a host: %s in instance %d" % (hname, inst_id)) + # logger.debug("Creating a host: %s in instance %d" % (hname, inst_id)) h = Host({}) self.update_element(h, data) @@ -557,10 +566,10 @@ def manage_initial_hostgroup_status_brok(self, b): try: inp_hostgroups = self.inp_hostgroups[inst_id] except Exception, exp: # not good. we will cry in theprogram update - print "Not good!", exp + logger.error("[regen] host_check_result:: Not good! %s" % exp) return - safe_print("Creating a hostgroup: %s in instance %d" % (hgname, inst_id)) + logger.debug("Creating a hostgroup: %s in instance %d" % (hgname, inst_id)) # With void members hg = Hostgroup([]) @@ -583,9 +592,9 @@ def manage_initial_service_status_brok(self, b): try: inp_services = self.inp_services[inst_id] except Exception, exp: # not good. we will cry in theprogram update - print "Not good!", exp + logger.error("[Regen] host_check_result Not good! %s" % exp) return - # safe_print("Creating a service: %s/%s in instance %d" % (hname, sdesc, inst_id)) + # logger.debug("Creating a service: %s/%s in instance %d" % (hname, sdesc, inst_id)) s = Service({}) self.update_element(s, data) @@ -609,10 +618,10 @@ def manage_initial_servicegroup_status_brok(self, b): try: inp_servicegroups = self.inp_servicegroups[inst_id] except Exception, exp: # not good. we will cry in theprogram update - print "Not good!", exp + logger.error("[Regen] manage_initial_servicegroup_status_brok:: Not good! %s" % exp) return - safe_print("Creating a servicegroup: %s in instance %d" % (sgname, inst_id)) + logger.debug("Creating a servicegroup: %s in instance %d" % (sgname, inst_id)) # With void members sg = Servicegroup([]) @@ -632,12 +641,11 @@ def manage_initial_servicegroup_status_brok(self, b): def manage_initial_contact_status_brok(self, b): data = b.data cname = data['contact_name'] - safe_print("Contact with data", data) + c = self.contacts.find_by_name(cname) if c: self.update_element(c, data) else: - safe_print("Creating Contact:", cname) c = Contact({}) self.update_element(c, data) self.contacts.add_item(c) @@ -652,13 +660,12 @@ def manage_initial_contact_status_brok(self, b): # Same than for contacts. We create or # update nws = c.notificationways - safe_print("Got notif ways", nws) new_notifways = [] for cnw in nws: nwname = cnw.notificationway_name nw = self.notificationways.find_by_name(nwname) if not nw: - safe_print("Creating notif way", nwname) + logger.debug("Creating notif way %s" % nwname) nw = NotificationWay([]) self.notificationways.add_item(nw) # Now update it @@ -690,10 +697,10 @@ def manage_initial_contactgroup_status_brok(self, b): try: inp_contactgroups = self.inp_contactgroups[inst_id] except Exception, exp: # not good. we will cry in theprogram update - print "Not good!", exp + logger.error("[Regen] manage_initial_contactgroup_status_brok Not good! %s" % exp) return - safe_print("Creating an contactgroup: %s in instance %d" % (cgname, inst_id)) + logger.debug("Creating an contactgroup: %s in instance %d" % (cgname, inst_id)) # With void members cg = Contactgroup([]) @@ -746,66 +753,49 @@ def manage_initial_command_status_brok(self, b): def manage_initial_scheduler_status_brok(self, b): data = b.data scheduler_name = data['scheduler_name'] - print "Creating Scheduler:", scheduler_name, data sched = SchedulerLink({}) - print "Created a new scheduler", sched self.update_element(sched, data) - print "Updated scheduler" - # print "CMD:", c self.schedulers[scheduler_name] = sched - print "scheduler added" + def manage_initial_poller_status_brok(self, b): data = b.data poller_name = data['poller_name'] - print "Creating Poller:", poller_name, data poller = PollerLink({}) - print "Created a new poller", poller self.update_element(poller, data) - print "Updated poller" - # print "CMD:", c self.pollers[poller_name] = poller - print "poller added" def manage_initial_reactionner_status_brok(self, b): data = b.data reactionner_name = data['reactionner_name'] - print "Creating Reactionner:", reactionner_name, data reac = ReactionnerLink({}) - print "Created a new reactionner", reac self.update_element(reac, data) - print "Updated reactionner" - # print "CMD:", c self.reactionners[reactionner_name] = reac - print "reactionner added" + def manage_initial_broker_status_brok(self, b): data = b.data broker_name = data['broker_name'] - print "Creating Broker:", broker_name, data + broker = BrokerLink({}) - print "Created a new broker", broker + self.update_element(broker, data) - print "Updated broker" + # print "CMD:", c self.brokers[broker_name] = broker - print "broker added" + def manage_initial_receiver_status_brok(self, b): data = b.data receiver_name = data['receiver_name'] - print "Creating Receiver:", receiver_name, data receiver = ReceiverLink({}) - print "Created a new receiver", receiver self.update_element(receiver, data) - print "Updated receiver" - # print "CMD:", c self.receivers[receiver_name] = receiver - print "receiver added" + @@ -813,7 +803,6 @@ def manage_initial_receiver_status_brok(self, b): # So we got all data, we can link all together :) def manage_initial_broks_done_brok(self, b): inst_id = b.data['instance_id'] - print "Finish the configuration of instance", inst_id self.all_done_linking(inst_id) @@ -834,7 +823,7 @@ def manage_update_program_status_brok(self, b): # Do not ask data too quickly, very dangerous # one a minute if time.time() - self.last_need_data_send > 60 and self.from_q is not None: - print "I ask the broker for instance id data:", c_id + logger.debug("I ask the broker for instance id data: %s" % c_id) msg = Message(id=0, type='NeedData', data={'full_instance_id': c_id}) self.from_q.put(msg) self.last_need_data_send = time.time() @@ -877,7 +866,7 @@ def manage_update_host_status_brok(self, b): # If the topology change, update it if toplogy_change: - print "Topology change for", h.get_name(), h.parent_dependencies + logger.debug("Topology change for %s %s" % (h.get_name(), h.parent_dependencies)) self.linkify_host_and_hosts(h, 'parents') self.linkify_host_and_hosts(h, 'childs') self.linkify_dict_srv_and_hosts(h, 'parent_dependencies') diff --git a/shinken/profilermgr.py b/shinken/profilermgr.py new file mode 100644 index 0000000000..39fbae29e6 --- /dev/null +++ b/shinken/profilermgr.py @@ -0,0 +1,63 @@ +import collections +import signal +import time + + +class Sampler(object): + """ + A simple stack sampler for low-overhead CPU profiling: samples the call + stack every `interval` seconds and keeps track of counts by frame. Because + this uses signals, it only works on the main thread. + """ + def __init__(self, interval=0.005): + self.interval = interval + self._started = None + self._stack_counts = collections.defaultdict(int) + self.nb_sig = 0 + + def start(self): + self._started = time.time() + try: + signal.signal(signal.SIGVTALRM, self._sample) + except ValueError: + raise ValueError('Can only sample on the main thread') + + signal.setitimer(signal.ITIMER_VIRTUAL, self.interval, self.interval) + + + def _sample(self, signum, frame): + stack = [] + while frame is not None: + stack.append(self._format_frame(frame)) + frame = frame.f_back + self.nb_sig += 1 + stack = ';'.join(reversed(stack)) + + self._stack_counts[stack] += 1 + #print "STACK", stack,self._stack_counts[stack], self.nb_sig + #signal.setitimer(signal.ITIMER_REAL, self.interval, 0) + + + def _format_frame(self, frame): + return '{}({})'.format(frame.f_code.co_name, + frame.f_globals.get('__name__')) + + def output_stats(self): + if self._started is None: + return '' + elapsed = time.time() - self._started + lines = ['elapsed {}'.format(elapsed), + 'granularity {}'.format(self.interval)] + ordered_stacks = sorted(self._stack_counts.items(), + key=lambda kv: kv[1], reverse=True) + lines.extend(['{} {}'.format(frame, count) + for frame, count in ordered_stacks]) + return lines + + def reset(self): + self._started = time.time() + self._stack_counts = collections.defaultdict(int) + + +profiler = Sampler() + From 2ce9510e27277faefd9c4bd28533f3f9c9debf93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gab=C3=A8s=20Jean?= Date: Thu, 26 Nov 2015 11:12:19 +0100 Subject: [PATCH 04/16] Add: better logging for regenerator (hunting missing service after some times in livestatus) Add: first version for a online profiling system, not enabled currently --- test/test_external_mapping.py | 1 + test/test_scheduler_init.py | 1 + 2 files changed, 2 insertions(+) diff --git a/test/test_external_mapping.py b/test/test_external_mapping.py index 7a653621e6..0450cb82e0 100644 --- a/test/test_external_mapping.py +++ b/test/test_external_mapping.py @@ -66,6 +66,7 @@ def __setup(self, inputlines): inputfile.close() self.input_filename = inputfile.name + def __cleanup(self): """ Cleanup the temporary files. diff --git a/test/test_scheduler_init.py b/test/test_scheduler_init.py index 0cd921840a..c14771290a 100644 --- a/test/test_scheduler_init.py +++ b/test/test_scheduler_init.py @@ -89,6 +89,7 @@ def test_scheduler_init(self): # notice: set this process master with preexec_fn=os.setsid so when we kill it # it will also kill sons args = ["../bin/shinken-arbiter.py", "-c", daemons_config[Arbiter][0], "-d"] + print "Launching sub arbiter with", args proc = self.arb_proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, preexec_fn=os.setsid) From 56d689a511138171bcd72d43fd56c4e3712bbca6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gab=C3=A8s=20Jean?= Date: Thu, 26 Nov 2015 11:42:15 +0100 Subject: [PATCH 05/16] Fix: pep8 style --- shinken/daemon.py | 2 +- shinken/profilermgr.py | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/shinken/daemon.py b/shinken/daemon.py index 7944229aa6..7f74e73345 100644 --- a/shinken/daemon.py +++ b/shinken/daemon.py @@ -690,7 +690,7 @@ def do_daemon_init_and_start(self, use_pyro=True, fake=False): self.http_thread.daemon = True self.http_thread.start() - #profiler.start() + # profiler.start() # TODO: we do not use pyro anymore, change the function name.... diff --git a/shinken/profilermgr.py b/shinken/profilermgr.py index 39fbae29e6..b2734a3fd4 100644 --- a/shinken/profilermgr.py +++ b/shinken/profilermgr.py @@ -34,8 +34,6 @@ def _sample(self, signum, frame): stack = ';'.join(reversed(stack)) self._stack_counts[stack] += 1 - #print "STACK", stack,self._stack_counts[stack], self.nb_sig - #signal.setitimer(signal.ITIMER_REAL, self.interval, 0) def _format_frame(self, frame): @@ -60,4 +58,3 @@ def reset(self): profiler = Sampler() - From 3f0df53e9a53243b22dd37af48ade849b6238a67 Mon Sep 17 00:00:00 2001 From: Daniel Lawrence Date: Fri, 4 Dec 2015 20:08:04 -0800 Subject: [PATCH 06/16] Allow Shinken to run on new infra Also cache python packages, to speed up the builds. --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index a3207e4b4f..a98933724f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,11 @@ language: python + +# Allow shinken to run on new travis infra +sudo: False + +# Allow cache of downloads +cache: pip + python: - "2.6" - "2.7" From b7f971357115e6e534822be1da97d6b0ee2b9b16 Mon Sep 17 00:00:00 2001 From: Fakhruddin Hamid Date: Sun, 6 Dec 2015 07:34:42 +0530 Subject: [PATCH 07/16] included option to set sender email --- libexec/notify_by_email.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/libexec/notify_by_email.py b/libexec/notify_by_email.py index 01fbb79ca4..7528819573 100755 --- a/libexec/notify_by_email.py +++ b/libexec/notify_by_email.py @@ -92,7 +92,10 @@ def get_shinken_url(): # Get current process user that will be the mail sender def get_user(): - return '@'.join((getpass.getuser(), socket.gethostname())) + if opts.sender: + return opts.sender + else: + return '@'.join((getpass.getuser(), socket.gethostname())) ############################################################################# @@ -266,6 +269,8 @@ def create_html_message(msg): help='Specify the $_SERVICEFIXACTIONS$ custom macros') group_general.add_option('-r', '--receivers', dest='receivers', help='Mail recipients comma-separated list') + group_general.add_option('-F', '--sender', dest='sender', + help='Sender email address, default is system user') group_general.add_option('-n', '--notification-object', dest='notification_object', type='choice', default='host', choices=['host', 'service'], help='Choose between host or service notification.') group_general.add_option('-S', '--SMTP', dest='smtp', default='localhost', From 21e189fba0a4b6359284ca064175bb7479fd5b2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20MOHIER?= Date: Fri, 11 Dec 2015 04:10:44 +0100 Subject: [PATCH 08/16] Hosts time templates --- etc/templates/time_templates.cfg | 90 +++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 25 deletions(-) diff --git a/etc/templates/time_templates.cfg b/etc/templates/time_templates.cfg index b114d2e0d3..f3afd4210f 100644 --- a/etc/templates/time_templates.cfg +++ b/etc/templates/time_templates.cfg @@ -1,13 +1,13 @@ ############################################################################## ############################################################################## -# -# Different Time Check Interval Services -# +# +# Different Time Check Interval for services and hosts +# ############################################################################## ############################################################################## ############################################################################## -# Purpose of time templates : +# Purpose of services time templates : # Simply define checks behavior of services with time template to avoid # false alerts. # There are three time template type : short, medium, long @@ -22,7 +22,7 @@ define service{ name 5min_short use generic-service max_check_attempts 1 - normal_check_interval 5 + check_interval 5 retry_interval 2 register 0 } @@ -32,7 +32,7 @@ define service{ name 5min_medium use generic-service max_check_attempts 2 - normal_check_interval 5 + check_interval 5 retry_interval 3 register 0 } @@ -42,7 +42,7 @@ define service{ name 5min_long use generic-service max_check_attempts 6 - normal_check_interval 5 + check_interval 5 retry_interval 5 register 0 } @@ -52,7 +52,7 @@ define service{ name 10min_short use generic-service max_check_attempts 1 - normal_check_interval 10 + check_interval 10 retry_interval 5 register 0 } @@ -62,7 +62,7 @@ define service{ name 10min_medium use generic-service max_check_attempts 2 - normal_check_interval 10 + check_interval 10 retry_interval 10 register 0 } @@ -72,7 +72,7 @@ define service{ name 10min_long use generic-service max_check_attempts 6 - normal_check_interval 10 + check_interval 10 retry_interval 10 register 0 } @@ -82,7 +82,7 @@ define service{ name 20min_short use generic-service max_check_attempts 1 - normal_check_interval 20 + check_interval 20 retry_interval 1 register 0 } @@ -92,7 +92,7 @@ define service{ name 20min_medium use generic-service max_check_attempts 2 - normal_check_interval 20 + check_interval 20 retry_interval 20 register 0 } @@ -102,7 +102,7 @@ define service{ name 20min_long use generic-service max_check_attempts 6 - normal_check_interval 20 + check_interval 20 retry_interval 20 register 0 } @@ -112,7 +112,7 @@ define service{ name 30min_short use generic-service max_check_attempts 1 - normal_check_interval 30 + check_interval 30 retry_interval 15 register 0 } @@ -122,7 +122,7 @@ define service{ name 30min_medium use generic-service max_check_attempts 2 - normal_check_interval 30 + check_interval 30 retry_interval 30 register 0 } @@ -132,7 +132,7 @@ define service{ name 30min_long use generic-service max_check_attempts 6 - normal_check_interval 30 + check_interval 30 retry_interval 30 register 0 } @@ -142,7 +142,7 @@ define service{ name 1hour_short use generic-service max_check_attempts 1 - normal_check_interval 60 + check_interval 60 retry_interval 20 register 0 @@ -153,7 +153,7 @@ define service{ name 1hour_medium use generic-service max_check_attempts 2 - normal_check_interval 60 + check_interval 60 retry_interval 60 register 0 @@ -164,7 +164,7 @@ define service{ name 1hour_long use generic-service max_check_attempts 6 - normal_check_interval 60 + check_interval 60 retry_interval 60 register 0 @@ -175,7 +175,7 @@ define service{ name 12hours_short use generic-service max_check_attempts 1 - normal_check_interval 720 + check_interval 720 retry_interval 360 register 0 } @@ -185,7 +185,7 @@ define service{ name 12hours_medium use generic-service max_check_attempts 2 - normal_check_interval 720 + check_interval 720 retry_interval 720 register 0 } @@ -195,7 +195,7 @@ define service{ name 12hours_long use generic-service max_check_attempts 6 - normal_check_interval 720 + check_interval 720 retry_interval 720 register 0 } @@ -205,7 +205,7 @@ define service{ name 1week_short use generic-service max_check_attempts 1 - normal_check_interval 10080 + check_interval 10080 retry_interval 10 register 0 } @@ -215,7 +215,7 @@ define service{ name 1week_medium use generic-service max_check_attempts 2 - normal_check_interval 10080 + check_interval 10080 retry_interval 10080 register 0 } @@ -225,7 +225,47 @@ define service{ name 1week_long use generic-service max_check_attempts 6 - normal_check_interval 10080 + check_interval 10080 retry_interval 10080 register 0 } + +############################################################################## +# Purpose of hosts time templates : +# Simply define checks behavior for hosts with time template to allow more or +# less fast polling. +# There are three time templates: +# - poll_short, every minute with 1 retry +# - poll_medium, let a time period in soft state for service that can have peak load +# - poll_long, let a greater time period in soft state, meant to service where +# great variation and long charge time period are usual. +############################################################################## + +# Check every 1min with immediate hard state +define host{ + name poll_short + use generic-host + max_check_attempts 2 + check_interval 1 + retry_interval 0 + register 0 +} + +define host{ + name poll_medium + use generic-host + max_check_attempts 2 + check_interval 5 + retry_interval 1 + register 0 +} + +define host{ + name poll_long + use generic-host + max_check_attempts 3 + check_interval 15 + retry_interval 3 + register 0 +} + From b96fd1fea52816a4dfa54054c3338fc7be25c912 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20MOHIER?= Date: Fri, 11 Dec 2015 05:00:27 +0100 Subject: [PATCH 09/16] Groups members property --- shinken/objects/contactgroup.py | 1 + shinken/objects/hostgroup.py | 15 ++++++++------- shinken/objects/servicegroup.py | 13 +++++++------ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/shinken/objects/contactgroup.py b/shinken/objects/contactgroup.py index e32005b114..c39a7e0276 100644 --- a/shinken/objects/contactgroup.py +++ b/shinken/objects/contactgroup.py @@ -42,6 +42,7 @@ class Contactgroup(Itemgroup): properties.update({ 'id': IntegerProp(default=0, fill_brok=['full_status']), 'contactgroup_name': StringProp(fill_brok=['full_status']), + 'contactgroup_members': StringProp(fill_brok=['full_status']), 'alias': StringProp(fill_brok=['full_status']), }) diff --git a/shinken/objects/hostgroup.py b/shinken/objects/hostgroup.py index 98e356bc7e..d91b09e252 100644 --- a/shinken/objects/hostgroup.py +++ b/shinken/objects/hostgroup.py @@ -36,13 +36,14 @@ class Hostgroup(Itemgroup): properties = Itemgroup.properties.copy() properties.update({ - 'id': IntegerProp(default=0, fill_brok=['full_status']), - 'hostgroup_name': StringProp(fill_brok=['full_status']), - 'alias': StringProp(fill_brok=['full_status']), - 'notes': StringProp(default='', fill_brok=['full_status']), - 'notes_url': StringProp(default='', fill_brok=['full_status']), - 'action_url': StringProp(default='', fill_brok=['full_status']), - 'realm': StringProp(default='', fill_brok=['full_status'], + 'id': IntegerProp(default=0, fill_brok=['full_status']), + 'hostgroup_name': StringProp(fill_brok=['full_status']), + 'hostgroup_members': StringProp(fill_brok=['full_status']), + 'alias': StringProp(fill_brok=['full_status']), + 'notes': StringProp(default='', fill_brok=['full_status']), + 'notes_url': StringProp(default='', fill_brok=['full_status']), + 'action_url': StringProp(default='', fill_brok=['full_status']), + 'realm': StringProp(default='', fill_brok=['full_status'], conf_send_preparation=get_obj_name), }) diff --git a/shinken/objects/servicegroup.py b/shinken/objects/servicegroup.py index f11071558b..06fec5cde9 100644 --- a/shinken/objects/servicegroup.py +++ b/shinken/objects/servicegroup.py @@ -36,12 +36,13 @@ class Servicegroup(Itemgroup): properties = Itemgroup.properties.copy() properties.update({ - 'id': IntegerProp(default=0, fill_brok=['full_status']), - 'servicegroup_name': StringProp(fill_brok=['full_status']), - 'alias': StringProp(fill_brok=['full_status']), - 'notes': StringProp(default='', fill_brok=['full_status']), - 'notes_url': StringProp(default='', fill_brok=['full_status']), - 'action_url': StringProp(default='', fill_brok=['full_status']), + 'id': IntegerProp(default=0, fill_brok=['full_status']), + 'servicegroup_name': StringProp(fill_brok=['full_status']), + 'servicegroup_members': StringProp(fill_brok=['full_status']), + 'alias': StringProp(fill_brok=['full_status']), + 'notes': StringProp(default='', fill_brok=['full_status']), + 'notes_url': StringProp(default='', fill_brok=['full_status']), + 'action_url': StringProp(default='', fill_brok=['full_status']), }) macros = { From 45c5139978cc7b0656cbf35eb1c7ca4c4ee56e32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20MOHIER?= Date: Fri, 11 Dec 2015 05:08:45 +0100 Subject: [PATCH 10/16] Add unit tests --- test/test_properties_defaults.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_properties_defaults.py b/test/test_properties_defaults.py index 268d75a184..512890fae0 100644 --- a/test/test_properties_defaults.py +++ b/test/test_properties_defaults.py @@ -278,7 +278,7 @@ class TestContactgroup(PropertiesTester, ShinkenTest): unused_props = [] - without_default = ['contactgroup_name', 'alias'] + without_default = ['contactgroup_name', 'contactgroup_members', 'alias'] properties = dict([ ('members', None), @@ -487,7 +487,7 @@ class TestHostgroup(PropertiesTester, ShinkenTest): unused_props = [] - without_default = ['hostgroup_name', 'alias'] + without_default = ['hostgroup_name', 'hostgroup_members', 'alias'] properties = dict([ ('members', None), @@ -795,7 +795,7 @@ class TestServicegroup(PropertiesTester, ShinkenTest): unused_props = [] - without_default = ['servicegroup_name', 'alias'] + without_default = ['servicegroup_name', 'servicegroup_members', 'alias'] properties = dict([ ('members', None), From fc02d3919ce17419f3bc7dc2fc9a70292a235911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20MOHIER?= Date: Fri, 11 Dec 2015 05:15:24 +0100 Subject: [PATCH 11/16] Fix PEP8 --- shinken/objects/hostgroup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/shinken/objects/hostgroup.py b/shinken/objects/hostgroup.py index d91b09e252..4110721d97 100644 --- a/shinken/objects/hostgroup.py +++ b/shinken/objects/hostgroup.py @@ -44,7 +44,7 @@ class Hostgroup(Itemgroup): 'notes_url': StringProp(default='', fill_brok=['full_status']), 'action_url': StringProp(default='', fill_brok=['full_status']), 'realm': StringProp(default='', fill_brok=['full_status'], - conf_send_preparation=get_obj_name), + conf_send_preparation=get_obj_name), }) macros = { From c01a63936858175086908aed9dc4c61d3e240091 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fr=C3=A9d=C3=A9ric=20MOHIER?= Date: Fri, 11 Dec 2015 05:17:04 +0100 Subject: [PATCH 12/16] Fix PEP8 ! --- shinken/objects/hostgroup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/shinken/objects/hostgroup.py b/shinken/objects/hostgroup.py index 4110721d97..c5e9d438f1 100644 --- a/shinken/objects/hostgroup.py +++ b/shinken/objects/hostgroup.py @@ -43,8 +43,7 @@ class Hostgroup(Itemgroup): 'notes': StringProp(default='', fill_brok=['full_status']), 'notes_url': StringProp(default='', fill_brok=['full_status']), 'action_url': StringProp(default='', fill_brok=['full_status']), - 'realm': StringProp(default='', fill_brok=['full_status'], - conf_send_preparation=get_obj_name), + 'realm': StringProp(default='', fill_brok=['full_status'], conf_send_preparation=get_obj_name), }) macros = { From 1614b55f949bbe2d5b08a05623ccfe25fcff233c Mon Sep 17 00:00:00 2001 From: Romain Forlot Date: Wed, 30 Dec 2015 11:11:36 +0100 Subject: [PATCH 13/16] Enh: resolve #1429 Change TCP Connect scan to TCP Syn scan with nmap discovery scan. --- libexec/discovery/nmap_discovery_runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libexec/discovery/nmap_discovery_runner.py b/libexec/discovery/nmap_discovery_runner.py index ab605d415f..288482a95b 100755 --- a/libexec/discovery/nmap_discovery_runner.py +++ b/libexec/discovery/nmap_discovery_runner.py @@ -36,9 +36,9 @@ VERSION = '0.1.1' # Fred : command launched depending on os detection if os.name != 'nt': - DEFAULT_CMD = "sudo nmap %s -sU -sT --min-rate %d --max-retries %d -T4 -O -oX %s" + DEFAULT_CMD = "sudo nmap %s -sU -sS --min-rate %d --max-retries %d -T4 -O -oX %s" else: - DEFAULT_CMD = "nmap %s -sU -sT --min-rate %d --max-retries %d -T4 -O -oX %s" + DEFAULT_CMD = "nmap %s -sU -sS --min-rate %d --max-retries %d -T4 -O -oX %s" parser = optparse.OptionParser( "%prog [options] -t nmap scanning targets", From 28600f2553269359f63e386b09c0438615fe9291 Mon Sep 17 00:00:00 2001 From: Romain Forlot Date: Wed, 30 Dec 2015 11:49:10 +0100 Subject: [PATCH 14/16] Fix: add templates dir into discovery.cfg then we do not lacks time_templates definition and avoid warning --- etc/discovery/discovery.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/etc/discovery/discovery.cfg b/etc/discovery/discovery.cfg index efdcd2fe8a..14afff71c3 100644 --- a/etc/discovery/discovery.cfg +++ b/etc/discovery/discovery.cfg @@ -9,6 +9,9 @@ log_file=/var/log/shinken/discovery.log # like discoveryrules or runners cfg_dir=../packs +# Then some useful templates +cfg_dir=../templates + # Default discovery rules and runners. Should be AFTER # the packs ones ;) cfg_file=discovery_rules.cfg From 1fcc39fb6cf50bee02bfd5b59aad5e54cf8c6a83 Mon Sep 17 00:00:00 2001 From: Hung Nguyen Viet Date: Sun, 3 Jan 2016 11:42:38 +0800 Subject: [PATCH 15/16] fix wrong template name use --- doc/source/07_advanced/objectinheritance.rst | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/07_advanced/objectinheritance.rst b/doc/source/07_advanced/objectinheritance.rst index 536e9ded81..1fbc4adeb3 100644 --- a/doc/source/07_advanced/objectinheritance.rst +++ b/doc/source/07_advanced/objectinheritance.rst @@ -163,13 +163,13 @@ It is possible to use incomplete object definitions as templates for use by othe define host{ host_name bighost1 address 192.168.1.3 - use generichosthosttemplate + use generichosttemplate } define host{ host_name bighost2 address 192.168.1.4 - use generichosthosttemplate + use generichosttemplate } Notice that the first host definition is incomplete because it is missing the required "host_name" variable. We don't need to supply a host name because we just want to use this definition as a generic host template. In order to prevent this definition from being registered with Shinken as a normal host, we set the "register" variable to 0. @@ -218,7 +218,7 @@ Any :ref:`custom object variables ` that you def define host{ host_name bighost1 address 192.168.1.3 - use generichosthosttemplate + use generichosttemplate } The host *bighost1* will inherit the custom host variables "_customvar1" and "_snmp_community", as well as their respective values, from the *generichosttemplate* definition. The effective result is a definition for *bighost1* that looks like this: @@ -254,7 +254,7 @@ In some cases you may not want your host, service, or contact definitions to inh host_name bighost1 address 192.168.1.3 event_handler null - use generichosthosttemplate + use generichosttemplate } In this case, the host *bighost1* will not inherit the value of the "event_handler" variable that is defined in the *generichosttemplate*. The resulting effective definition of *bighost1* is the following: @@ -291,7 +291,7 @@ This "additive inheritance" can be accomplished by prepending the local variable define host{ host_name linuxserver1 hostgroups +linux-servers,web-servers - use generichosthosttemplate + use generichosttemplate } In this case, the host *linuxserver1* will append the value of its local "hostgroups" variable to that from generichosttemplate. The resulting effective definition of *linuxserver1* is the following: From bac4b7ba2214baaecc4eadca5832f818e012b583 Mon Sep 17 00:00:00 2001 From: N-Mi Date: Wed, 6 Jan 2016 11:46:57 +0100 Subject: [PATCH 16/16] new option to notify_by_email.py for adding a subject prefix --- libexec/notify_by_email.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libexec/notify_by_email.py b/libexec/notify_by_email.py index 7528819573..295eafbcbb 100755 --- a/libexec/notify_by_email.py +++ b/libexec/notify_by_email.py @@ -133,8 +133,8 @@ def create_mail(format): msg['From'] = get_user() logging.debug('To: %s' % (opts.receivers)) msg['To'] = opts.receivers - logging.debug('Subject: %s' % (get_mail_subject(opts.notification_object))) - msg['Subject'] = get_mail_subject(opts.notification_object) + logging.debug('Subject: %s' % (opts.prefix + get_mail_subject(opts.notification_object))) + msg['Subject'] = opts.prefix + get_mail_subject(opts.notification_object) return msg @@ -275,6 +275,8 @@ def create_html_message(msg): choices=['host', 'service'], help='Choose between host or service notification.') group_general.add_option('-S', '--SMTP', dest='smtp', default='localhost', help='Target SMTP hostname. Default: localhost') + group_general.add_option('-p', '--prefix', dest='prefix', default='', + help='Mail subject prefix. Default is no prefix') parser.add_option_group(group_debug) parser.add_option_group(group_general)