Skip to content

Commit

Permalink
Fix: too high pack size for multi-scheduler environments. Can redu…
Browse files Browse the repository at this point in the history
…ce a LOT the boot time for large environments, and reduce the scheduler memory consumption too.
  • Loading branch information
naparuba committed Jan 8, 2015
1 parent 266999f commit a9a0ef1
Show file tree
Hide file tree
Showing 5 changed files with 319 additions and 9 deletions.
54 changes: 45 additions & 9 deletions shinken/objects/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -912,8 +912,7 @@ def prepare_for_sending(self):
self.hostgroups.prepare_for_sending()
t1 = time.time()
logger.info('[Arbiter] Serializing the configurations...')



# There are two ways to serialize the configuration:
# one is to use the serial way, the other is with use_multiprocesses_serializer
# to call sub-workers to do the job.
Expand All @@ -926,13 +925,13 @@ def prepare_for_sending(self):
conf.hostgroups.prepare_for_sending()
logger.debug('[%s] Serializing the configuration %d', r.get_name(), i)
t0 = time.time()
r.serialized_confs[i] = cPickle.dumps(conf, cPickle.HIGHEST_PROTOCOL)
logger.debug("[config] time to serialize the conf %s:%s is %s", r.get_name(), i, time.time() - t0)
r.serialized_confs[i] = cPickle.dumps(conf, 0)#cPickle.HIGHEST_PROTOCOL)
logger.debug("[config] time to serialize the conf %s:%s is %s (size:%s)", r.get_name(), i, time.time() - t0, len(r.serialized_confs[i]))
logger.debug("PICKLE LEN : %d", len(r.serialized_confs[i]))
# Now pickle the whole conf, for easy and quick spare send
t0 = time.time()
whole_conf_pack = cPickle.dumps(self, cPickle.HIGHEST_PROTOCOL)
logger.debug("[config] time to serialize the global conf : %s", time.time() - t0)
# Log how long the global serialization took and the size of the resulting pack.
# The size placeholder must be a complete conversion (%s): a bare "%" followed by ")"
# is an invalid format and makes the logging call fail to render the message.
logger.debug("[config] time to serialize the global conf : %s (size:%s)", time.time() - t0, len(whole_conf_pack))
self.whole_conf_pack = whole_conf_pack
print "TOTAL serializing in", time.time() - t1

Expand All @@ -955,7 +954,7 @@ def Serialize_config(q, rname, i, conf):
logger.debug('[%s] Serializing the configuration %d', rname, i)
t0 = time.time()
res = cPickle.dumps(conf, cPickle.HIGHEST_PROTOCOL)
logger.debug("[config] time to serialize the conf %s:%s is %s", rname, i, time.time() - t0)
logger.debug("[config] time to serialize the conf %s:%s is %s (size:%s)", rname, i, time.time() - t0, len(res))
q.append((i, res))

# Prepare a sub-process that will manage the pickle computation
Expand Down Expand Up @@ -984,8 +983,7 @@ def Serialize_config(q, rname, i, conf):
# Now get the serialized configuration and saved them into self
for (i, cfg) in q:
r.serialized_confs[i] = cfg
print "TOTAL TIME", time.time() - t1


# Now pickle the whole configuration into one big pickle object, for the arbiter spares
whole_queue = m.list()
t0 = time.time()
Expand All @@ -1008,11 +1006,12 @@ def create_whole_conf_pack(whole_queue, self):

#Get it and save it
self.whole_conf_pack = whole_queue.pop()
logger.debug("[config] time to serialize the global conf : %s", time.time() - t0)
logger.debug("[config] time to serialize the global conf : %s (size:%s)", time.time() - t0, len(self.whole_conf_pack))

# Shutdown the manager, the sub-process should be gone now
m.shutdown()


# It's used to warn about useless parameters and print why they are not used.
def notice_about_useless_parameters(self):
if not self.disable_old_nagios_parameters_whining:
Expand Down Expand Up @@ -1043,10 +1042,12 @@ def warn_about_unmanaged_parameters(self):

logger.warning("Unmanaged configuration statement, do you really need it? Ask for it on the developer mailinglist %s or submit a pull request on the Shinken github ", mailing_list_uri)


# Overrides specific instances properties
def override_properties(self):
# Delegate to the services collection, handing it self.hosts as its argument.
self.services.override_properties(self.hosts)


# Used to fill group values on hosts and create new services
# (for hostgroup ones)
def explode(self):
Expand Down Expand Up @@ -1089,12 +1090,14 @@ def explode(self):
#print "Realms"
self.realms.explode()


# Dependencies are important for scheduling.
# This function creates the dependency links between elements.
def apply_dependencies(self):
# Hosts are processed first, then services; each collection applies its own dependencies.
self.hosts.apply_dependencies()
self.services.apply_dependencies()


# Used to apply inheritance (template and implicit ones)
# so elements will have their configured properties
def apply_inheritance(self):
Expand All @@ -1121,11 +1124,13 @@ def apply_inheritance(self):
self.hostescalations.apply_inheritance()
self.escalations.apply_inheritance()


# Used to apply implicit inheritance
def apply_implicit_inheritance(self):
#print "Services"
# Only the services collection is involved here; it receives self.hosts as its argument.
self.services.apply_implicit_inheritance(self.hosts)


# Will fill properties for elements so they will have all their properties
def fill_default(self):
# Fill default for config (self)
Expand Down Expand Up @@ -1454,6 +1459,7 @@ def linkify_templates(self):
self.serviceescalations.linkify_templates()
self.hostescalations.linkify_templates()


# Some parameters are just not managed like O*HP commands
# and regexp capabilities
# True: OK
Expand All @@ -1471,6 +1477,7 @@ def check_error_on_hard_unmanaged_parameters(self):
# r &= False
return r


# check if elements are correct or not (fill with defaults, etc)
# Warning: this function can be called from an Arbiter AND
# from a scheduler. The first one got everything, the second
Expand Down Expand Up @@ -1587,6 +1594,7 @@ def explode_global_conf(self):
for cls in clss:
cls.load_global_conf(self)


# Clean useless elements like templates because they are not needed anymore
def remove_templates(self):
self.hosts.remove_templates()
Expand All @@ -1598,11 +1606,13 @@ def remove_templates(self):
self.discoveryrules.remove_templates()
self.discoveryruns.remove_templates()


# We compute a simple md5 hash of the elements, so we can know
# if they changed or not between restarts
def compute_hash(self):
# Only the hosts collection is asked to compute its hash here.
self.hosts.compute_hash()


# Add an error in the configuration error list so we can print them
# all in one place
def add_error(self, txt):
Expand All @@ -1611,11 +1621,13 @@ def add_error(self, txt):

self.conf_is_correct = False


# Now it's time to show all configuration errors
def show_errors(self):
# Emit every entry of self.configuration_errors, in order, through logger.error.
for err in self.configuration_errors:
logger.error(err)


# Create packs of hosts and services so in a pack,
# all dependencies are resolved
# It creates a graph. All hosts are connected to their
Expand Down Expand Up @@ -1828,6 +1840,7 @@ def create_packs(self, nb_packs):
"been ignored"
% (len(self.hosts), nb_elements_all_realms))


# Use the self.conf and make nb_parts new confs.
# nbparts is equal to the number of schedulerlink
# New confs are independent with checks. The only communication
Expand Down Expand Up @@ -1929,6 +1942,17 @@ def cut_into_parts(self):
for h in cfg.hosts:
if h.id in mbrs_id:
hg.members.append(h)

# And also relink the hosts with the valid hostgroups
for h in cfg.hosts:
orig_hgs = h.hostgroups
nhgs = []
for ohg in orig_hgs:
nhg = cfg.hostgroups.find_by_name(ohg.get_name())
nhgs.append(nhg)
h.hostgroups = nhgs


# Fill servicegroup
for ori_sg in self.servicegroups:
sg = cfg.servicegroups.find_by_name(ori_sg.get_name())
Expand All @@ -1941,6 +1965,16 @@ def cut_into_parts(self):
if s.id in mbrs_id:
sg.members.append(s)

# And also relink the services with the valid servicegroups
for h in cfg.services:
orig_hgs = h.servicegroups
nhgs = []
for ohg in orig_hgs:
nhg = cfg.servicegroups.find_by_name(ohg.get_name())
nhgs.append(nhg)
h.servicegroups = nhgs


# Now we fill other_elements by host (service are with their host
# so they are not tagged)
for i in self.confs:
Expand All @@ -1953,6 +1987,7 @@ def cut_into_parts(self):
self.confs[i].instance_id = i
random.seed(time.time())


def dump(self, f=None):
dmp = {}

Expand Down Expand Up @@ -2004,6 +2039,7 @@ def dump(self, f=None):
if close is True:
f.close()


# ...
def lazy():
# let's compute the "USER" properties and macros..
Expand Down
30 changes: 30 additions & 0 deletions test/etc/groups_pickle/commands.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Command definitions used by the groups_pickle test configuration.
# Every active command_line locates its script through the $USER1$ macro.

# Host check: runs test_hostcheck.pl with the check type taken from $ARG1$.
define command{
command_name check-host-alive
command_line $USER1$/test_hostcheck.pl --type=$ARG1$ --failchance=2% --previous-state=$HOSTSTATE$ --state-duration=$HOSTDURATIONSEC$ --hostname $HOSTNAME$
}
# Same host check, with an additional --parent-state option filled from $ARG2$.
define command{
command_name check-host-alive-parent
command_line $USER1$/test_hostcheck.pl --type=$ARG1$ --failchance=2% --previous-state=$HOSTSTATE$ --state-duration=$HOSTDURATIONSEC$ --parent-state=$ARG2$ --hostname $HOSTNAME$
}
# Host notification: calls notifier.pl with host macros.
# The commented-out command_line is an alternative that is currently disabled.
define command{
command_name notify-host
#command_line sleep 1 && /bin/true
command_line $USER1$/notifier.pl --hostname $HOSTNAME$ --notificationtype $NOTIFICATIONTYPE$ --hoststate $HOSTSTATE$ --hostoutput $HOSTOUTPUT$ --longdatetime $LONGDATETIME$ --hostattempt $HOSTATTEMPT$ --hoststatetype $HOSTSTATETYPE$
}
# Service notification: calls notifier.pl with host and service macros.
# The commented-out command_line is an alternative that is currently disabled.
define command{
command_name notify-service
command_line $USER1$/notifier.pl --hostname $HOSTNAME$ --servicedesc $SERVICEDESC$ --notificationtype $NOTIFICATIONTYPE$ --servicestate $SERVICESTATE$ --serviceoutput $SERVICEOUTPUT$ --longdatetime $LONGDATETIME$ --serviceattempt $SERVICEATTEMPT$ --servicestatetype $SERVICESTATETYPE$
#command_line sleep 1 && /bin/true
}
# Service check: runs test_servicecheck.pl with the check type taken from $ARG1$.
define command{
command_name check_service
command_line $USER1$/test_servicecheck.pl --type=$ARG1$ --failchance=5% --previous-state=$SERVICESTATE$ --state-duration=$SERVICEDURATIONSEC$ --total-critical-on-host=$TOTALHOSTSERVICESCRITICAL$ --total-warning-on-host=$TOTALHOSTSERVICESWARNING$ --hostname $HOSTNAME$ --servicedesc $SERVICEDESC$ --custom $_SERVICECUSTNAME$
}
# Event handler: passes the service state, state type and attempt to test_eventhandler.pl.
define command{
command_name eventhandler
command_line $USER1$/test_eventhandler.pl $SERVICESTATE$ $SERVICESTATETYPE$ $SERVICEATTEMPT$
}
# Command that only forwards $ARG1$ to a script named "nothing".
define command{
command_name special_macro
command_line $USER1$/nothing $ARG1$
}
56 changes: 56 additions & 0 deletions test/etc/groups_pickle/test_specific.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@

# Test configuration for the groups_pickle case: one top realm (World) with
# two sub-realms (R1, R2), one scheduler per realm, one host per sub-realm,
# and a single hostgroup that every host belongs to.

# Top-level realm; it is the default one and contains R1 and R2.
define realm{
realm_name World
default 1
realm_members R1, R2
}


define realm{
realm_name R1
}

define realm{
realm_name R2
}

# One scheduler per realm, all on the local machine.
define scheduler {
scheduler_name R1
address localhost
realm R1
}


define scheduler {
scheduler_name world
address localhost
realm World
}

define scheduler {
scheduler_name R2
address localhost
realm R2
}


# Hostgroup spanning all hosts ("*"), so its members live in different realms.
define hostgroup{
hostgroup_name everyone
members *
}


# Host in realm R1, explicitly attached to the "everyone" hostgroup.
define host{
use generic-host
host_name HR1
realm R1
hostgroups everyone
}


# Host in realm R2, explicitly attached to the "everyone" hostgroup.
define host{
use generic-host
host_name HR2
realm R2
hostgroups everyone
}

2 comments on commit a9a0ef1

@gst
Copy link
Contributor

@gst gst commented on a9a0ef1 Jan 8, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

otherwise:

for non-trivial changes (i.e. anything beyond docs, typos, etc.), please always create a dedicated branch.

otherwise we risk getting errors in the tests, as happened here, that could be hard to fix afterwards and so leave an unstable master (even if it's only in the tests).

@naparuba
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I needed to commit before leaving, so the current state was good enough. I'm currently fixing the tests. The code is good; only the tests must be fixed.

Please sign in to comment.