Skip to content

Commit

Permalink
Improvement: set FILL UP hosts
Browse files Browse the repository at this point in the history
Sets scheduler policy to fill up one host at a time such that
load_balancer can more easily remove unutilised exec hosts.  Default
SGE policy is to balance jobs across hosts.

Adds the method _get_number_of_slots, which could be further improved:
* more robust parsing of the qconf output
* handling of multiple queues, or of a queue that is not named all.q
  • Loading branch information
scrappythekangaroo committed Jan 22, 2013
1 parent cc42fea commit fb54595
Showing 1 changed file with 45 additions and 0 deletions.
45 changes: 45 additions & 0 deletions starcluster/plugins/sge.py
Expand Up @@ -95,6 +95,49 @@ def _set_number_slots(self, master_slots, node_slots):
cmd += "cat /tmp/queue.conf.txt | sed s/master=[0-9]*]/master=" + str(master_slots) +"]/ > /tmp/queue.conf2.txt;"
cmd += "qconf -Mq /tmp/queue.conf2.txt;"
master.ssh.execute(cmd, log_output=True,source_profile=True,raise_on_failure=False)

def _get_number_of_slots(self, node):
    """
    Return the number of all.q slots configured for the given node.

    Runs ``qconf -sq all.q`` on the master and inspects the ``slots``
    attribute, which looks like ``1,[master=2],[node001=4]``. A
    host-specific ``[<alias>=<n>]`` entry wins; when the node has no such
    entry the leading queue-wide default is returned instead.

    Raises ValueError if no slot count can be determined for the node.
    """
    qconf_output = '\n'.join(self._master.ssh.execute('qconf -sq all.q'))

    # qconf wraps long attribute values with a trailing backslash; undo the
    # wrapping so the whole slots value sits on a single line.
    unwrapped = re.sub(r"\\[ \t]*\n[ \t]*", "", qconf_output)
    slots_line = re.search(r"^[ \t]*slots[ \t]+(.*)$", unwrapped,
                           re.MULTILINE)
    # Prefer the slots attribute only; fall back to the full output when the
    # attribute line cannot be located.
    scope = slots_line.group(1) if slots_line else unwrapped

    # Escape the alias (dots are regex wildcards) and refuse a match that is
    # merely the tail of a longer host name, e.g. node001 in bignode001.
    host_pattern = r"(?<![\w.-])%s=([0-9]+)" % re.escape(node.alias)
    host_match = re.search(host_pattern, scope)
    if host_match:
        return int(host_match.group(1))

    if slots_line:
        # No per-host override: the leading number is the queue default.
        default_match = re.match(r"[ \t]*([0-9]+)", scope)
        if default_match:
            return int(default_match.group(1))

    raise ValueError(
        "could not determine number of slots for %s from 'qconf -sq all.q'"
        % node.alias)

def _set_fill_up_host(self):
    """
    Switch the SGE scheduler to a "fill up host" policy.

    For the master and every node, an exec host configuration carrying
    ``complex_values slots=<n>`` (n taken from _get_number_of_slots) is
    written to /tmp/host.conf.txt on that host and loaded with
    ``qconf -Me``. Afterwards the scheduler configuration's
    ``load_formula`` (normally np_load_avg) is replaced by ``slots`` via
    ``qconf -Msconf`` on the master.

    All remote commands are best effort: they run with
    raise_on_failure=False, so a failing qconf call is not raised here.
    """
    log.info('Setting scheduler to FILL UP HOST')
    master = self._master
    hosts = [master] + list(self.nodes)
    # Replaces a stray debug print; goes through the logger instead.
    log.debug('Applying fill up host configuration to: %s', hosts)

    # Exec host configuration fed to `qconf -Me`; the two %s placeholders
    # are the host alias and its slot count.
    host_conf_template = '\n'.join((
        '',
        'hostname %s',
        'load_scaling NONE',
        'complex_values slots=%s',
        'user_lists NONE',
        'xuser_lists NONE',
        'projects NONE',
        'xprojects NONE',
        'usage_scaling NONE',
        'report_variables NONE',
        '',
    ))

    for node in hosts:
        try:
            slots = self._get_number_of_slots(node)
        except (AttributeError, ValueError) as exc:
            # The slot lookup fails when the host is missing from the qconf
            # output; skip this host rather than abort the remaining hosts
            # and the scheduler update.
            log.warning('Skipping fill up host setup for %s: %s',
                        node.alias, exc)
            continue

        qconf_str = host_conf_template % (node.alias, slots)
        cmd = 'echo "%s" > /tmp/host.conf.txt;' % qconf_str
        cmd += 'qconf -Me /tmp/host.conf.txt;'
        node.ssh.execute(cmd, log_output=False, source_profile=True,
                         raise_on_failure=False)

    # Rank hosts by their slots complex instead of the default load formula.
    cmd = 'qconf -ssconf | sed "s/load_formula.*/load_formula slots/" > /tmp/sched.conf.txt;'
    cmd += 'qconf -Msconf /tmp/sched.conf.txt;'
    master.ssh.execute(cmd, log_output=False, source_profile=True,
                       raise_on_failure=False)
def _setup_sge(self):
"""
Install Sun Grid Engine with a default parallel
Expand Down Expand Up @@ -161,6 +204,7 @@ def run(self, nodes, master, user, user_shell, volumes):
self._set_scheduler_interval(self._scheduler_interval)
if self._master_slots:
self._set_number_slots(self._master_slots, self._node_slots)
self._set_fill_up_host()
def on_add_node(self, node, nodes, master, user, user_shell, volumes):
self._nodes = nodes
self._master = master
Expand All @@ -177,6 +221,7 @@ def on_add_node(self, node, nodes, master, user, user_shell, volumes):

if self._master_slots:
self._set_number_slots(self._master_slots, self._node_slots)
self._set_fill_up_host()
def on_remove_node(self, node, nodes, master, user, user_shell, volumes):
self._nodes = nodes
self._master = master
Expand Down

0 comments on commit fb54595

Please sign in to comment.