Skip to content

Commit

Permalink
updated sge to launch multiple engines - start cleanly but does not s…
Browse files Browse the repository at this point in the history
…hutdown cleanly
  • Loading branch information
Satrajit Ghosh committed Jul 17, 2010
1 parent e9f4279 commit 70725a5
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 10 deletions.
44 changes: 42 additions & 2 deletions IPython/kernel/scripts/ipcluster.py
Expand Up @@ -234,6 +234,7 @@ def __init__(self, extra_args=None):
def start(self, n):
dlist = []
for i in range(n):
print "starting engine:", i
el = EngineLauncher(extra_args=self.extra_args)
d = el.start()
self.launchers.append(el)
Expand Down Expand Up @@ -338,17 +339,56 @@ class SGEEngineSet(BatchEngineSet):

def __init__(self, template_file, **kwargs):
    """Create an SGE engine set driven by the template *template_file*.

    All arguments are forwarded unchanged to ``BatchEngineSet.__init__``.
    """
    BatchEngineSet.__init__(self, template_file, **kwargs)
    # Nothing has been submitted yet; start() resets this counter.
    self.num_engines = None

def parse_job_id(self, output):
    """Extract a job id from the submit command's output and record it.

    Used as the success callback of each submission deferred created in
    ``start``. The text matched by ``self.job_id_regexp`` is appended to
    the ``self.job_id`` list and returned.

    Raises ``Exception`` when the regular expression does not match.
    """
    m = re.search(self.job_id_regexp, output)
    if m is None:
        raise Exception("job id couldn't be determined: %s" % output)
    job_id = m.group()
    # self.job_id is the list initialised by start(); append to it instead
    # of rebinding it to a single string, which made the append fail and
    # dropped the ids of previously submitted engines.
    self.job_id.append(job_id)
    log.msg('Job started with job id: %r' % job_id)
    return job_id


def kill_job(self, output):
    """Log the output of a job-deletion command and pass it through.

    Installed by ``kill`` as the success callback of each deletion
    deferred; the output is returned unchanged.
    """
    log.msg(output)
    return output

def write_batch_script(self, i):
    """Render the batch template for engine *i* and write it to disk.

    The template is read from ``self.template_file`` and expanded with
    ``Itpl.itplns`` using the context ``{'eid': i}``. The result is
    written to ``self.batch_file`` with ``str(i)`` appended, so every
    engine gets its own script file.
    """
    context = {'eid': i}
    script_path = self.batch_file + str(i)

    # Close the template handle explicitly instead of leaving it to the
    # garbage collector.
    template_fh = open(self.template_file, 'r')
    try:
        template = template_fh.read()
    finally:
        template_fh.close()
    log.msg('Using template for batch script: %s' % self.template_file)

    script_as_string = Itpl.itplns(template, context)
    log.msg('Writing instantiated batch script: %s' % script_path)

    # try/finally guarantees the script file is closed even if the write
    # fails part-way.
    f = open(script_path, 'w')
    try:
        f.write(script_as_string)
    finally:
        f.close()

def start(self, n):
    """Submit *n* engines, one batch job per engine.

    For every engine index a batch script is written and passed to
    ``self.submit_command``. ``parse_job_id`` handles the command output
    and ``handle_error`` handles failures. Returns the result of
    ``gatherBoth`` over all submission deferreds.
    """
    self.num_engines = 0
    self.job_id = []
    deferreds = []
    for eid in range(n):
        log.msg("starting engine: %d" % eid)
        self.write_batch_script(eid)
        script_path = self.batch_file + str(eid)
        submission = getProcessOutput(self.submit_command,
                                      [script_path], env=os.environ)
        submission.addCallback(self.parse_job_id)
        submission.addErrback(self.handle_error)
        deferreds.append(submission)
    return gatherBoth(deferreds, consumeErrors=True)

def kill(self):
    """Delete every batch job recorded in ``self.job_id``.

    Runs ``self.delete_command`` once per job id, with ``kill_job`` as the
    success callback. Returns the result of ``gatherBoth`` over the
    deletion deferreds; this is an empty gather when nothing was started.
    """
    # Iterate over the recorded ids themselves: self.num_engines is never
    # incremented after start() sets it to 0, so looping over
    # range(self.num_engines) deleted nothing.
    job_ids = getattr(self, 'job_id', None) or []
    if not isinstance(job_ids, (list, tuple)):
        # A single id stored as a bare string must not be iterated
        # character by character.
        job_ids = [job_ids]

    dlist = []
    for job_id in job_ids:
        # Job ids are strings taken from the submit output, so format
        # them with %s; %d raises TypeError on a string.
        log.msg("killing job id: %s" % job_id)
        d = getProcessOutput(self.delete_command,
                             [job_id], env=os.environ)
        d.addCallback(self.kill_job)
        dlist.append(d)
    return gatherBoth(dlist, consumeErrors=True)

sshx_template="""#!/bin/sh
"$@" &> /dev/null &
echo $!
Expand Down
13 changes: 5 additions & 8 deletions docs/source/parallel/parallel_process.txt
Expand Up @@ -179,25 +179,22 @@ The SGE mode uses the Sun Grid Engine [SGE]_ to start the engines. To use this

#!/bin/bash
#$ -V
#$ -cwd
#$ -m n
#$ -N satra-ipython
#$ -N ipengine-${eid}
#$ -r y
#$ -q sub
#$ -S /bin/bash

cd $$HOME/sge
ipengine --logfile=ipengine
ipengine --logfile=ipengine${eid}

There are a few important points about this template:

1. This template will be rendered at runtime using IPython's :mod:`Itpl`
template engine.

2. Instead of putting in the actual number of engines, use the notation
``${n}`` to indicate the number of engines to be started. You can also use
expressions like ``${n/4}`` in the template to indicate the number of
nodes.
2. Instead of hard-coding the engine id, use the notation
``${eid}`` to indicate where the engine id should be inserted.

3. Because ``$`` is a special character used by the template engine, you must
escape any ``$`` by using ``$$``. This is important when referring to
Expand All @@ -211,7 +208,7 @@ There are a few important points about this template:

Once you have created such a script, save it with a name like :file:`sge.template`. Now you are ready to start your job::

$ ipcluster sge -n 128 --sge-script=sge.template
$ ipcluster sge -n 12 --sge-script=sge.template

Additional command line options for this mode can be found by doing::

Expand Down

0 comments on commit 70725a5

Please sign in to comment.