Permalink
Browse files

updated sge to launch multiple engines - starts cleanly but does not shut down cleanly
  • Loading branch information...
1 parent e9f4279 commit 70725a51ce0ac1f7ed2537134d7328920e24b3a4 Satrajit Ghosh committed Jul 17, 2010
Showing with 47 additions and 10 deletions.
  1. +42 −2 IPython/kernel/scripts/ipcluster.py
  2. +5 −8 docs/source/parallel/parallel_process.txt
@@ -234,6 +234,7 @@ def __init__(self, extra_args=None):
def start(self, n):
dlist = []
for i in range(n):
+ print "starting engine:", i
el = EngineLauncher(extra_args=self.extra_args)
d = el.start()
self.launchers.append(el)
@@ -338,17 +339,56 @@ class SGEEngineSet(BatchEngineSet):
def __init__(self, template_file, **kwargs):
BatchEngineSet.__init__(self, template_file, **kwargs)
+ self.num_engines = None
def parse_job_id(self, output):
m = re.search(self.job_id_regexp, output)
if m is not None:
job_id = m.group()
else:
raise Exception("job id couldn't be determined: %s" % output)
- self.job_id = job_id
+ self.job_id.append(job_id)
log.msg('Job started with job id: %r' % job_id)
return job_id
-
+
+ def kill_job(self, output):
+ """Callback for the delete command: log its output and return it unchanged."""
+ log.msg(output)
+ return output
+
+ def write_batch_script(self, i):
+ context = {'eid':i}
+ template = open(self.template_file, 'r').read()
+ log.msg('Using template for batch script: %s' % self.template_file)
+ script_as_string = Itpl.itplns(template, context)
+ log.msg('Writing instantiated batch script: %s' % self.batch_file+str(i))
+ f = open(self.batch_file+str(i),'w')
+ f.write(script_as_string)
+ f.close()
+
+ def start(self, n):
+ dlist = []
+ self.num_engines = 0
+ self.job_id = []
+ for i in range(n):
+ log.msg("starting engine: %d"%i)
+ self.write_batch_script(i)
+ d = getProcessOutput(self.submit_command,
+ [self.batch_file+str(i)],env=os.environ)
+ d.addCallback(self.parse_job_id)
+ d.addErrback(self.handle_error)
+ dlist.append(d)
+ return gatherBoth(dlist, consumeErrors=True)
+
+ def kill(self):
+ dlist = []
+ for i in range(self.num_engines):
+ log.msg("killing job id: %d"%self.job_id[i])
+ d = getProcessOutput(self.delete_command,
+ [self.job_id[i]],env=os.environ)
+ d.addCallback(self.kill_job)
+ dlist.append(d)
+ return gatherBoth(dlist, consumeErrors=True)
+
sshx_template="""#!/bin/sh
"$@" &> /dev/null &
echo $!
@@ -179,25 +179,22 @@ The SGE mode uses the Sun Grid Engine [SGE]_ to start the engines. To use this
#!/bin/bash
#$ -V
- #$ -cwd
#$ -m n
- #$ -N satra-ipython
+ #$ -N ipengine-${eid}
#$ -r y
#$ -q sub
#$ -S /bin/bash
cd $$HOME/sge
- ipengine --logfile=ipengine
+ ipengine --logfile=ipengine${eid}
There are a few important points about this template:
1. This template will be rendered at runtime using IPython's :mod:`Itpl`
template engine.
-2. Instead of putting in the actual number of engines, use the notation
- ``${n}`` to indicate the number of engines to be started. You can also uses
- expressions like ``${n/4}`` in the template to indicate the number of
- nodes.
+2. Instead of hard-coding an engine id, use the notation
+ ``${eid}`` to indicate where the engine id should be inserted.
3. Because ``$`` is a special character used by the template engine, you must
escape any ``$`` by using ``$$``. This is important when referring to
@@ -211,7 +208,7 @@ There are a few important points about this template:
Once you have created such a script, save it with a name like :file:`sge.template`. Now you are ready to start your job::
- $ ipcluster sge -n 128 --sge-script=sge.template
+ $ ipcluster sge -n 12 --sge-script=sge.template
Additional command line options for this mode can be found by doing::

0 comments on commit 70725a5

Please sign in to comment.