Permalink
Browse files

initial sge compatibility attempt

  • Loading branch information...
1 parent 6ca792b commit b222b5cf95024a497984c74a65177a04dd26ab2c Satrajit Ghosh committed Jul 16, 2010
Showing with 102 additions and 1 deletion.
  1. +52 −0 IPython/kernel/scripts/ipcluster.py
  2. +50 −1 docs/source/parallel/parallel_process.txt
@@ -330,6 +330,15 @@ class PBSEngineSet(BatchEngineSet):
def __init__(self, template_file, **kwargs):
BatchEngineSet.__init__(self, template_file, **kwargs)
+class SGEEngineSet(BatchEngineSet):
+ # Engine set for the SGE batch system. It only overrides the three
+ # class-level settings below and otherwise relies on BatchEngineSet.
+ # NOTE(review): how BatchEngineSet consumes these attributes is not
+ # visible in this hunk -- confirm against the base class.
+
+ # Presumably the command used to submit the job script -- TODO confirm.
+ submit_command = 'qsub'
+ # Presumably the command used to delete a submitted job -- TODO confirm.
+ delete_command = 'qdel'
+ # Pattern matching one or more digits; presumably applied to the submit
+ # command's output to extract the job id -- TODO confirm.
+ # NOTE(review): '\d' in a non-raw string literal is an invalid escape
+ # sequence on current Python versions; r'\d+' has the same value.
+ job_id_regexp = '\d+'
+
+ def __init__(self, template_file, **kwargs):
+ # Forwards template_file and all keyword arguments unchanged to the
+ # base-class constructor.
+ BatchEngineSet.__init__(self, template_file, **kwargs)
+
sshx_template="""#!/bin/sh
"$@" &> /dev/null &
@@ -627,6 +636,35 @@ def shutdown(signum, frame):
dstart.addCallback(_delay_start, start_engines, furl_file, args.r)
dstart.addErrback(_err_and_stop)
+def main_sge(args):
+ # Entry point for the 'sge' subcommand: start a controller, then start
+ # engines through an SGEEngineSet built from the script template.
+ # Reads args.logdir, args.sgescript, args.n and args.r.
+ # NOTE(review): indentation is flattened in this view, so the nesting
+ # described in the comments below is inferred -- confirm against the file.
+
+ # Extra controller arguments; the log file path is built from args.logdir
+ # and the base name 'ipcontroller'.
+ cont_args = []
+ cont_args.append('--logfile=%s' % pjoin(args.logdir,'ipcontroller'))
+
+ # Check security settings before proceeding
+ if not check_security(args, cont_args):
+ return
+
+ # See if we are reusing FURL files
+ if not check_reuse(args, cont_args):
+ return
+
+ # Launch the controller; dstart is the object the callbacks below are
+ # attached to.
+ cl = ControllerLauncher(extra_args=cont_args)
+ dstart = cl.start()
+ # Callback that creates the engine set, installs the SIGINT handler and
+ # starts args.n engines. The argument r is not used.
+ def start_engines(r):
+ sge_set = SGEEngineSet(args.sgescript)
+ # SIGINT handler: kill the engine set, then (whether or not that
+ # succeeded, via addBoth) interrupt the controller and schedule
+ # reactor.stop 2.0 seconds later.
+ def shutdown(signum, frame):
+ log.msg('Stopping sge cluster')
+ d = sge_set.kill()
+ d.addBoth(lambda _: cl.interrupt_then_kill(1.0))
+ d.addBoth(lambda _: reactor.callLater(2.0, reactor.stop))
+ signal.signal(signal.SIGINT,shutdown)
+ d = sge_set.start(args.n)
+ return d
+ # Look up the engine FURL file path from the kernel configuration.
+ config = kernel_config_manager.get_config_obj()
+ furl_file = config['controller']['engine_furl_file']
+ # _delay_start receives start_engines, furl_file and args.r as extra
+ # arguments; presumably it waits for the FURL file before calling
+ # start_engines -- TODO confirm against _delay_start.
+ dstart.addCallback(_delay_start, start_engines, furl_file, args.r)
+ dstart.addErrback(_err_and_stop)
+
def main_ssh(args):
"""Start a controller on localhost and engines using ssh.
@@ -780,6 +818,20 @@ def get_args():
)
parser_pbs.set_defaults(func=main_pbs)
+ parser_sge = subparsers.add_parser(
+ 'sge',
+ help='run a sge cluster',
+ parents=[base_parser]
+ )
+ parser_sge.add_argument(
+ '--sge-script',
+ type=str,
+ dest='sgescript',
+ help='SGE script template',
+ default='sge.template'
+ )
+ parser_sge.set_defaults(func=main_sge)
+
parser_ssh = subparsers.add_parser(
'ssh',
help='run a cluster using ssh, should have ssh-keys setup',
@@ -53,7 +53,8 @@ The :command:`ipcluster` command provides a simple way of starting a controller
2. When engines are started using the :command:`mpirun` command that comes
with most MPI [MPI]_ implementations
3. When engines are started using the PBS [PBS]_ batch system.
-4. When the controller is started on localhost and the engines are started on
+4. When engines are started using the SGE [SGE]_ batch system.
+5. When the controller is started on localhost and the engines are started on
remote nodes using :command:`ssh`.
.. note::
@@ -169,6 +170,53 @@ Additional command line options for this mode can be found by doing::
$ ipcluster pbs -h
+Using :command:`ipcluster` in SGE mode
+--------------------------------------
+
+The SGE mode uses the Sun Grid Engine [SGE]_ to start the engines. To use this mode, you first need to create an SGE script template that will be used to start the engines. Here is a sample SGE script template:
+
+.. sourcecode:: bash
+
+ #!/bin/bash
+ #$ -V
+ #$ -cwd
+ #$ -M joe@gmail.com
+ #$ -m beas
+ #$ -N ipython
+ #$ -r y
+ #$ -S /bin/bash
+
+ cd $$HOME/sge
+ /usr/local/bin/mpiexec -n ${n} ipengine --logfile=$$HOME/sge/ipengine
+
+There are a few important points about this template:
+
+1. This template will be rendered at runtime using IPython's :mod:`Itpl`
+ template engine.
+
+2. Instead of putting in the actual number of engines, use the notation
+ ``${n}`` to indicate the number of engines to be started. You can also use
+ expressions like ``${n/4}`` in the template to indicate the number of
+ nodes.
+
+3. Because ``$`` is a special character used by the template engine, you must
+ escape any ``$`` by using ``$$``. This is important when referring to
+ environment variables in the template.
+
+4. Any options to :command:`ipengine` should be given in the batch script
+ template.
+
+5. Depending on the configuration of your system, you may have to set
+ environment variables in the script template.
+
+Once you have created such a script, save it with a name like :file:`sge.template`. Now you are ready to start your job::
+
+ $ ipcluster sge -n 128 --sge-script=sge.template
+
+Additional command line options for this mode can be found by doing::
+
+ $ ipcluster sge -h
+
Using :command:`ipcluster` in SSH mode
--------------------------------------
@@ -348,4 +396,5 @@ the log files to us will often help us to debug any problems.
.. [PBS] Portable Batch System. http://www.openpbs.org/
+.. [SGE] Sun Grid Engine. http://www.sun.com/software/sge/
.. [SSH] SSH-Agent http://en.wikipedia.org/wiki/Ssh-agent

0 comments on commit b222b5c

Please sign in to comment.