-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add BatchParallelTask to provide simple iteration #2
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,8 @@ | |
import argparse | ||
import traceback | ||
import contextlib | ||
from lsst.pipe.base import CmdLineTask | ||
from .pool import startPool, NODE, abortOnError | ||
from lsst.pipe.base import CmdLineTask, TaskRunner | ||
from .pool import startPool, Pool, NODE, abortOnError | ||
from . import log # register pickle functions for pex_logging | ||
|
||
__all__ = ["Batch", "PbsBatch", "SlurmBatch", "SmpBatch", "BATCH_TYPES", "BatchArgumentParser", | ||
|
@@ -465,6 +465,10 @@ def logOperation(self, operation, catch=False, trace=True): | |
|
||
|
||
class BatchPoolTask(BatchCmdLineTask): | ||
"""Starts a BatchCmdLineTask with an MPI process pool | ||
|
||
Use this subclass of BatchCmdLineTask if you want to use the Pool directly. | ||
""" | ||
@classmethod | ||
@abortOnError | ||
def parseAndRun(cls, *args, **kwargs): | ||
|
@@ -473,3 +477,88 @@ def parseAndRun(cls, *args, **kwargs): | |
super(BatchPoolTask, cls).parseAndRun(*args, **kwargs) | ||
pool.exit() | ||
|
||
|
||
class BatchTaskRunner(TaskRunner): | ||
"""Run a Task individually on a list of inputs using the MPI process pool""" | ||
def __init__(self, *args, **kwargs): | ||
"""Constructor | ||
|
||
Warn if the user specified multiprocessing. | ||
""" | ||
TaskRunner.__init__(self, *args, **kwargs) | ||
if self.numProcesses > 1: | ||
self.log.warn("Multiprocessing arguments (-j %d) ignored since using batch processing" % | ||
self.numProcesses) | ||
self.numProcesses = 1 | ||
|
||
def run(self, parsedCmd): | ||
"""Run the task on all targets | ||
|
||
Sole input is the result of parsing the command-line with the ArgumentParser. | ||
|
||
Output is None if 'precall' failed; otherwise it is a list of calling ourself | ||
on each element of the target list from the 'getTargetList' method. | ||
""" | ||
resultList = None | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This can be either |
||
|
||
import multiprocessing | ||
self.prepareForMultiProcessing() | ||
pool = Pool() | ||
|
||
if self.precall(parsedCmd): | ||
targetList = self.getTargetList(parsedCmd) | ||
if len(targetList) > 0: | ||
parsedCmd.log.info("Processing %d targets with a pool of %d processes..." % | ||
(len(targetList), pool.size)) | ||
# Run the task using self.__call__ | ||
resultList = pool.map(self, targetList) | ||
else: | ||
log.warn("Not running the task because there is no data to process; " | ||
"you may preview data using \"--show data\"") | ||
resultList = [] | ||
|
||
return resultList | ||
|
||
@abortOnError | ||
def __call__(self, cache, args): | ||
"""Run the Task on a single target | ||
|
||
Strips out the process pool 'cache' argument. | ||
|
||
'args' are those arguments provided by the getTargetList method. | ||
|
||
Brings down the entire job if an exception is not caught (i.e., --doraise). | ||
""" | ||
return TaskRunner.__call__(self, args) | ||
|
||
|
||
class BatchParallelTask(BatchCmdLineTask): | ||
"""Runs the BatchCmdLineTask in parallel | ||
|
||
Use this subclass of BatchCmdLineTask if you don't need to use the Pool | ||
directly, but just want to iterate over many objects (like a multi-node | ||
version of the '-j' command-line argument). | ||
""" | ||
RunnerClass = BatchTaskRunner | ||
|
||
@classmethod | ||
def _makeArgumentParser(cls, *args, **kwargs): | ||
"""Build an ArgumentParser | ||
|
||
Removes the batch-specific parts in order to delegate to the parent classes. | ||
""" | ||
kwargs.pop("doBatch", False) | ||
kwargs.pop("add_help", False) | ||
return super(BatchCmdLineTask, cls)._makeArgumentParser(*args, **kwargs) | ||
|
||
@classmethod | ||
def parseAndRun(cls, *args, **kwargs): | ||
"""Parse an argument list and run the command | ||
|
||
This is the entry point when we run in earnest, so start the process pool | ||
so that the worker nodes don't go any further. | ||
""" | ||
pool = startPool() | ||
results = super(BatchParallelTask, cls).parseAndRun(*args, **kwargs) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe the coding style guideline suggests not using super, but since all the subclasses use it maybe it's o.k. in this case. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I tried very hard not to use it! But I found that I must use it if I want to call the base class implementation of one method that calls another method I've implemented in my subclass. Consider the following simplified case:
I have a base class (
then I get the desired result:
In my case, I want |
||
pool.exit() | ||
return results |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
All methods need parameter documentation