Skip to content
Permalink
Browse files

Merge pull request #43 from nextstrain/specify-batch-resources

runner.aws_batch: Specify requested vCPU and memory
  • Loading branch information...
trvrb committed Feb 22, 2019
2 parents c5c20a8 + c764bc1 commit 9cec23a8bdf520b1ac2311fe8650e3c98bec44f9
Showing with 55 additions and 1 deletion.
  1. +26 −1 doc/aws-batch.md
  2. +25 −0 nextstrain/cli/runner/aws_batch/__init__.py
  3. +4 −0 nextstrain/cli/runner/aws_batch/jobs.py
@@ -24,6 +24,32 @@ directory.
[AWS Batch]: https://aws.amazon.com/batch/
[`zika-tutorial/` directory]: https://github.com/nextstrain/zika-tutorial

### Using and requesting resources

By default, each AWS Batch job will have available to it the number of vCPUs
and amount of memory configured in your [job definition](#job-definition). To
take full advantage of multiple CPUs available, [Snakemake's `--jobs` (or
`-j`)](https://snakemake.readthedocs.io/en/stable/executable.html#EXECUTION)
option should generally be matched to the configured number of vCPUs.

The resources configured in the job definition can be overridden on a per-build
basis using the `--aws-batch-cpus` and/or `--aws-batch-memory` options, for
example:

nextstrain build --aws-batch --aws-batch-cpus=8 --aws-batch-memory=14800 zika-tutorial/ --jobs 8

Alternatively, default resource overrides can be set via the
`~/.nextstrain/config` file:

[aws-batch]
cpus = ...
memory = ...

Or via the environment variables `NEXTSTRAIN_AWS_BATCH_CPUS` and
`NEXTSTRAIN_AWS_BATCH_MEMORY`.

Note that requesting more CPUs or memory than is available in your compute
environment will result in a job that is queued but never started.

## Configuration on your computer

@@ -76,7 +102,6 @@ or in the `~/.nextstrain/config` file

or passing the `--aws-batch-s3-bucket=...` option to `nextstrain build`.


# Setting up AWS to run Nextstrain builds

The rest of this document describes the one-time AWS configuration necessary to
@@ -26,6 +26,13 @@
or config.get("aws-batch", "s3-bucket") \
or "nextstrain-jobs"

# Default vCPU count to request for a job.  Taken from the
# NEXTSTRAIN_AWS_BATCH_CPUS environment variable, falling back to the
# [aws-batch] "cpus" config setting; None if neither is set.
DEFAULT_CPUS = os.environ.get("NEXTSTRAIN_AWS_BATCH_CPUS") \
or config.get("aws-batch", "cpus")

# Default memory, in MB, to request for a job.  Taken from the
# NEXTSTRAIN_AWS_BATCH_MEMORY environment variable, falling back to the
# [aws-batch] "memory" config setting; None if neither is set.
DEFAULT_MEMORY = os.environ.get("NEXTSTRAIN_AWS_BATCH_MEMORY") \
or config.get("aws-batch", "memory")

def register_arguments(parser) -> None:
# AWS Batch development options
@@ -54,6 +61,22 @@ def register_arguments(parser) -> None:
metavar = "<name>",
default = DEFAULT_S3_BUCKET)

development.add_argument(
"--aws-batch-cpus",
dest = "cpus",
help = "Number of vCPUs to request for job",
metavar = "<count>",
type = int,
default = DEFAULT_CPUS)

development.add_argument(
"--aws-batch-memory",
dest = "memory",
help = "Amount of memory in MB to request for job",
metavar = "<megabytes>",
type = int,
default = DEFAULT_MEMORY)


def run(opts, argv, working_volume = None) -> int:
# Generate our own unique run id since we can't know the AWS Batch job id
@@ -83,6 +106,8 @@ def run(opts, argv, working_volume = None) -> int:
name = run_id,
queue = opts.job_queue,
definition = opts.job_definition,
cpus = opts.cpus,
memory = opts.memory,
workdir = remote_workdir,
exec = argv)
except Exception as error:
@@ -119,6 +119,8 @@ def stop(self, reason = "stopped by user") -> None:
def submit(name: str,
queue: str,
definition: str,
cpus: Optional[int],
memory: Optional[int],
workdir: s3.S3Object,
exec: Iterable[str]) -> JobState:
"""
@@ -140,6 +142,8 @@ def submit(name: str,
},
*forwarded_environment(),
],
**({ "vcpus": cpus } if cpus else {}),
**({ "memory": memory } if memory else {}),
"command": [
"/sbin/entrypoint-aws-batch",
*exec

0 comments on commit 9cec23a

Please sign in to comment.
You can’t perform that action at this time.