some early work to add spack
I can see that spack is parsed into ruleinfo, but it is not
showing up further down the line (e.g., in dag.py there are no
spack environments) and I'm trying to figure out why.

Signed-off-by: vsoch <vsoch@users.noreply.github.com>
vsoch committed May 17, 2021
1 parent 99d2517 commit f7a5022
Showing 21 changed files with 777 additions and 76 deletions.
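For context, the commit message refers to a new spack directive that is parsed into ruleinfo. No Snakefile example appears in the commit itself, but by analogy with the existing conda directive, a rule using it would presumably look something like this sketch (the rule, file names, and tool are made up for illustration):

rule align:
    input:
        "reads.fastq",
    output:
        "aligned.bam",
    spack:
        "envs/align.yaml"  # spack environment file, analogous to a conda env file
    shell:
        "bwa mem ref.fa {input} > {output}"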
12 changes: 9 additions & 3 deletions .github/workflows/main.yml
@@ -47,7 +47,7 @@ jobs:
      AWS_AVAILABLE: ${{ secrets.AWS_ACCESS_KEY_ID }}
      GCP_AVAILABLE: ${{ secrets.GCP_SA_KEY }}
    steps:
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v2

      - name: Setup Snakemake environment
        run: |
@@ -56,7 +56,12 @@ jobs:
          mamba env create -q --name snakemake --file test-environment.yml
          # additionally add singularity
          mamba install -c conda-forge -n snakemake singularity
+      - name: Install spack
+        run: |
+          sudo apt-get update && sudo apt-get install -y curl build-essential gcc
+          git clone --depth 1 https://github.com/spack/spack /opt/spack
      - name: Setup apt dependencies
        run: |
          sudo apt install -y stress git wget
@@ -91,8 +96,9 @@ jobs:
        CI: true
        run: |
          # activate conda env
-          export PATH="/usr/share/miniconda/bin:$PATH"
+          export PATH="/usr/share/miniconda/bin:/opt/spack/bin:$PATH"
          source activate snakemake
+          source /opt/spack/share/spack/setup-env.sh
          pytest -v -x tests/test_expand.py tests/test_io.py tests/test_schema.py tests/test_linting.py tests/tests.py
88 changes: 86 additions & 2 deletions snakemake/__init__.py
@@ -94,6 +94,7 @@ def snakemake(
unlock=False,
cleanup_metadata=None,
conda_cleanup_envs=False,
spack_cleanup_envs=False,
cleanup_shadow=False,
cleanup_scripts=True,
force_incomplete=False,
@@ -132,18 +133,22 @@ def snakemake(
verbose=False,
force_use_threads=False,
use_conda=False,
use_spack=False,
use_singularity=False,
use_env_modules=False,
singularity_args="",
conda_frontend="conda",
conda_prefix=None,
conda_cleanup_pkgs=None,
spack_cleanup_pkgs=None,
spack_prefix=None,
list_conda_envs=False,
singularity_prefix=None,
shadow_prefix=None,
scheduler="ilp",
scheduler_ilp_solver=None,
conda_create_envs_only=False,
spack_create_envs_only=False,
mode=Mode.default,
wrapper_prefix=None,
kubernetes=None,
@@ -227,6 +232,7 @@ def snakemake(
cleanup_metadata (list): just cleanup metadata of given list of output files (default None)
drop_metadata (bool): drop metadata file tracking information after job finishes (--report and --list_x_changes information will be incomplete) (default False)
conda_cleanup_envs (bool): just cleanup unused conda environments (default False)
spack_cleanup_envs (bool): just cleanup unused spack environments (default False)
cleanup_shadow (bool): just cleanup old shadow directories (default False)
cleanup_scripts (bool): delete wrapper scripts used for execution (default True)
force_incomplete (bool): force the re-creation of incomplete files (default False)
@@ -264,13 +270,18 @@ def snakemake(
use_conda (bool): use conda environments for each job (defined with conda directive of rules)
use_singularity (bool): run jobs in singularity containers (if defined with singularity directive)
use_env_modules (bool): load environment modules if defined in rules
use_spack (bool): use spack packages (defined with spack directive of rules)
singularity_args (str): additional arguments to pass to singularity
conda_prefix (str): the directory in which conda environments will be created (default None)
spack_prefix (str): the directory in which spack environments will be created (default None)
conda_cleanup_pkgs (snakemake.deployment.conda.CondaCleanupMode):
whether to clean up conda tarballs after env creation (default None), valid values: "tarballs", "cache"
spack_cleanup_pkgs (snakemake.deployment.spack.SpackCleanupMode):
whether to clean up spack package tarballs after env creation (default None), valid values: "tarballs", "cache"
singularity_prefix (str): the directory to which singularity images will be pulled (default None)
shadow_prefix (str): prefix for shadow directories. The job-specific shadow directories will be created in $SHADOW_PREFIX/shadow/ (default None)
conda_create_envs_only (bool): if specified, only builds the conda environments specified for each job, then exits.
spack_create_envs_only (bool): if specified, only builds the spack environments specified for each job, then exits.
list_conda_envs (bool): list conda environments and their location on disk.
mode (snakemake.common.Mode): execution mode
wrapper_prefix (str): prefix for wrapper script URLs (default None)
@@ -561,9 +572,12 @@ def snakemake(
use_conda=use_conda or list_conda_envs or conda_cleanup_envs,
use_singularity=use_singularity,
use_env_modules=use_env_modules,
use_spack=use_spack or spack_cleanup_envs,
conda_frontend=conda_frontend,
conda_prefix=conda_prefix,
spack_prefix=spack_prefix,
conda_cleanup_pkgs=conda_cleanup_pkgs,
spack_cleanup_pkgs=spack_cleanup_pkgs,
singularity_prefix=singularity_prefix,
shadow_prefix=shadow_prefix,
singularity_args=singularity_args,
@@ -638,6 +652,7 @@ def snakemake(
unlock=unlock,
cleanup_metadata=cleanup_metadata,
conda_cleanup_envs=conda_cleanup_envs,
spack_cleanup_envs=spack_cleanup_envs,
cleanup_shadow=cleanup_shadow,
cleanup_scripts=cleanup_scripts,
force_incomplete=force_incomplete,
@@ -659,8 +674,11 @@ def snakemake(
use_conda=use_conda,
use_singularity=use_singularity,
use_env_modules=use_env_modules,
use_spack=use_spack,
conda_prefix=conda_prefix,
spack_prefix=spack_prefix,
conda_cleanup_pkgs=conda_cleanup_pkgs,
spack_cleanup_pkgs=spack_cleanup_pkgs,
singularity_prefix=singularity_prefix,
shadow_prefix=shadow_prefix,
singularity_args=singularity_args,
@@ -670,6 +688,7 @@ def snakemake(
kubernetes=kubernetes,
container_image=container_image,
conda_create_envs_only=conda_create_envs_only,
spack_create_envs_only=spack_create_envs_only,
default_remote_provider=default_remote_provider,
default_remote_prefix=default_remote_prefix,
tibanna=tibanna,
@@ -760,6 +779,7 @@ def snakemake(
keep_target_files=keep_target_files,
cleanup_metadata=cleanup_metadata,
conda_cleanup_envs=conda_cleanup_envs,
spack_cleanup_envs=spack_cleanup_envs,
cleanup_shadow=cleanup_shadow,
cleanup_scripts=cleanup_scripts,
subsnakemake=subsnakemake,
@@ -768,8 +788,9 @@ def snakemake(
greediness=greediness,
no_hooks=no_hooks,
force_use_threads=use_threads,
-conda_create_envs_only=conda_create_envs_only,
assume_shared_fs=assume_shared_fs,
+conda_create_envs_only=conda_create_envs_only,
+spack_create_envs_only=spack_create_envs_only,
cluster_status=cluster_status,
report=report,
report_stylesheet=report_stylesheet,
@@ -2147,6 +2168,56 @@ def get_argument_parser(profile=None):
help="Send workflow tasks to GA4GH TES server specified by url.",
)

group_spack = parser.add_argument_group("SPACK")

group_spack.add_argument(
"--use-spack",
action="store_true",
help="If defined in the rule, run job in a spack environment.",
)

group_spack.add_argument(
"--spack-create-envs-only",
action="store_true",
help="If specified, only creates the job-specific "
"spack environments then exits. The `--use-spack` "
"flag must also be set.",
)

group_spack.add_argument(
"--spack-cleanup-envs",
action="store_true",
help="Cleanup unused spack environments.",
)

group_spack.add_argument(
"--spack-prefix",
metavar="DIR",
default=os.environ.get("SNAKEMAKE_SPACK_PREFIX", None),
help="Specify a directory in which the 'spack' and 'spack-archive' "
"directories are created. These are used to store spack environments "
"and their archives, respectively. If not supplied, the value is set "
"to the '.snakemake' directory relative to the invocation directory. "
"If supplied, the `--use-spack` flag must also be set. The value may "
"be given as a relative path, which will be extrapolated to the "
"invocation directory, or as an absolute path. The value can also be "
"provided via the environment variable $SNAKEMAKE_SPACK_PREFIX.",
)

from snakemake.deployment.spack import SpackCleanupMode

group_spack.add_argument(
"--spack-cleanup-pkgs",
type=SpackCleanupMode,
const=SpackCleanupMode.tarballs,
choices=list(SpackCleanupMode),
nargs="?",
help="Cleanup spack packages after creating environments. "
"In case of 'tarballs' mode, will clean up all downloaded package tarballs. "
"In case of 'cache' mode, will additionally clean up unused package caches. "
"If mode is omitted, will default to only cleaning up the tarballs.",
)
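SpackCleanupMode itself is not shown in this excerpt. Judging by the argparse usage above (type=, choices=, nargs="?", const=SpackCleanupMode.tarballs) and by its conda counterpart CondaCleanupMode, a minimal sketch would be:

from enum import Enum


class SpackCleanupMode(Enum):
    # The values double as the strings accepted on the command line.
    tarballs = "tarballs"
    cache = "cache"

    def __str__(self):
        # argparse renders choices and defaults via str().
        return self.value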

group_conda = parser.add_argument_group("CONDA")

group_conda.add_argument(
@@ -2429,10 +2500,15 @@ def adjust_path(f):
)
sys.exit(1)

# You can't use both conda and spack!
if args.use_conda and args.use_spack:
print(
"Error: you can only use one of spack or conda, but not both.",
file=sys.stderr,
)
sys.exit(1)

if (args.conda_prefix or args.conda_create_envs_only) and not args.use_conda:
print(
"Error: --use-conda must be set if --conda-prefix or "
"--create-envs-only is set.",
"--conda-create-envs-only is set.",
file=sys.stderr,
)
sys.exit(1)
@@ -2453,6 +2529,10 @@ def adjust_path(f):
)
sys.exit(1)

if args.spack_create_envs_only and not args.use_spack:
print(
"Error: --use-spack must be set if --spack-create-envs-only is set.",
file=sys.stderr,
)
sys.exit(1)

if args.singularity_prefix and not args.use_singularity:
print(
"Error: --use_singularity must be set if --singularity-prefix " "is set.",
@@ -2685,6 +2765,7 @@ def open_browser():
unlock=args.unlock,
cleanup_metadata=args.cleanup_metadata,
conda_cleanup_envs=args.conda_cleanup_envs,
spack_cleanup_envs=args.spack_cleanup_envs,
cleanup_shadow=args.cleanup_shadow,
cleanup_scripts=not args.skip_script_cleanup,
force_incomplete=args.rerun_incomplete,
@@ -2718,9 +2799,11 @@ def open_browser():
attempt=args.attempt,
force_use_threads=args.force_use_threads,
use_conda=args.use_conda,
use_spack=args.use_spack,
conda_frontend=args.conda_frontend,
conda_prefix=args.conda_prefix,
conda_cleanup_pkgs=args.conda_cleanup_pkgs,
spack_cleanup_pkgs=args.spack_cleanup_pkgs,
list_conda_envs=args.list_conda_envs,
use_singularity=args.use_singularity,
use_env_modules=args.use_envmodules,
Expand All @@ -2730,6 +2813,7 @@ def open_browser():
scheduler=args.scheduler,
scheduler_ilp_solver=args.scheduler_ilp_solver,
conda_create_envs_only=args.conda_create_envs_only,
spack_create_envs_only=args.spack_create_envs_only,
mode=args.mode,
wrapper_prefix=args.wrapper_prefix,
default_remote_provider=args.default_remote_provider,
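Taken together, the new keyword arguments let API callers mirror the CLI flags. A minimal sketch of an invocation (the Snakefile path, core count, and prefix directory are placeholders):

from snakemake import snakemake

# Run a workflow with spack-based software deployment enabled.
success = snakemake(
    "Snakefile",
    cores=4,
    use_spack=True,  # honor each rule's spack directive
    spack_prefix=".snakemake/spack",  # where environments are created
)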
37 changes: 34 additions & 3 deletions snakemake/dag.py
@@ -27,7 +27,7 @@
from snakemake.exceptions import InputFunctionException
from snakemake.logging import logger
from snakemake.common import DYNAMIC_FILL, group_into_chunks
-from snakemake.deployment import conda, singularity
+from snakemake.deployment import conda, singularity, spack
from snakemake.output_index import OutputIndex
from snakemake import workflow

@@ -123,6 +123,7 @@ def __init__(
self._jobid = dict()
self.job_cache = dict()
self.conda_envs = dict()
self.spack_envs = dict()
self.container_imgs = dict()
self._progress = 0
self._group = dict()
@@ -290,6 +291,28 @@ def create_conda_envs(
if not dryrun or not quiet:
env.create(dryrun)

def create_spack_envs(
self, dryrun=False, forceall=False, init_only=False, quiet=False
):
# First deduplicate based on job.spack_env_file
jobs = self.jobs if forceall else self.needrun_jobs
env_set = {job.spack_env_file for job in jobs if job.spack_env_file}

# Then create one Env object per unique environment file
self.spack_envs = dict()
for env_file in env_set:
env = spack.Env(
env_file,
self.workflow,
cleanup=self.workflow.spack_cleanup_pkgs,
)
self.spack_envs[env_file] = env

if not init_only:
for env in self.spack_envs.values():
if not dryrun or not quiet:
env.create(dryrun)

def pull_container_imgs(self, dryrun=False, forceall=False, quiet=False):
# First deduplicate based on job.conda_env_file
jobs = self.jobs if forceall else self.needrun_jobs
@@ -1378,6 +1401,8 @@ def finish(self, job, update_dynamic=True):
self.pull_container_imgs()
if self.workflow.use_conda:
self.create_conda_envs()
if self.workflow.use_spack:
self.create_spack_envs()
potential_new_ready_jobs = True

return potential_new_ready_jobs
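The spack.Env class lives in snakemake/deployment/spack.py, which is among the 21 changed files but not shown in this excerpt. From the call sites above, dag.py relies only on the constructor and a create() method; a rough interface sketch (everything beyond those two calls is a guess):

class Env:
    """A spack environment defined by a rule's spack environment file."""

    def __init__(self, env_file, workflow, cleanup=None):
        self.file = env_file
        self.workflow = workflow
        self.cleanup = cleanup  # a SpackCleanupMode, or None

    def create(self, dryrun=False):
        # On a dry run, only report what would be built; otherwise
        # materialize the environment under the configured spack prefix.
        raise NotImplementedError  # sketch only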
@@ -1989,7 +2014,10 @@ def archive(self, path):
if os.path.exists(path):
raise WorkflowError("Archive already exists:\n" + path)

-self.create_conda_envs(forceall=True)
+if self.workflow.use_spack:
+    self.create_spack_envs(forceall=True)
+else:
+    self.create_conda_envs(forceall=True)

try:
workdir = Path(os.path.abspath(os.getcwd()))
@@ -2026,12 +2054,15 @@ def add(path):
# this is an input file that is not created by any job
add(f)

logger.info("Archiving conda environments...")
logger.info("Archiving environments...")
envs = set()
for job in self.jobs:
if job.conda_env_file:
env_archive = job.archive_conda_env()
envs.add(env_archive)
elif job.spack_env_file:
env_archive = job.archive_spack_env()
envs.add(env_archive)
for env in envs:
add(env)
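job.archive_spack_env() mirrors the existing job.archive_conda_env(); its implementation lives in jobs.py, also not shown here. By analogy, it would return the path to an on-disk archive of the environment so the workflow archive is self-contained, roughly:

def archive_spack_env(self):
    """Archive this job's spack environment and return the archive path."""
    # Hypothetical sketch: delegate to the Env object created in
    # dag.create_spack_envs(), mirroring the conda code path.
    return self.dag.spack_envs[self.spack_env_file].archive()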

