diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 4f8301d01..ccde92195 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,8 +43,7 @@ jobs: testing: runs-on: ubuntu-latest - # TODO comment in once slurm works again - # needs: formatting + needs: formatting services: mysql: image: mysql:8.0 @@ -169,8 +168,12 @@ jobs: shell: bash -el {0} run: | conda config --set channel_priority strict + mamba env update -q -n snakemake --file test-environment.yml + # TODO remove and add as regular dependency once released + pip install git+https://github.com/snakemake/snakemake-interface-executor-plugins.git + # additionally add singularity # TODO remove version constraint: needed because 3.8.7 fails with missing libz: @@ -310,6 +313,9 @@ jobs: run: | conda config --set channel_priority strict mamba env update -q --file test-environment.yml + + # TODO remove and add as regular dependency once released + pip install git+https://github.com/snakemake/snakemake-interface-executor-plugins.git - name: Run tests env: CI: true diff --git a/.github/workflows/test-flux.yaml b/.github/workflows/test-flux.yaml index 3c24c5f4b..2bc5b77aa 100644 --- a/.github/workflows/test-flux.yaml +++ b/.github/workflows/test-flux.yaml @@ -40,6 +40,8 @@ jobs: run: | conda config --set channel_priority strict mamba install python>=3.9 pip + # TODO remove and add as regular dependency once released + pip install git+https://github.com/snakemake/snakemake-interface-executor-plugins.git pip install . - name: Start Flux and Test Workflow diff --git a/docs/project_info/contributing.rst b/docs/project_info/contributing.rst index 1d9a1026d..27c9ffe07 100644 --- a/docs/project_info/contributing.rst +++ b/docs/project_info/contributing.rst @@ -57,7 +57,6 @@ Below you find a skeleton quiet=False, printshellcmds=False, latency_wait=3, - cluster_config=None, local_input=None, restart_times=None, exec_job=None, @@ -70,7 +69,6 @@ Below you find a skeleton quiet=quiet, printshellcmds=printshellcmds, latency_wait=latency_wait, - cluster_config=cluster_config, local_input=local_input, restart_times=restart_times, assume_shared_fs=False, # if your executor relies on a shared file system, set this to True diff --git a/docs/snakefiles/configuration.rst b/docs/snakefiles/configuration.rst index e0e214149..4c3492dc6 100644 --- a/docs/snakefiles/configuration.rst +++ b/docs/snakefiles/configuration.rst @@ -206,91 +206,6 @@ Validating PEPs Using the ``pepschema`` directive leads to an automatic parsing of the provided schema *and* PEP validation with the PEP validation tool -- `eido `_. Eido schemas extend `JSON Schema `_ vocabulary to accommodate the powerful PEP features. Follow the `How to write a PEP schema `_ guide to learn more. -.. _snakefiles-cluster_configuration: - ----------------------------------- -Cluster Configuration (deprecated) ----------------------------------- - -While still being possible, **cluster configuration has been deprecated** by the introduction of :ref:`profiles`. - -Snakemake supports a separate configuration file for execution on a cluster. -A cluster config file allows you to specify cluster submission parameters outside the Snakefile. -The cluster config is a JSON- or YAML-formatted file that contains objects that match names of rules in the Snakefile. -The parameters in the cluster config are then accessed by the ``cluster.*`` wildcard when you are submitting jobs. 
-Note that a workflow shall never depend on a cluster configuration, because this would limit its portability. -Therefore, it is also not intended to access the cluster configuration from **within** the workflow. - -For example, say that you have the following Snakefile: - -.. code-block:: python - - rule all: - input: "input1.txt", "input2.txt" - - rule compute1: - output: "input1.txt" - shell: "touch input1.txt" - - rule compute2: - output: "input2.txt" - shell: "touch input2.txt" - -This Snakefile can then be configured by a corresponding cluster config, say "cluster.json": - - -.. code-block:: json - - { - "__default__" : - { - "account" : "my account", - "time" : "00:15:00", - "n" : 1, - "partition" : "core" - }, - "compute1" : - { - "time" : "00:20:00" - } - } - -Any string in the cluster configuration can be formatted in the same way as shell commands, e.g. ``{rule}.{wildcards.sample}`` is formatted to ``a.xy`` if the rulename is ``a`` and the wildcard value is ``xy``. -Here ``__default__`` is a special object that specifies default parameters, these will be inherited by the other configuration objects. The ``compute1`` object here changes the ``time`` parameter, but keeps the other parameters from ``__default__``. The rule ``compute2`` does not have any configuration, and will therefore use the default configuration. You can then run the Snakefile with the following command on a SLURM system. - -.. code-block:: console - - $ snakemake -j 999 --cluster-config cluster.json --cluster "sbatch -A {cluster.account} -p {cluster.partition} -n {cluster.n} -t {cluster.time}" - - -For cluster systems using LSF/BSUB, a cluster config may look like this: - -.. code-block:: json - - { - "__default__" : - { - "queue" : "medium_priority", - "nCPUs" : "16", - "memory" : 20000, - "resources" : "\"select[mem>20000] rusage[mem=20000] span[hosts=1]\"", - "name" : "JOBNAME.{rule}.{wildcards}", - "output" : "logs/cluster/{rule}.{wildcards}.out", - "error" : "logs/cluster/{rule}.{wildcards}.err" - }, - - - "trimming_PE" : - { - "memory" : 30000, - "resources" : "\"select[mem>30000] rusage[mem=30000] span[hosts=1]\"", - } - } - -The advantage of this setup is that it is already pretty general by exploiting the wildcard possibilities that Snakemake provides via ``{rule}`` and ``{wildcards}``. So job names, output and error files all have reasonable and trackable default names, only the directies (``logs/cluster``) and job names (``JOBNAME``) have to adjusted accordingly. -If a rule named ``bamCoverage`` is executed with the wildcard ``basename = sample1``, for example, the output and error files will be ``bamCoverage.basename=sample1.out`` and ``bamCoverage.basename=sample1.err``, respectively. - - --------------------------- Configure Working Directory --------------------------- @@ -302,3 +217,12 @@ All paths in the snakefile are interpreted relative to the directory snakemake i workdir: "path/to/workdir" Usually, it is preferred to only set the working directory via the command line, because above directive limits the portability of Snakemake workflows. + + +.. _snakefiles-cluster_configuration: + +--------------------------------------------- +Cluster Configuration (not supported anymore) +--------------------------------------------- + +The previously supported cluster configuration has been replaced by configuration profiles (see :ref:`profiles`). 
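Since the configuration.rst change above points users from the removed cluster configuration to :ref:`profiles`, reviewers may find a concrete "before/after" helpful. The following is an editorial sketch only, not part of the patch: it restates the removed ``cluster.json`` example as per-rule resources, and the resource names (``runtime``, ``slurm_account``, ``slurm_partition``) are assumptions based on what the SLURM support expects; adapt them to the executor in use.

.. code-block:: python

    # Sketch: submission parameters formerly kept in cluster.json now live as
    # per-rule resources (or in a profile via --set-resources / --default-resources).
    # Resource names and values below are illustrative assumptions.
    rule compute1:
        output:
            "input1.txt"
        resources:
            runtime=20,                   # minutes; replaces "time": "00:20:00"
            slurm_account="my account",   # replaces "account"
            slurm_partition="core",       # replaces "partition"
        shell:
            "touch input1.txt"

The same values can instead be placed in a workflow profile (e.g. ``set-resources`` and ``default-resources`` entries in its ``config.yaml``), which keeps the Snakefile itself free of site-specific settings.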
diff --git a/docs/tutorial/additional_features.rst b/docs/tutorial/additional_features.rst index 68adeb64d..940c8e3de 100644 --- a/docs/tutorial/additional_features.rst +++ b/docs/tutorial/additional_features.rst @@ -77,8 +77,9 @@ For this, Snakemake provides the ``include`` directive to include another Snakef .. code:: python - include: "path/to/other.snakefile" + include: "path/to/other.smk" +As can be seen, the default file extension for snakefiles other than the main Snakefile is ``.smk``. Alternatively, Snakemake allows to **define sub-workflows**. A sub-workflow refers to a working directory with a complete Snakemake workflow. Output files of that sub-workflow can be used in the current Snakefile. @@ -235,7 +236,7 @@ The **DRMAA support** can be activated by invoking Snakemake as follows: $ snakemake --drmaa --jobs 100 If available, **DRMAA is preferable over the generic cluster modes** because it provides better control and error handling. -To support additional cluster specific parametrization, a Snakefile can be complemented by a :ref:`snakefiles-cluster_configuration` file. +To support additional cluster-specific parametrization, a Snakefile can be complemented by a workflow-specific profile (see :ref:`profiles`). Using --cluster-status :::::::::::::::::::::: diff --git a/setup.cfg b/setup.cfg index 114889047..5fa20aa34 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,6 +50,7 @@ install_requires = requests reretry smart_open >=3.0 + snakemake-interface-executor-plugins stopit tabulate throttler @@ -78,8 +79,8 @@ reports = pygments [options.entry_points] console_scripts = - snakemake = snakemake:main - snakemake-bash-completion = snakemake:bash_completion + snakemake = snakemake.cli:main + snakemake-bash-completion = snakemake.cli:bash_completion [options.packages.find] include = snakemake, snakemake.* diff --git a/snakemake/__init__.py b/snakemake/__init__.py index f6bb8198e..7db1142ae 100644 --- a/snakemake/__init__.py +++ b/snakemake/__init__.py @@ -1,3353 +1,9 @@ __author__ = "Johannes Köster" -__copyright__ = "Copyright 2022, Johannes Köster" -__email__ = "johannes.koester@uni-due.de" +__copyright__ = "Copyright 2023, Johannes Köster" +__email__ = "johannes.koester@protonmail.com" __license__ = "MIT" -import sys -from snakemake.common import MIN_PY_VERSION +from snakemake.common import __version__ -if sys.version_info < MIN_PY_VERSION: - raise ValueError( - f"Snakemake requires at least Python {MIN_PY_VERSION}. 
Please ensure to execute it in a compatible Python environment.", - ) - -import os -import glob -from argparse import ArgumentDefaultsHelpFormatter -import logging as _logging -from pathlib import Path -import re -import threading -import webbrowser -from functools import partial -import importlib -import shlex -from importlib.machinery import SourceFileLoader -from snakemake.executors.common import url_can_parse -from snakemake.target_jobs import parse_target_jobs_cli_args -from snakemake.executors.common import url_can_parse - -from snakemake.workflow import Workflow -from snakemake.dag import Batch -from snakemake.exceptions import ( - CliException, - ResourceScopesException, - print_exception, - WorkflowError, -) -from snakemake.logging import setup_logger, logger, SlackLogger, WMSLogger -from snakemake.io import load_configfile, wait_for_files +# Reexports that are part of the public API: from snakemake.shell import shell -from snakemake.utils import update_config, available_cpu_count -from snakemake.common import ( - Mode, - __version__, - MIN_PY_VERSION, - get_appdirs, - dict_to_key_value_args, - parse_key_value_arg, -) -from snakemake.resources import ResourceScopes, parse_resources, DefaultResources - - -SNAKEFILE_CHOICES = [ - "Snakefile", - "snakefile", - "workflow/Snakefile", - "workflow/snakefile", -] - -RERUN_TRIGGERS = ["mtime", "params", "input", "software-env", "code"] - - -def snakemake( - snakefile, - batch=None, - cache=None, - report=None, - report_stylesheet=None, - containerize=False, - lint=None, - generate_unit_tests=None, - listrules=False, - list_target_rules=False, - cores=1, - nodes=None, - local_cores=1, - max_threads=None, - resources=dict(), - overwrite_threads=None, - overwrite_scatter=None, - overwrite_resource_scopes=None, - default_resources=None, - overwrite_resources=None, - config=dict(), - configfiles=None, - config_args=None, - workdir=None, - targets=None, - target_jobs=None, - dryrun=False, - touch=False, - forcetargets=False, - forceall=False, - forcerun=[], - until=[], - omit_from=[], - prioritytargets=[], - stats=None, - printreason=True, - printshellcmds=False, - debug_dag=False, - printdag=False, - printrulegraph=False, - printfilegraph=False, - printd3dag=False, - nocolor=False, - quiet=False, - keepgoing=False, - slurm=None, - slurm_jobstep=None, - rerun_triggers=RERUN_TRIGGERS, - cluster=None, - cluster_config=None, - cluster_sync=None, - drmaa=None, - drmaa_log_dir=None, - jobname="snakejob.{rulename}.{jobid}.sh", - immediate_submit=False, - standalone=False, - ignore_ambiguity=False, - snakemakepath=None, - lock=True, - unlock=False, - cleanup_metadata=None, - conda_cleanup_envs=False, - cleanup_shadow=False, - cleanup_scripts=True, - cleanup_containers=False, - force_incomplete=False, - ignore_incomplete=False, - list_version_changes=False, - list_code_changes=False, - list_input_changes=False, - list_params_changes=False, - list_untracked=False, - list_resources=False, - summary=False, - archive=None, - delete_all_output=False, - delete_temp_output=False, - detailed_summary=False, - latency_wait=3, - wait_for_files=None, - print_compilation=False, - debug=False, - notemp=False, - all_temp=False, - keep_remote_local=False, - nodeps=False, - keep_target_files=False, - allowed_rules=None, - jobscript=None, - greediness=None, - no_hooks=False, - overwrite_shellcmd=None, - updated_files=None, - log_handler=[], - keep_logger=False, - max_jobs_per_second=None, - max_status_checks_per_second=100, - restart_times=0, - attempt=1, - 
verbose=False, - force_use_threads=False, - use_conda=False, - use_singularity=False, - use_env_modules=False, - singularity_args="", - conda_frontend="conda", - conda_prefix=None, - conda_cleanup_pkgs=None, - list_conda_envs=False, - singularity_prefix=None, - shadow_prefix=None, - scheduler="ilp", - scheduler_ilp_solver=None, - conda_create_envs_only=False, - mode=Mode.default, - wrapper_prefix=None, - kubernetes=None, - container_image=None, - k8s_cpu_scalar=1.0, - k8s_service_account_name=None, - flux=False, - tibanna=False, - tibanna_sfn=None, - az_batch=False, - az_batch_enable_autoscale=False, - az_batch_account_url=None, - google_lifesciences=False, - google_lifesciences_regions=None, - google_lifesciences_location=None, - google_lifesciences_cache=False, - google_lifesciences_service_account_email=None, - google_lifesciences_network=None, - google_lifesciences_subnetwork=None, - tes=None, - preemption_default=None, - preemptible_rules=None, - precommand="", - default_remote_provider=None, - default_remote_prefix="", - tibanna_config=False, - assume_shared_fs=True, - cluster_status=None, - cluster_cancel=None, - cluster_cancel_nargs=None, - cluster_sidecar=None, - export_cwl=None, - show_failed_logs=False, - keep_incomplete=False, - keep_metadata=True, - messaging=None, - edit_notebook=None, - envvars=None, - overwrite_groups=None, - group_components=None, - max_inventory_wait_time=20, - execute_subworkflows=True, - conda_not_block_search_path_envvars=False, - scheduler_solver_path=None, - conda_base_path=None, - local_groupid="local", -): - """Run snakemake on a given snakefile. - - This function provides access to the whole snakemake functionality. It is not thread-safe. - - Args: - snakefile (str): the path to the snakefile - batch (Batch): whether to compute only a partial DAG, defined by the given Batch object (default None) - report (str): create an HTML report for a previous run at the given path - lint (str): print lints instead of executing (None, "plain" or "json", default None) - listrules (bool): list rules (default False) - list_target_rules (bool): list target rules (default False) - cores (int): the number of provided cores (ignored when using cluster support) (default 1) - nodes (int): the number of provided cluster nodes (ignored without cluster support) (default 1) - local_cores (int): the number of provided local cores if in cluster mode (ignored without cluster support) (default 1) - resources (dict): provided resources, a dictionary assigning integers to resource names, e.g. {gpu=1, io=5} (default {}) - default_resources (DefaultResources): default values for resources not defined in rules (default None) - config (dict): override values for workflow config - workdir (str): path to the working directory (default None) - targets (list): list of targets, e.g. 
rule or file names (default None) - target_jobs (dict): list of snakemake.target_jobs.TargetSpec objects directly targeting specific jobs (default None) - dryrun (bool): only dry-run the workflow (default False) - touch (bool): only touch all output files if present (default False) - forcetargets (bool): force given targets to be re-created (default False) - forceall (bool): force all output files to be re-created (default False) - forcerun (list): list of files and rules that shall be re-created/re-executed (default []) - execute_subworkflows (bool): execute subworkflows if present (default True) - prioritytargets (list): list of targets that shall be run with maximum priority (default []) - stats (str): path to file that shall contain stats about the workflow execution (default None) - printreason (bool): print the reason for the execution of each job (default false) - printshellcmds (bool): print the shell command of each job (default False) - printdag (bool): print the dag in the graphviz dot language (default False) - printrulegraph (bool): print the graph of rules in the graphviz dot language (default False) - printfilegraph (bool): print the graph of rules with their input and output files in the graphviz dot language (default False) - printd3dag (bool): print a D3.js compatible JSON representation of the DAG (default False) - nocolor (bool): do not print colored output (default False) - quiet (bool): do not print any default job information (default False) - keepgoing (bool): keep going upon errors (default False) - cluster (str): submission command of a cluster or batch system to use, e.g. qsub (default None) - cluster_config (str,list): configuration file for cluster options, or list thereof (default None) - cluster_sync (str): blocking cluster submission command (like SGE 'qsub -sync y') (default None) - drmaa (str): if not None use DRMAA for cluster support, str specifies native args passed to the cluster when submitting a job - drmaa_log_dir (str): the path to stdout and stderr output of DRMAA jobs (default None) - jobname (str): naming scheme for cluster job scripts (default "snakejob.{rulename}.{jobid}.sh") - immediate_submit (bool): immediately submit all cluster jobs, regardless of dependencies (default False) - standalone (bool): kill all processes very rudely in case of failure (do not use this if you use this API) (default False) (deprecated) - ignore_ambiguity (bool): ignore ambiguous rules and always take the first possible one (default False) - snakemakepath (str): deprecated parameter whose value is ignored. Do not use. 
- lock (bool): lock the working directory when executing the workflow (default True) - unlock (bool): just unlock the working directory (default False) - cleanup_metadata (list): just cleanup metadata of given list of output files (default None) - drop_metadata (bool): drop metadata file tracking information after job finishes (--report and --list_x_changes information will be incomplete) (default False) - conda_cleanup_envs (bool): just cleanup unused conda environments (default False) - cleanup_shadow (bool): just cleanup old shadow directories (default False) - cleanup_scripts (bool): delete wrapper scripts used for execution (default True) - cleanup_containers (bool): delete unused (singularity) containers (default False) - force_incomplete (bool): force the re-creation of incomplete files (default False) - ignore_incomplete (bool): ignore incomplete files (default False) - list_version_changes (bool): list output files with changed rule version (default False) - list_code_changes (bool): list output files with changed rule code (default False) - list_input_changes (bool): list output files with changed input files (default False) - list_params_changes (bool): list output files with changed params (default False) - list_untracked (bool): list files in the workdir that are not used in the workflow (default False) - summary (bool): list summary of all output files and their status (default False) - archive (str): archive workflow into the given tarball - delete_all_output (bool): remove all files generated by the workflow (default False) - delete_temp_output (bool): remove all temporary files generated by the workflow (default False) - latency_wait (int): how many seconds to wait for an output file to appear after the execution of a job, e.g. to handle filesystem latency (default 3) - wait_for_files (list): wait for given files to be present before executing the workflow - list_resources (bool): list resources used in the workflow (default False) - summary (bool): list summary of all output files and their status (default False). If no option is specified a basic summary will be output. If 'detailed' is added as an option e.g --summary detailed, extra info about the input and shell commands will be included - detailed_summary (bool): list summary of all input and output files and their status (default False) - print_compilation (bool): print the compilation of the snakefile (default False) - debug (bool): allow to use the debugger within rules - notemp (bool): ignore temp file flags, e.g. do not delete output files marked as a temp after use (default False) - keep_remote_local (bool): keep local copies of remote files (default False) - nodeps (bool): ignore dependencies (default False) - keep_target_files (bool): do not adjust the paths of given target files relative to the working directory. - allowed_rules (set): restrict allowed rules to the given set. If None or empty, all rules are used. - jobscript (str): path to a custom shell script template for cluster jobs (default None) - greediness (float): set the greediness of scheduling. This value between 0 and 1 determines how careful jobs are selected for execution. The default value (0.5 if prioritytargets are used, 1.0 else) provides the best speed and still acceptable scheduling quality. - overwrite_shellcmd (str): a shell command that shall be executed instead of those given in the workflow. This is for debugging purposes only. 
- updated_files(list): a list that will be filled with the files that are updated or created during the workflow execution - verbose (bool): show additional debug output (default False) - max_jobs_per_second (int): maximal number of cluster/drmaa jobs per second, None to impose no limit (default None) - restart_times (int): number of times to restart failing jobs (default 0) - attempt (int): initial value of Job.attempt. This is intended for internal use only (default 1). - force_use_threads: whether to force the use of threads over processes. helpful if shared memory is full or unavailable (default False) - use_conda (bool): use conda environments for each job (defined with conda directive of rules) - use_singularity (bool): run jobs in singularity containers (if defined with singularity directive) - use_env_modules (bool): load environment modules if defined in rules - singularity_args (str): additional arguments to pass to a singularity - conda_prefix (str): the directory in which conda environments will be created (default None) - conda_cleanup_pkgs (snakemake.deployment.conda.CondaCleanupMode): - whether to clean up conda tarballs after env creation (default None), valid values: "tarballs", "cache" - singularity_prefix (str): the directory to which singularity images will be pulled (default None) - shadow_prefix (str): prefix for shadow directories. The job-specific shadow directories will be created in $SHADOW_PREFIX/shadow/ (default None) - conda_create_envs_only (bool): if specified, only builds the conda environments specified for each job, then exits. - list_conda_envs (bool): list conda environments and their location on disk. - mode (snakemake.common.Mode): execution mode - wrapper_prefix (str): prefix for wrapper script URLs (default None) - kubernetes (str): submit jobs to Kubernetes, using the given namespace. - container_image (str): Docker image to use, e.g., for Kubernetes. - k8s_cpu_scalar (float): What proportion of each k8s node's CPUs are availabe to snakemake? - k8s_service_account_name (str): Custom k8s service account, needed for workload identity. - flux (bool): Launch workflow to flux cluster. - default_remote_provider (str): default remote provider to use instead of local files (e.g. S3, GS) - default_remote_prefix (str): prefix for default remote provider (e.g. name of the bucket). - tibanna (bool): submit jobs to AWS cloud using Tibanna. - tibanna_sfn (str): Step function (Unicorn) name of Tibanna (e.g. tibanna_unicorn_monty). This must be deployed first using tibanna cli. - az_batch (bool): Submit jobs to azure batch. - az_batch_enable_autoscale (bool): Enable autoscaling of the azure batch pool nodes. This sets the initial dedicated node pool count to zero and resizes the pool only after 5 minutes. So this flag is only recommended for relatively long running jobs., - az_batch_account_url (str): Azure batch account url. - google_lifesciences (bool): submit jobs to Google Cloud Life Sciences (pipelines API). - google_lifesciences_regions (list): a list of regions (e.g., us-east1) - google_lifesciences_location (str): Life Sciences API location (e.g., us-central1) - google_lifesciences_cache (bool): save a cache of the compressed working directories in Google Cloud Storage for later usage. - google_lifesciences_service_account_email (str): Service account to install on Google pipelines API VM instance. - google_lifesciences_network (str): Network name for Google VM instances. - google_lifesciences_subnetwork (str): Subnetwork name for Google VM instances. 
- tes (str): Execute workflow tasks on GA4GH TES server given by URL. - precommand (str): commands to run on AWS cloud before the snakemake command (e.g. wget, git clone, unzip, etc). Use with --tibanna. - preemption_default (int): set a default number of preemptible instance retries (for Google Life Sciences executor only) - preemptible_rules (list): define custom preemptible instance retries for specific rules (for Google Life Sciences executor only) - tibanna_config (list): Additional tibanna config e.g. --tibanna-config spot_instance=true subnet= security group= - assume_shared_fs (bool): assume that cluster nodes share a common filesystem (default true). - cluster_status (str): status command for cluster execution. If None, Snakemake will rely on flag files. Otherwise, it expects the command to return "success", "failure" or "running" when executing with a cluster jobid as a single argument. - cluster_cancel (str): command to cancel multiple job IDs (like SLURM 'scancel') (default None) - cluster_cancel_nargs (int): maximal number of job ids to pass to cluster_cancel (default 1000) - cluster_sidecar (str): command that starts a sidecar process, see cluster documentation (default None) - export_cwl (str): Compile workflow to CWL and save to given file - log_handler (function): redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None). The log message dictionary for the log handler has to following entries: - keep_incomplete (bool): keep incomplete output files of failed jobs - edit_notebook (object): "notebook.EditMode" object to configure notebook server for interactive editing of a rule notebook. If None, do not edit. - scheduler (str): Select scheduling algorithm (default ilp) - scheduler_ilp_solver (str): Set solver for ilp scheduler. - overwrite_groups (dict): Rule to group assignments (default None) - group_components (dict): Number of connected components given groups shall span before being split up (1 by default if empty) - conda_not_block_search_path_envvars (bool): Do not block search path envvars (R_LIBS, PYTHONPATH, ...) when using conda environments. - scheduler_solver_path (str): Path to Snakemake environment (this can be used to e.g. overwrite the search path for the ILP solver used during scheduling). - conda_base_path (str): Path to conda base environment (this can be used to overwrite the search path for conda, mamba, and activate). - local_groupid (str): Local groupid to use as a placeholder for groupid-referrring input functions of local jobs (internal use only, default: local). - log_handler (list): redirect snakemake output to this list of custom log handlers, each a function that takes a log message dictionary (see below) as its only argument (default []). The log message dictionary for the log handler has to following entries: - - :level: - the log level ("info", "error", "debug", "progress", "job_info") - - :level="info", "error" or "debug": - :msg: - the log message - :level="progress": - :done: - number of already executed jobs - - :total: - number of total jobs - - :level="job_info": - :input: - list of input files of a job - - :output: - list of output files of a job - - :log: - path to log file of a job - - :local: - whether a job is executed locally (i.e. 
ignoring cluster) - - :msg: - the job message - - :reason: - the job reason - - :priority: - the job priority - - :threads: - the threads of the job - - - Returns: - bool: True if workflow execution was successful. - - """ - assert not immediate_submit or ( - immediate_submit and notemp - ), "immediate_submit has to be combined with notemp (it does not support temp file handling)" - - if tibanna: - assume_shared_fs = False - default_remote_provider = "S3" - default_remote_prefix = default_remote_prefix.rstrip("/") - assert ( - default_remote_prefix - ), "default_remote_prefix needed if tibanna is specified" - assert tibanna_sfn, "tibanna_sfn needed if tibanna is specified" - if tibanna_config: - tibanna_config_dict = dict() - for cf in tibanna_config: - k, v = cf.split("=") - if v == "true": - v = True - elif v == "false": - v = False - elif v.isnumeric(): - v = int(v) - else: - try: - v = float(v) - except ValueError: - pass - tibanna_config_dict.update({k: v}) - tibanna_config = tibanna_config_dict - - # Azure batch uses compute engine and storage - if az_batch: - assume_shared_fs = False - default_remote_provider = "AzBlob" - - # Google Cloud Life Sciences API uses compute engine and storage - if google_lifesciences: - assume_shared_fs = False - default_remote_provider = "GS" - default_remote_prefix = default_remote_prefix.rstrip("/") - if kubernetes: - assume_shared_fs = False - - # Currently preemptible instances only supported for Google LifeSciences Executor - if preemption_default or preemptible_rules and not google_lifesciences: - logger.warning( - "Preemptible instances are only available for the Google Life Sciences Executor." - ) - - if updated_files is None: - updated_files = list() - - if isinstance(cluster_config, str): - # Loading configuration from one file is still supported for - # backward compatibility - cluster_config = [cluster_config] - if cluster_config: - # Load all configuration files - configs = [load_configfile(f) for f in cluster_config] - # Merge in the order as specified, overriding earlier values with - # later ones - cluster_config_content = configs[0] - for other in configs[1:]: - update_config(cluster_config_content, other) - else: - cluster_config_content = dict() - - run_local = not ( - cluster - or cluster_sync - or drmaa - or kubernetes - or tibanna - or az_batch - or google_lifesciences - or tes - or slurm - or slurm_jobstep - ) - if run_local: - if not dryrun: - # clean up all previously recorded jobids. - shell.cleanup() - else: - if default_resources is None: - # use full default resources if in cluster or cloud mode - default_resources = DefaultResources(mode="full") - if edit_notebook: - raise WorkflowError( - "Notebook edit mode is only allowed with local execution." 
- ) - - shell.conda_block_conflicting_envvars = not conda_not_block_search_path_envvars - - # force thread use for any kind of cluster - use_threads = ( - force_use_threads - or (os.name not in ["posix", "nt"]) - or cluster - or cluster_sync - or drmaa - ) - - if not keep_logger: - stdout = ( - ( - dryrun - and not (printdag or printd3dag or printrulegraph or printfilegraph) - ) - or listrules - or list_target_rules - or list_resources - ) - - setup_logger( - handler=log_handler, - quiet=quiet, - printreason=printreason, - printshellcmds=printshellcmds, - debug_dag=debug_dag, - nocolor=nocolor, - stdout=stdout, - debug=verbose, - use_threads=use_threads, - mode=mode, - show_failed_logs=show_failed_logs, - dryrun=dryrun, - ) - - if greediness is None: - greediness = 0.5 if prioritytargets else 1.0 - else: - if not (0 <= greediness <= 1.0): - logger.error("Error: greediness must be a float between 0 and 1.") - return False - - if not os.path.exists(snakefile): - logger.error(f'Error: Snakefile "{snakefile}" not found.') - return False - snakefile = os.path.abspath(snakefile) - - cluster_mode = ( - (cluster is not None) + (cluster_sync is not None) + (drmaa is not None) - ) - if cluster_mode > 1: - logger.error("Error: cluster and drmaa args are mutually exclusive") - return False - - if debug and (cluster_mode or cores is not None and cores > 1): - logger.error( - "Error: debug mode cannot be used with more than one core or cluster execution." - ) - return False - - overwrite_config = dict() - if configfiles is None: - configfiles = [] - for f in configfiles: - # get values to override. Later configfiles override earlier ones. - update_config(overwrite_config, load_configfile(f)) - # convert provided paths to absolute paths - configfiles = list(map(os.path.abspath, configfiles)) - - # directly specified elements override any configfiles - if config: - update_config(overwrite_config, config) - if config_args is None: - config_args = dict_to_key_value_args(config) - - if workdir: - olddir = os.getcwd() - if not os.path.exists(workdir): - logger.info(f"Creating specified working directory {workdir}.") - os.makedirs(workdir) - workdir = os.path.abspath(workdir) - os.chdir(workdir) - - logger.setup_logfile() - - try: - # handle default remote provider - _default_remote_provider = None - if default_remote_provider is not None: - try: - rmt = importlib.import_module( - "snakemake.remote." + default_remote_provider - ) - except ImportError as e: - raise WorkflowError("Unknown default remote provider.") - if rmt.RemoteProvider.supports_default: - _default_remote_provider = rmt.RemoteProvider( - keep_local=keep_remote_local, is_default=True - ) - else: - raise WorkflowError( - "Remote provider {} does not (yet) support to " - "be used as default provider." 
- ) - - workflow = Workflow( - snakefile=snakefile, - rerun_triggers=rerun_triggers, - jobscript=jobscript, - overwrite_shellcmd=overwrite_shellcmd, - overwrite_config=overwrite_config, - overwrite_workdir=workdir, - overwrite_configfiles=configfiles, - overwrite_clusterconfig=cluster_config_content, - overwrite_threads=overwrite_threads, - max_threads=max_threads, - overwrite_scatter=overwrite_scatter, - overwrite_groups=overwrite_groups, - overwrite_resources=overwrite_resources, - overwrite_resource_scopes=overwrite_resource_scopes, - group_components=group_components, - config_args=config_args, - debug=debug, - verbose=verbose, - use_conda=use_conda or list_conda_envs or conda_cleanup_envs, - use_singularity=use_singularity, - use_env_modules=use_env_modules, - conda_frontend=conda_frontend, - conda_prefix=conda_prefix, - conda_cleanup_pkgs=conda_cleanup_pkgs, - singularity_prefix=singularity_prefix, - shadow_prefix=shadow_prefix, - singularity_args=singularity_args, - scheduler_type=scheduler, - scheduler_ilp_solver=scheduler_ilp_solver, - mode=mode, - wrapper_prefix=wrapper_prefix, - printshellcmds=printshellcmds, - restart_times=restart_times, - attempt=attempt, - default_remote_provider=_default_remote_provider, - default_remote_prefix=default_remote_prefix, - run_local=run_local, - assume_shared_fs=assume_shared_fs, - default_resources=default_resources, - cache=cache, - cores=cores, - nodes=nodes, - resources=resources, - edit_notebook=edit_notebook, - envvars=envvars, - max_inventory_wait_time=max_inventory_wait_time, - conda_not_block_search_path_envvars=conda_not_block_search_path_envvars, - execute_subworkflows=execute_subworkflows, - scheduler_solver_path=scheduler_solver_path, - conda_base_path=conda_base_path, - check_envvars=not lint, # for linting, we do not need to check whether requested envvars exist - all_temp=all_temp, - local_groupid=local_groupid, - keep_metadata=keep_metadata, - latency_wait=latency_wait, - cleanup_scripts=cleanup_scripts, - immediate_submit=immediate_submit, - ) - success = True - - workflow.include( - snakefile, - overwrite_default_target=True, - print_compilation=print_compilation, - ) - workflow.check() - - if not print_compilation: - if lint: - success = not workflow.lint(json=lint == "json") - elif listrules: - workflow.list_rules() - elif list_target_rules: - workflow.list_rules(only_targets=True) - elif list_resources: - workflow.list_resources() - else: - # if not printdag and not printrulegraph: - # handle subworkflows - subsnakemake = partial( - snakemake, - local_cores=local_cores, - max_threads=max_threads, - cache=cache, - overwrite_threads=overwrite_threads, - overwrite_scatter=overwrite_scatter, - overwrite_resources=overwrite_resources, - overwrite_resource_scopes=overwrite_resource_scopes, - default_resources=default_resources, - dryrun=dryrun, - touch=touch, - printreason=printreason, - printshellcmds=printshellcmds, - debug_dag=debug_dag, - nocolor=nocolor, - quiet=quiet, - keepgoing=keepgoing, - cluster=cluster, - cluster_sync=cluster_sync, - drmaa=drmaa, - drmaa_log_dir=drmaa_log_dir, - jobname=jobname, - immediate_submit=immediate_submit, - standalone=standalone, - ignore_ambiguity=ignore_ambiguity, - restart_times=restart_times, - attempt=attempt, - lock=lock, - unlock=unlock, - cleanup_metadata=cleanup_metadata, - conda_cleanup_envs=conda_cleanup_envs, - cleanup_containers=cleanup_containers, - cleanup_shadow=cleanup_shadow, - cleanup_scripts=cleanup_scripts, - force_incomplete=force_incomplete, - 
ignore_incomplete=ignore_incomplete, - latency_wait=latency_wait, - verbose=verbose, - notemp=notemp, - all_temp=all_temp, - keep_remote_local=keep_remote_local, - nodeps=nodeps, - jobscript=jobscript, - greediness=greediness, - no_hooks=no_hooks, - overwrite_shellcmd=overwrite_shellcmd, - config=config, - config_args=config_args, - cluster_config=cluster_config, - keep_logger=True, - force_use_threads=use_threads, - use_conda=use_conda, - use_singularity=use_singularity, - use_env_modules=use_env_modules, - conda_prefix=conda_prefix, - conda_cleanup_pkgs=conda_cleanup_pkgs, - conda_frontend=conda_frontend, - singularity_prefix=singularity_prefix, - shadow_prefix=shadow_prefix, - singularity_args=singularity_args, - scheduler=scheduler, - scheduler_ilp_solver=scheduler_ilp_solver, - list_conda_envs=list_conda_envs, - kubernetes=kubernetes, - container_image=container_image, - k8s_cpu_scalar=k8s_cpu_scalar, - k8s_service_account_name=k8s_service_account_name, - conda_create_envs_only=conda_create_envs_only, - default_remote_provider=default_remote_provider, - default_remote_prefix=default_remote_prefix, - tibanna=tibanna, - tibanna_sfn=tibanna_sfn, - az_batch=az_batch, - az_batch_enable_autoscale=az_batch_enable_autoscale, - az_batch_account_url=az_batch_account_url, - google_lifesciences=google_lifesciences, - google_lifesciences_regions=google_lifesciences_regions, - google_lifesciences_location=google_lifesciences_location, - google_lifesciences_cache=google_lifesciences_cache, - google_lifesciences_service_account_email=google_lifesciences_service_account_email, - google_lifesciences_network=google_lifesciences_network, - google_lifesciences_subnetwork=google_lifesciences_subnetwork, - flux=flux, - tes=tes, - precommand=precommand, - preemption_default=preemption_default, - preemptible_rules=preemptible_rules, - tibanna_config=tibanna_config, - assume_shared_fs=assume_shared_fs, - cluster_status=cluster_status, - cluster_cancel=cluster_cancel, - cluster_cancel_nargs=cluster_cancel_nargs, - cluster_sidecar=cluster_sidecar, - max_jobs_per_second=max_jobs_per_second, - max_status_checks_per_second=max_status_checks_per_second, - overwrite_groups=overwrite_groups, - group_components=group_components, - max_inventory_wait_time=max_inventory_wait_time, - conda_not_block_search_path_envvars=conda_not_block_search_path_envvars, - local_groupid=local_groupid, - ) - success = workflow.execute( - targets=targets, - target_jobs=target_jobs, - dryrun=dryrun, - generate_unit_tests=generate_unit_tests, - touch=touch, - scheduler_type=scheduler, - scheduler_ilp_solver=scheduler_ilp_solver, - local_cores=local_cores, - forcetargets=forcetargets, - forceall=forceall, - forcerun=forcerun, - prioritytargets=prioritytargets, - until=until, - omit_from=omit_from, - quiet=quiet, - keepgoing=keepgoing, - printshellcmds=printshellcmds, - printreason=printreason, - printrulegraph=printrulegraph, - printfilegraph=printfilegraph, - printdag=printdag, - slurm=slurm, - slurm_jobstep=slurm_jobstep, - cluster=cluster, - cluster_sync=cluster_sync, - jobname=jobname, - drmaa=drmaa, - drmaa_log_dir=drmaa_log_dir, - kubernetes=kubernetes, - container_image=container_image, - k8s_cpu_scalar=k8s_cpu_scalar, - k8s_service_account_name=k8s_service_account_name, - tibanna=tibanna, - tibanna_sfn=tibanna_sfn, - az_batch=az_batch, - az_batch_enable_autoscale=az_batch_enable_autoscale, - az_batch_account_url=az_batch_account_url, - google_lifesciences=google_lifesciences, - google_lifesciences_regions=google_lifesciences_regions, 
- google_lifesciences_location=google_lifesciences_location, - google_lifesciences_cache=google_lifesciences_cache, - google_lifesciences_service_account_email=google_lifesciences_service_account_email, - google_lifesciences_network=google_lifesciences_network, - google_lifesciences_subnetwork=google_lifesciences_subnetwork, - tes=tes, - flux=flux, - precommand=precommand, - preemption_default=preemption_default, - preemptible_rules=preemptible_rules, - tibanna_config=tibanna_config, - max_jobs_per_second=max_jobs_per_second, - max_status_checks_per_second=max_status_checks_per_second, - printd3dag=printd3dag, - ignore_ambiguity=ignore_ambiguity, - stats=stats, - force_incomplete=force_incomplete, - ignore_incomplete=ignore_incomplete, - list_version_changes=list_version_changes, - list_code_changes=list_code_changes, - list_input_changes=list_input_changes, - list_params_changes=list_params_changes, - list_untracked=list_untracked, - list_conda_envs=list_conda_envs, - summary=summary, - archive=archive, - delete_all_output=delete_all_output, - delete_temp_output=delete_temp_output, - wait_for_files=wait_for_files, - detailed_summary=detailed_summary, - nolock=not lock, - unlock=unlock, - notemp=notemp, - keep_remote_local=keep_remote_local, - nodeps=nodeps, - keep_target_files=keep_target_files, - cleanup_metadata=cleanup_metadata, - conda_cleanup_envs=conda_cleanup_envs, - cleanup_containers=cleanup_containers, - cleanup_shadow=cleanup_shadow, - subsnakemake=subsnakemake, - updated_files=updated_files, - allowed_rules=allowed_rules, - greediness=greediness, - no_hooks=no_hooks, - force_use_threads=use_threads, - conda_create_envs_only=conda_create_envs_only, - cluster_status=cluster_status, - cluster_cancel=cluster_cancel, - cluster_cancel_nargs=cluster_cancel_nargs, - cluster_sidecar=cluster_sidecar, - report=report, - report_stylesheet=report_stylesheet, - export_cwl=export_cwl, - batch=batch, - keepincomplete=keep_incomplete, - containerize=containerize, - ) - - except BrokenPipeError: - # ignore this exception and stop. It occurs if snakemake output is piped into less and less quits before reading the whole output. - # in such a case, snakemake shall stop scheduling and quit with error 1 - success = False - except (Exception, BaseException) as ex: - if "workflow" in locals(): - print_exception(ex, workflow.linemaps) - else: - print_exception(ex, dict()) - success = False - - if workdir: - os.chdir(olddir) - if "workflow" in locals() and workflow.persistence: - workflow.persistence.unlock() - if not keep_logger: - logger.cleanup() - return success - - -def parse_set_threads(args): - return parse_set_ints( - args.set_threads, - "Invalid threads definition: entries have to be defined as RULE=THREADS pairs " - "(with THREADS being a positive integer).", - ) - - -def parse_set_resources(args): - errmsg = ( - "Invalid resource definition: entries have to be defined as RULE:RESOURCE=VALUE, with " - "VALUE being a positive integer or a string." 
- ) - - from collections import defaultdict - - assignments = defaultdict(dict) - if args.set_resources is not None: - for entry in args.set_resources: - key, value = parse_key_value_arg(entry, errmsg=errmsg) - key = key.split(":") - if not len(key) == 2: - raise ValueError(errmsg) - rule, resource = key - try: - value = int(value) - except ValueError: - assignments[rule][resource] = value - continue - if value < 0: - raise ValueError(errmsg) - assignments[rule][resource] = value - return assignments - - -def parse_set_scatter(args): - return parse_set_ints( - args.set_scatter, - "Invalid scatter definition: entries have to be defined as NAME=SCATTERITEMS pairs " - "(with SCATTERITEMS being a positive integer).", - ) - - -def parse_set_resource_scope(args): - err_msg = ( - "Invalid resource scopes: entries must be defined as RESOURCE=SCOPE pairs, " - "where SCOPE is either 'local', 'global', or 'excluded'" - ) - if args.set_resource_scopes is not None: - try: - return ResourceScopes( - parse_key_value_arg(entry, errmsg=err_msg) - for entry in args.set_resource_scopes - ) - except ResourceScopesException as err: - invalid_resources = ", ".join( - f"'{res}={scope}'" for res, scope in err.invalid_resources.items() - ) - raise ValueError(f"{err.msg} (got {invalid_resources})") - - return ResourceScopes() - - -def parse_set_ints(arg, errmsg): - assignments = dict() - if arg is not None: - for entry in arg: - key, value = parse_key_value_arg(entry, errmsg=errmsg) - try: - value = int(value) - except ValueError: - raise ValueError(errmsg) - if value < 0: - raise ValueError(errmsg) - assignments[key] = value - return assignments - - -def parse_batch(args): - errmsg = "Invalid batch definition: batch entry has to be defined as RULE=BATCH/BATCHES (with integers BATCH <= BATCHES, BATCH >= 1)." 
- if args.batch is not None: - rule, batchdef = parse_key_value_arg(args.batch, errmsg=errmsg) - try: - batch, batches = batchdef.split("/") - batch = int(batch) - batches = int(batches) - except ValueError: - raise ValueError(errmsg) - if batch > batches or batch < 1: - raise ValueError(errmsg) - return Batch(rule, batch, batches) - return None - - -def parse_groups(args): - errmsg = "Invalid groups definition: entries have to be defined as RULE=GROUP pairs" - overwrite_groups = dict() - if args.groups is not None: - for entry in args.groups: - rule, group = parse_key_value_arg(entry, errmsg=errmsg) - overwrite_groups[rule] = group - return overwrite_groups - - -def parse_group_components(args): - errmsg = "Invalid group components definition: entries have to be defined as GROUP=COMPONENTS pairs (with COMPONENTS being a positive integer)" - group_components = dict() - if args.group_components is not None: - for entry in args.group_components: - group, count = parse_key_value_arg(entry, errmsg=errmsg) - try: - count = int(count) - except ValueError: - raise ValueError(errmsg) - if count <= 0: - raise ValueError(errmsg) - group_components[group] = count - return group_components - - -def _bool_parser(value): - if value == "True": - return True - elif value == "False": - return False - raise ValueError - - -def parse_config(args): - """Parse config from args.""" - import yaml - - yaml_base_load = lambda s: yaml.load(s, Loader=yaml.loader.BaseLoader) - parsers = [int, float, _bool_parser, yaml_base_load, str] - config = dict() - if args.config is not None: - valid = re.compile(r"[a-zA-Z_]\w*$") - for entry in args.config: - key, val = parse_key_value_arg( - entry, - errmsg="Invalid config definition: Config entries have to be defined as name=value pairs.", - ) - if not valid.match(key): - raise ValueError( - "Invalid config definition: Config entry must start with a valid identifier." - ) - v = None - if val == "": - update_config(config, {key: v}) - continue - for parser in parsers: - try: - v = parser(val) - # avoid accidental interpretation as function - if not callable(v): - break - except: - pass - assert v is not None - update_config(config, {key: v}) - return config - - -def parse_cores(cores, allow_none=False): - if cores is None: - if allow_none: - return cores - raise CliException( - "Error: you need to specify the maximum number of CPU cores to " - "be used at the same time. If you want to use N cores, say --cores N " - "or -cN. For all cores on your system (be sure that this is " - "appropriate) use --cores all. For no parallelization use --cores 1 or " - "-c1." - ) - if cores == "all": - return available_cpu_count() - try: - return int(cores) - except ValueError: - raise CliException( - "Error parsing number of cores (--cores, -c, -j): must be integer, " - "empty, or 'all'." - ) - - -def parse_jobs(jobs, allow_none=False): - if jobs is None: - if allow_none: - return jobs - raise CliException( - "Error: you need to specify the maximum number of jobs to " - "be queued or executed at the same time with --jobs or -j." - ) - if jobs == "unlimited": - return sys.maxsize - try: - return int(jobs) - except ValueError: - raise CliException( - "Error parsing number of jobs (--jobs, -j): must be integer." 
- ) - - -def parse_cores_jobs(cores, jobs, no_exec, non_local_exec, dryrun): - if no_exec or dryrun: - cores = parse_cores(cores, allow_none=True) or 1 - jobs = parse_jobs(jobs, allow_none=True) or 1 - elif non_local_exec: - cores = parse_cores(cores, allow_none=True) - jobs = parse_jobs(jobs) - else: - cores = parse_cores(cores or jobs) - jobs = None - - return cores, jobs - - -def get_profile_file(profile, file, return_default=False): - dirs = get_appdirs() - if os.path.exists(profile): - search_dirs = [os.path.dirname(profile)] - profile = os.path.basename(profile) - else: - search_dirs = [os.getcwd(), dirs.user_config_dir, dirs.site_config_dir] - get_path = lambda d: os.path.join(d, profile, file) - for d in search_dirs: - p = get_path(d) - # "file" can actually be a full command. If so, `p` won't exist as the - # below would check if e.g. '/path/to/profile/script --arg1 val --arg2' - # exists. To fix this, we use shlex.split() to get the path to the - # script. We check for both, in case the path contains spaces or some - # other thing that would cause shlex.split() to mangle the path - # inaccurately. - if os.path.exists(p) or os.path.exists(shlex.split(p)[0]): - return p - - if return_default: - return file - return None - - -def get_argument_parser(profiles=None): - """Generate and return argument parser.""" - import configargparse - from snakemake.profiles import ProfileConfigFileParser - - dirs = get_appdirs() - config_files = [] - if profiles: - for profile in profiles: - if profile == "": - print("Error: invalid profile name.", file=sys.stderr) - exit(1) - - config_file = get_profile_file(profile, "config.yaml") - if config_file is None: - print( - "Error: profile given but no config.yaml found. " - "Profile has to be given as either absolute path, relative " - "path or name of a directory available in either " - "{site} or {user}.".format( - site=dirs.site_config_dir, user=dirs.user_config_dir - ), - file=sys.stderr, - ) - exit(1) - config_files.append(config_file) - - parser = configargparse.ArgumentParser( - description="Snakemake is a Python based language and execution " - "environment for GNU Make-like workflows.", - formatter_class=ArgumentDefaultsHelpFormatter, - default_config_files=config_files, - config_file_parser_class=ProfileConfigFileParser, - ) - - group_exec = parser.add_argument_group("EXECUTION") - - group_exec.add_argument( - "target", - nargs="*", - default=None, - help="Targets to build. May be rules or files.", - ) - - group_exec.add_argument( - "--dry-run", - "--dryrun", - "-n", - dest="dryrun", - action="store_true", - help="Do not execute anything, and display what would be done. " - "If you have a very large workflow, use --dry-run --quiet to just " - "print a summary of the DAG of jobs.", - ) - - group_exec.add_argument( - "--profile", - help=f""" - Name of profile to use for configuring - Snakemake. Snakemake will search for a corresponding - folder in {dirs.site_config_dir} and {dirs.user_config_dir}. Alternatively, this can be an - absolute or relative path. - The profile folder has to contain a file 'config.yaml'. - This file can be used to set default values for command - line options in YAML format. For example, - '--cluster qsub' becomes 'cluster: qsub' in the YAML - file. Profiles can be obtained from - https://github.com/snakemake-profiles. - The profile can also be set via the environment variable $SNAKEMAKE_PROFILE. - To override this variable and use no profile at all, provide the value 'none' - to this argument. 
- """, - env_var="SNAKEMAKE_PROFILE", - ) - - group_exec.add_argument( - "--workflow-profile", - help=""" - Path (relative to current directory) to workflow specific profile - folder to use for configuring Snakemake with parameters specific for this - workflow (like resources). - If this flag is not used, Snakemake will by default use - 'profiles/default' if present (searched both relative to current directory - and relative to Snakefile, in this order). - For skipping any workflow specific profile provide the special value 'none'. - Settings made in the workflow profile will override settings made in the - general profile (see --profile). - The profile folder has to contain a file 'config.yaml'. - This file can be used to set default values for command - line options in YAML format. For example, - '--cluster qsub' becomes 'cluster: qsub' in the YAML - file. It is advisable to use the workflow profile to set - or overwrite e.g. workflow specific resources like the amount of threads - of a particular rule or the amount of memory needed. - Note that in such cases, the arguments may be given as nested YAML mappings - in the profile, e.g. 'set-threads: myrule: 4' instead of 'set-threads: myrule=4'. - """, - ) - - group_exec.add_argument( - "--cache", - nargs="*", - metavar="RULE", - help="Store output files of given rules in a central cache given by the environment " - "variable $SNAKEMAKE_OUTPUT_CACHE. Likewise, retrieve output files of the given rules " - "from this cache if they have been created before (by anybody writing to the same cache), " - "instead of actually executing the rules. Output files are identified by hashing all " - "steps, parameters and software stack (conda envs or containers) needed to create them.", - ) - - group_exec.add_argument( - "--snakefile", - "-s", - metavar="FILE", - help=( - "The workflow definition in form of a snakefile. " - "Usually, you should not need to specify this. " - "By default, Snakemake will search for {} " - "beneath the current working " - "directory, in this order. " - "Only if you definitely want a different layout, " - "you need to use this parameter." - ).format(", ".join(map("'{}'".format, SNAKEFILE_CHOICES))), - ) - group_exec.add_argument( - "--cores", - "-c", - action="store", - const=available_cpu_count(), - nargs="?", - metavar="N", - help=( - "Use at most N CPU cores/jobs in parallel. " - "If N is omitted or 'all', the limit is set to the number of " - "available CPU cores. " - "In case of cluster/cloud execution, this argument sets the maximum number " - "of cores requested from the cluster or cloud scheduler. (See " - "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" - "resources-remote-execution for more info)" - "This number is available to rules via workflow.cores." - ), - ) - group_exec.add_argument( - "--jobs", - "-j", - metavar="N", - nargs="?", - const=available_cpu_count(), - action="store", - help=( - "Use at most N CPU cluster/cloud jobs in parallel. For local execution this is " - "an alias for --cores. Note: Set to 'unlimited' in case, this does not play a role." - ), - ) - group_exec.add_argument( - "--local-cores", - action="store", - default=available_cpu_count(), - metavar="N", - type=int, - help=( - "In cluster/cloud mode, use at most N cores of the host machine in parallel " - "(default: number of CPU cores of the host). The cores are used to execute " - "local rules. This option is ignored when not in cluster/cloud mode." 
- ), - ) - group_exec.add_argument( - "--resources", - "--res", - nargs="*", - metavar="NAME=INT", - help=( - "Define additional resources that shall constrain the scheduling " - "analogously to --cores (see above). A resource is defined as " - "a name and an integer value. E.g. --resources mem_mb=1000. Rules can " - "use resources by defining the resource keyword, e.g. " - "resources: mem_mb=600. If now two rules require 600 of the resource " - "'mem_mb' they won't be run in parallel by the scheduler. In " - "cluster/cloud mode, this argument will also constrain the amount of " - "resources requested from the server. (See " - "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" - "resources-remote-execution for more info)" - ), - ) - group_exec.add_argument( - "--set-threads", - metavar="RULE=THREADS", - nargs="+", - help="Overwrite thread usage of rules. This allows to fine-tune workflow " - "parallelization. In particular, this is helpful to target certain cluster nodes " - "by e.g. shifting a rule to use more, or less threads than defined in the workflow. " - "Thereby, THREADS has to be a positive integer, and RULE has to be the name of the rule.", - ) - group_exec.add_argument( - "--max-threads", - type=int, - help="Define a global maximum number of threads available to any rule. Rules " - "requesting more threads (via the threads keyword) will have their values " - "reduced to the maximum. This can be useful when you want to restrict the " - "maximum number of threads without modifying the workflow definition or " - "overwriting rules individually with --set-threads.", - ) - group_exec.add_argument( - "--set-resources", - metavar="RULE:RESOURCE=VALUE", - nargs="+", - help="Overwrite resource usage of rules. This allows to fine-tune workflow " - "resources. In particular, this is helpful to target certain cluster nodes " - "by e.g. defining a certain partition for a rule, or overriding a temporary directory. " - "Thereby, VALUE has to be a positive integer or a string, RULE has to be the name of the " - "rule, and RESOURCE has to be the name of the resource.", - ) - group_exec.add_argument( - "--set-scatter", - metavar="NAME=SCATTERITEMS", - nargs="+", - help="Overwrite number of scatter items of scattergather processes. This allows to fine-tune " - "workflow parallelization. Thereby, SCATTERITEMS has to be a positive integer, and NAME has to be " - "the name of the scattergather process defined via a scattergather directive in the workflow.", - ) - group_exec.add_argument( - "--set-resource-scopes", - metavar="RESOURCE=[global|local]", - nargs="+", - help="Overwrite resource scopes. A scope determines how a constraint is " - "reckoned in cluster execution. With RESOURCE=local, a constraint applied to " - "RESOURCE using --resources will be considered the limit for each group " - "submission. With RESOURCE=global, the constraint will apply across all groups " - "cumulatively. By default, only `mem_mb` and `disk_mb` are considered local, " - "all other resources are global. This may be modified in the snakefile using " - "the `resource_scopes:` directive. Note that number of threads, specified via " - "--cores, is always considered local. (See " - "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" - "resources-remote-execution for more info)", - ) - group_exec.add_argument( - "--default-resources", - "--default-res", - nargs="*", - metavar="NAME=INT", - help=( - "Define default values of resources for rules that do not define their own values. 
" - "In addition to plain integers, python expressions over inputsize are allowed (e.g. '2*input.size_mb'). " - "The inputsize is the sum of the sizes of all input files of a rule. " - "By default, Snakemake assumes a default for mem_mb, disk_mb, and tmpdir (see below). " - "This option allows to add further defaults (e.g. account and partition for slurm) or to overwrite these default values. " - "The defaults are 'mem_mb=max(2*input.size_mb, 1000)', " - "'disk_mb=max(2*input.size_mb, 1000)' " - "(i.e., default disk and mem usage is twice the input file size but at least 1GB), and " - "the system temporary directory (as given by $TMPDIR, $TEMP, or $TMP) is used for the tmpdir resource. " - "The tmpdir resource is automatically used by shell commands, scripts and wrappers to store temporary data (as it is " - "mirrored into $TMPDIR, $TEMP, and $TMP for the executed subprocesses). " - "If this argument is not specified at all, Snakemake just uses the tmpdir resource as outlined above." - ), - ) - - group_exec.add_argument( - "--preemption-default", - type=int, - default=None, - help=( - "A preemptible instance can be requested when using the Google Life Sciences API. If you set a --preemption-default, " - "all rules will be subject to the default. Specifically, this integer is the number of restart attempts that will be " - "made given that the instance is killed unexpectedly. Note that preemptible instances have a maximum running time of 24 " - "hours. If you want to set preemptible instances for only a subset of rules, use --preemptible-rules instead." - ), - ) - - group_exec.add_argument( - "--preemptible-rules", - nargs="+", - default=None, - help=( - "A preemptible instance can be requested when using the Google Life Sciences API. If you want to use these instances " - "for a subset of your rules, you can use --preemptible-rules and then specify a list of rule and integer pairs, where " - "each integer indicates the number of restarts to use for the rule's instance in the case that the instance is " - "terminated unexpectedly. --preemptible-rules can be used in combination with --preemption-default, and will take " - "priority. Note that preemptible instances have a maximum running time of 24. If you want to apply a consistent " - "number of retries across all your rules, use --preemption-default instead. " - "Example: snakemake --preemption-default 10 --preemptible-rules map_reads=3 call_variants=0" - ), - ) - - group_exec.add_argument( - "--config", - "-C", - nargs="*", - metavar="KEY=VALUE", - help=( - "Set or overwrite values in the workflow config object. " - "The workflow config object is accessible as variable config inside " - "the workflow. Default values can be set by providing a JSON file " - "(see Documentation)." - ), - ) - group_exec.add_argument( - "--configfile", - "--configfiles", - nargs="+", - metavar="FILE", - help=( - "Specify or overwrite the config file of the workflow (see the docs). " - "Values specified in JSON or YAML format are available in the global config " - "dictionary inside the workflow. Multiple files overwrite each other in " - "the given order. Thereby missing keys in previous config files are extended by " - "following configfiles. Note that this order also includes a config file defined " - "in the workflow definition itself (which will come first)." 
- ), - ) - group_exec.add_argument( - "--envvars", - nargs="+", - metavar="VARNAME", - help="Environment variables to pass to cloud jobs.", - ) - group_exec.add_argument( - "--directory", - "-d", - metavar="DIR", - action="store", - help=( - "Specify working directory (relative paths in " - "the snakefile will use this as their origin)." - ), - ) - group_exec.add_argument( - "--touch", - "-t", - action="store_true", - help=( - "Touch output files (mark them up to date without really " - "changing them) instead of running their commands. This is " - "used to pretend that the rules were executed, in order to " - "fool future invocations of snakemake. Fails if a file does " - "not yet exist. Note that this will only touch files that would " - "otherwise be recreated by Snakemake (e.g. because their input " - "files are newer). For enforcing a touch, combine this with " - "--force, --forceall, or --forcerun. Note however that you lose " - "the provenance information when the files have been created in " - "reality. Hence, this should be used only as a last resort." - ), - ) - group_exec.add_argument( - "--keep-going", - "-k", - action="store_true", - help="Go on with independent jobs if a job fails.", - ) - group_exec.add_argument( - "--rerun-triggers", - nargs="+", - choices=RERUN_TRIGGERS, - default=RERUN_TRIGGERS, - help="Define what triggers the rerunning of a job. By default, " - "all triggers are used, which guarantees that results are " - "consistent with the workflow code and configuration. If you " - "rather prefer the traditional way of just considering " - "file modification dates, use '--rerun-trigger mtime'.", - ) - group_exec.add_argument( - "--force", - "-f", - action="store_true", - help=( - "Force the execution of the selected target or the first rule " - "regardless of already created output." - ), - ) - group_exec.add_argument( - "--forceall", - "-F", - action="store_true", - help=( - "Force the execution of the selected (or the first) rule and " - "all rules it is dependent on regardless of already created " - "output." - ), - ) - group_exec.add_argument( - "--forcerun", - "-R", - nargs="*", - metavar="TARGET", - help=( - "Force the re-execution or creation of the given rules or files." - " Use this option if you changed a rule and want to have all its " - "output in your workflow updated." - ), - ) - group_exec.add_argument( - "--prioritize", - "-P", - nargs="+", - metavar="TARGET", - help=( - "Tell the scheduler to assign creation of given targets " - "(and all their dependencies) highest priority. (EXPERIMENTAL)" - ), - ) - group_exec.add_argument( - "--batch", - metavar="RULE=BATCH/BATCHES", - help=( - "Only create the given BATCH of the input files of the given RULE. " - "This can be used to iteratively run parts of very large workflows. " - "Only the execution plan of the relevant part of the workflow has to " - "be calculated, thereby speeding up DAG computation. " - "It is recommended to provide the most suitable rule for batching when " - "documenting a workflow. It should be some aggregating rule that " - "would be executed only once, and has a large number of input files. " - "For example, it can be a rule that aggregates over samples." - ), - ) - group_exec.add_argument( - "--until", - "-U", - nargs="+", - metavar="TARGET", - help=( - "Runs the pipeline until it reaches the specified rules or " - "files. Only runs jobs that are dependencies of the specified " - "rule or files, does not run sibling DAGs. 
" - ), - ) - group_exec.add_argument( - "--omit-from", - "-O", - nargs="+", - metavar="TARGET", - help=( - "Prevent the execution or creation of the given rules or files " - "as well as any rules or files that are downstream of these targets " - "in the DAG. Also runs jobs in sibling DAGs that are independent of the " - "rules or files specified here." - ), - ) - group_exec.add_argument( - "--rerun-incomplete", - "--ri", - action="store_true", - help=("Re-run all jobs the output of which is recognized as incomplete."), - ) - group_exec.add_argument( - "--shadow-prefix", - metavar="DIR", - help=( - "Specify a directory in which the 'shadow' directory is created. " - "If not supplied, the value is set to the '.snakemake' directory relative " - "to the working directory." - ), - ) - - try: - import pulp - - lp_solvers = pulp.list_solvers(onlyAvailable=True) - except ImportError: - # Dummy list for the case that pulp is not available - # This only happened when building docs. - lp_solvers = ["COIN_CMD"] - recommended_lp_solver = "COIN_CMD" - - group_exec.add_argument( - "--scheduler", - default="greedy" if recommended_lp_solver not in lp_solvers else "ilp", - nargs="?", - choices=["ilp", "greedy"], - help=( - "Specifies if jobs are selected by a greedy algorithm or by solving an ilp. " - "The ilp scheduler aims to reduce runtime and hdd usage by best possible use of resources." - ), - ) - group_exec.add_argument( - "--wms-monitor", - action="store", - nargs="?", - help=( - "IP and port of workflow management system to monitor the execution of snakemake (e.g. http://127.0.0.1:5000)" - " Note that if your service requires an authorization token, you must export WMS_MONITOR_TOKEN in the environment." - ), - ) - group_exec.add_argument( - "--wms-monitor-arg", - nargs="*", - metavar="NAME=VALUE", - help=( - "If the workflow management service accepts extra arguments, provide." - " them in key value pairs with --wms-monitor-arg. For example, to run" - " an existing workflow using a wms monitor, you can provide the pair " - " id=12345 and the arguments will be provided to the endpoint to " - " first interact with the workflow" - ), - ) - group_exec.add_argument( - "--scheduler-ilp-solver", - default=recommended_lp_solver, - choices=lp_solvers, - help=("Specifies solver to be utilized when selecting ilp-scheduler."), - ) - group_exec.add_argument( - "--scheduler-solver-path", - help="Set the PATH to search for scheduler solver binaries (internal use only).", - ) - group_exec.add_argument( - "--conda-base-path", - help="Path of conda base installation (home of conda, mamba, activate) (internal use only).", - ) - - group_exec.add_argument( - "--no-subworkflows", - "--nosw", - action="store_true", - help=("Do not evaluate or execute subworkflows."), - ) - - # TODO add group_partitioning, allowing to define --group rulename=groupname. - # i.e. setting groups via the CLI for improving cluster performance given - # available resources. - # TODO add an additional flag --group-components groupname=3, allowing to set the - # number of connected components a group is allowed to span. By default, this is 1 - # (as now), but the flag allows to extend this. This can be used to run e.g. - # 3 jobs of the same rule in the same group, although they are not connected. - # Can be helpful for putting together many small jobs or benefitting of shared memory - # setups. 
- - group_group = parser.add_argument_group("GROUPING") - group_group.add_argument( - "--groups", - nargs="+", - help="Assign rules to groups (this overwrites any " - "group definitions from the workflow).", - ) - group_group.add_argument( - "--group-components", - nargs="+", - help="Set the number of connected components a group is " - "allowed to span. By default, this is 1, but this flag " - "allows to extend this. This can be used to run e.g. 3 " - "jobs of the same rule in the same group, although they " - "are not connected. It can be helpful for putting together " - "many small jobs or benefitting of shared memory setups.", - ) - - group_report = parser.add_argument_group("REPORTS") - - group_report.add_argument( - "--report", - nargs="?", - const="report.html", - metavar="FILE", - help="Create an HTML report with results and statistics. " - "This can be either a .html file or a .zip file. " - "In the former case, all results are embedded into the .html (this only works for small data). " - "In the latter case, results are stored along with a file report.html in the zip archive. " - "If no filename is given, an embedded report.html is the default.", - ) - group_report.add_argument( - "--report-stylesheet", - metavar="CSSFILE", - help="Custom stylesheet to use for report. In particular, this can be used for " - "branding the report with e.g. a custom logo, see docs.", - ) - - group_notebooks = parser.add_argument_group("NOTEBOOKS") - - group_notebooks.add_argument( - "--draft-notebook", - metavar="TARGET", - help="Draft a skeleton notebook for the rule used to generate the given target file. This notebook " - "can then be opened in a jupyter server, executed and implemented until ready. After saving, it " - "will automatically be reused in non-interactive mode by Snakemake for subsequent jobs.", - ) - group_notebooks.add_argument( - "--edit-notebook", - metavar="TARGET", - help="Interactively edit the notebook associated with the rule used to generate the given target file. " - "This will start a local jupyter notebook server. " - "Any changes to the notebook should be saved, and the server has to be stopped by " - "closing the notebook and hitting the 'Quit' button on the jupyter dashboard. " - "Afterwards, the updated notebook will be automatically stored in the path defined in the rule. " - "If the notebook is not yet present, this will create an empty draft. ", - ) - group_notebooks.add_argument( - "--notebook-listen", - metavar="IP:PORT", - default="localhost:8888", - help="The IP address and PORT the notebook server used for editing the notebook (--edit-notebook) will listen on.", - ) - - group_utils = parser.add_argument_group("UTILITIES") - group_utils.add_argument( - "--lint", - nargs="?", - const="text", - choices=["text", "json"], - help="Perform linting on the given workflow. This will print snakemake " - "specific suggestions to improve code quality (work in progress, more lints " - "to be added in the future). If no argument is provided, plain text output is used.", - ) - group_utils.add_argument( - "--generate-unit-tests", - nargs="?", - const=".tests/unit", - metavar="TESTPATH", - help="Automatically generate unit tests for each workflow rule. " - "This assumes that all input files of each job are already present. " - "Rules without a job with present input files will be skipped (a warning will be issued). " - "For each rule, one test case will be " - "created in the specified test folder (.tests/unit by default). 
After " - "successful execution, tests can be run with " - "'pytest TESTPATH'.", - ) - group_utils.add_argument( - "--containerize", - action="store_true", - help="Print a Dockerfile that provides an execution environment for the workflow, including all " - "conda environments.", - ) - group_utils.add_argument( - "--export-cwl", - action="store", - metavar="FILE", - help="Compile workflow to CWL and store it in given FILE.", - ) - group_utils.add_argument( - "--list", - "-l", - action="store_true", - help="Show available rules in given Snakefile.", - ) - group_utils.add_argument( - "--list-target-rules", - "--lt", - action="store_true", - help="Show available target rules in given Snakefile.", - ) - group_utils.add_argument( - "--dag", - action="store_true", - help="Do not execute anything and print the directed " - "acyclic graph of jobs in the dot language. Recommended " - "use on Unix systems: snakemake --dag | dot | display. " - "Note print statements in your Snakefile may interfere " - "with visualization.", - ) - group_utils.add_argument( - "--rulegraph", - action="store_true", - help="Do not execute anything and print the dependency graph " - "of rules in the dot language. This will be less " - "crowded than above DAG of jobs, but also show less information. " - "Note that each rule is displayed once, hence the displayed graph will be " - "cyclic if a rule appears in several steps of the workflow. " - "Use this if above option leads to a DAG that is too large. " - "Recommended use on Unix systems: snakemake --rulegraph | dot | display. " - "Note print statements in your Snakefile may interfere " - "with visualization.", - ) - group_utils.add_argument( - "--filegraph", - action="store_true", - help="Do not execute anything and print the dependency graph " - "of rules with their input and output files in the dot language. " - "This is an intermediate solution between above DAG of jobs and the rule graph. " - "Note that each rule is displayed once, hence the displayed graph will be " - "cyclic if a rule appears in several steps of the workflow. " - "Use this if above option leads to a DAG that is too large. " - "Recommended use on Unix systems: snakemake --filegraph | dot | display. " - "Note print statements in your Snakefile may interfere " - "with visualization.", - ) - group_utils.add_argument( - "--d3dag", - action="store_true", - help="Print the DAG in D3.js compatible JSON format.", - ) - group_utils.add_argument( - "--summary", - "-S", - action="store_true", - help="Print a summary of all files created by the workflow. The " - "has the following columns: filename, modification time, " - "rule version, status, plan.\n" - "Thereby rule version contains the version" - "the file was created with (see the version keyword of rules), and " - "status denotes whether the file is missing, its input files are " - "newer or if version or implementation of the rule changed since " - "file creation. Finally the last column denotes whether the file " - "will be updated or created during the next workflow execution.", - ) - group_utils.add_argument( - "--detailed-summary", - "-D", - action="store_true", - help="Print a summary of all files created by the workflow. 
The " - "has the following columns: filename, modification time, " - "rule version, input file(s), shell command, status, plan.\n" - "Thereby rule version contains the version " - "the file was created with (see the version keyword of rules), and " - "status denotes whether the file is missing, its input files are " - "newer or if version or implementation of the rule changed since " - "file creation. The input file and shell command columns are self " - "explanatory. Finally the last column denotes whether the file " - "will be updated or created during the next workflow execution.", - ) - group_utils.add_argument( - "--archive", - metavar="FILE", - help="Archive the workflow into the given tar archive FILE. The archive " - "will be created such that the workflow can be re-executed on a vanilla " - "system. The function needs conda and git to be installed. " - "It will archive every file that is under git version control. " - "Note that it is best practice to have the Snakefile, config files, and " - "scripts under version control. Hence, they will be included in the archive. " - "Further, it will add input files that are not generated by " - "by the workflow itself and conda environments. Note that symlinks are " - "dereferenced. Supported " - "formats are .tar, .tar.gz, .tar.bz2 and .tar.xz.", - ) - group_utils.add_argument( - "--cleanup-metadata", - "--cm", - nargs="+", - metavar="FILE", - help="Cleanup the metadata " - "of given files. That means that snakemake removes any tracked " - "version info, and any marks that files are incomplete.", - ) - group_utils.add_argument( - "--cleanup-shadow", - action="store_true", - help="Cleanup old shadow directories which have not been deleted due " - "to failures or power loss.", - ) - group_utils.add_argument( - "--skip-script-cleanup", - action="store_true", - help="Don't delete wrapper scripts used for execution", - ) - group_utils.add_argument( - "--unlock", action="store_true", help="Remove a lock on the working directory." - ) - group_utils.add_argument( - "--list-version-changes", - "--lv", - action="store_true", - help="List all output files that have been created with " - "a different version (as determined by the version keyword).", - ) - group_utils.add_argument( - "--list-code-changes", - "--lc", - action="store_true", - help="List all output files for which the rule body (run or shell) have " - "changed in the Snakefile.", - ) - group_utils.add_argument( - "--list-input-changes", - "--li", - action="store_true", - help="List all output files for which the defined input files have changed " - "in the Snakefile (e.g. new input files were added in the rule " - "definition or files were renamed). For listing input file " - "modification in the filesystem, use --summary.", - ) - group_utils.add_argument( - "--list-params-changes", - "--lp", - action="store_true", - help="List all output files for which the defined params have changed " - "in the Snakefile.", - ) - group_utils.add_argument( - "--list-untracked", - "--lu", - action="store_true", - help="List all files in the working directory that are not used in the " - "workflow. This can be used e.g. for identifying leftover files. Hidden files " - "and directories are ignored.", - ) - group_utils.add_argument( - "--delete-all-output", - action="store_true", - help="Remove all files generated by the workflow. Use together with --dry-run " - "to list files without actually deleting anything. Note that this will " - "not recurse into subworkflows. Write-protected files are not removed. 
" - "Nevertheless, use with care!", - ) - group_utils.add_argument( - "--delete-temp-output", - action="store_true", - help="Remove all temporary files generated by the workflow. Use together " - "with --dry-run to list files without actually deleting anything. Note " - "that this will not recurse into subworkflows.", - ) - group_utils.add_argument( - "--bash-completion", - action="store_true", - help="Output code to register bash completion for snakemake. Put the " - "following in your .bashrc (including the accents): " - "`snakemake --bash-completion` or issue it in an open terminal " - "session.", - ) - group_utils.add_argument( - "--keep-incomplete", - action="store_true", - help="Do not remove incomplete output files by failed jobs.", - ) - group_utils.add_argument( - "--drop-metadata", - action="store_true", - help="Drop metadata file tracking information after job finishes. " - "Provenance-information based reports (e.g. --report and the " - "--list_x_changes functions) will be empty or incomplete.", - ) - group_utils.add_argument("--version", "-v", action="version", version=__version__) - - group_output = parser.add_argument_group("OUTPUT") - group_output.add_argument( - "--reason", - "-r", - action="store_true", - help="Print the reason for each executed rule (deprecated, always true now).", - ) - group_output.add_argument( - "--gui", - nargs="?", - const="8000", - metavar="PORT", - type=str, - help="Serve an HTML based user interface to the given network and " - "port e.g. 168.129.10.15:8000. By default Snakemake is only " - "available in the local network (default port: 8000). To make " - "Snakemake listen to all ip addresses add the special host address " - "0.0.0.0 to the url (0.0.0.0:8000). This is important if Snakemake " - "is used in a virtualised environment like Docker. If possible, a " - "browser window is opened.", - ) - group_output.add_argument( - "--printshellcmds", - "-p", - action="store_true", - help="Print out the shell commands that will be executed.", - ) - group_output.add_argument( - "--debug-dag", - action="store_true", - help="Print candidate and selected jobs (including their wildcards) while " - "inferring DAG. This can help to debug unexpected DAG topology or errors.", - ) - group_output.add_argument( - "--stats", - metavar="FILE", - help="Write stats about Snakefile execution in JSON format to the given file.", - ) - group_output.add_argument( - "--nocolor", action="store_true", help="Do not use a colored output." - ) - group_output.add_argument( - "--quiet", - "-q", - nargs="*", - choices=["progress", "rules", "all"], - default=None, - help="Do not output certain information. " - "If used without arguments, do not output any progress or rule " - "information. Defining 'all' results in no information being " - "printed at all.", - ) - group_output.add_argument( - "--print-compilation", - action="store_true", - help="Print the python representation of the workflow.", - ) - - group_output.add_argument( - "--verbose", action="store_true", help="Print debugging output." - ) - - group_behavior = parser.add_argument_group("BEHAVIOR") - group_behavior.add_argument( - "--force-use-threads", - dest="force_use_threads", - action="store_true", - help="Force threads rather than processes. Helpful if shared memory (/dev/shm) is full or unavailable.", - ) - group_behavior.add_argument( - "--allow-ambiguity", - "-a", - action="store_true", - help=( - "Don't check for ambiguous rules and simply use the first if " - "several can produce the same file. 
This allows the user to " - "prioritize rules by their order in the snakefile." - ), - ) - group_behavior.add_argument( - "--nolock", action="store_true", help="Do not lock the working directory" - ) - group_behavior.add_argument( - "--ignore-incomplete", - "--ii", - action="store_true", - help="Do not check for incomplete output files.", - ) - group_behavior.add_argument( - "--max-inventory-time", - type=int, - default=20, - metavar="SECONDS", - help="Spend at most SECONDS seconds to create a file inventory for the working directory. " - "The inventory vastly speeds up file modification and existence checks when computing " - "which jobs need to be executed. However, creating the inventory itself can be slow, e.g. on " - "network file systems. Hence, we do not spend more than a given amount of time and fall back " - "to individual checks for the rest.", - ) - group_behavior.add_argument( - "--latency-wait", - "--output-wait", - "-w", - type=int, - default=5, - metavar="SECONDS", - help="Wait given seconds if an output file of a job is not present after " - "the job finished. This helps if your filesystem " - "suffers from latency (default 5).", - ) - group_behavior.add_argument( - "--wait-for-files", - nargs="*", - metavar="FILE", - help="Wait --latency-wait seconds for these " - "files to be present before executing the workflow. " - "This option is used internally to handle filesystem latency in cluster " - "environments.", - ) - group_behavior.add_argument( - "--wait-for-files-file", - metavar="FILE", - help="Same behaviour as --wait-for-files, but file list is " - "stored in file instead of being passed on the commandline. " - "This is useful when the list of files is too long to be " - "passed on the commandline.", - ) - group_behavior.add_argument( - "--notemp", - "--nt", - action="store_true", - help="Ignore temp() declarations. This is useful when running only " - "a part of the workflow, since temp() would lead to deletion of " - "probably needed files by other parts of the workflow.", - ) - group_behavior.add_argument( - "--all-temp", - action="store_true", - help="Mark all output files as temp files. This can be useful for CI testing, " - "in order to save space.", - ) - group_behavior.add_argument( - "--keep-remote", - action="store_true", - help="Keep local copies of remote input files.", - ) - group_behavior.add_argument( - "--keep-target-files", - action="store_true", - help="Do not adjust the paths of given target files relative to the working directory.", - ) - group_behavior.add_argument( - "--allowed-rules", - nargs="+", - help="Only consider given rules. If omitted, all rules in Snakefile are " - "used. Note that this is intended primarily for internal use and may " - "lead to unexpected results otherwise.", - ) - group_behavior.add_argument( - "--target-jobs", - nargs="+", - help="Target particular jobs by RULE:WILDCARD1=VALUE,WILDCARD2=VALUE,... 
" - "This is meant for internal use by Snakemake itself only.", - ) - group_behavior.add_argument( - "--local-groupid", - default="local", - help="Name for local groupid, meant for internal use only.", - ) - group_behavior.add_argument( - "--max-jobs-per-second", - default=10, - type=float, - help="Maximal number of cluster/drmaa jobs per second, default is 10, " - "fractions allowed.", - ) - group_behavior.add_argument( - "--max-status-checks-per-second", - default=10, - type=float, - help="Maximal number of job status checks per second, default is 10, " - "fractions allowed.", - ) - group_behavior.add_argument( - "-T", - "--retries", - "--restart-times", - default=0, - type=int, - help="Number of times to restart failing jobs (defaults to 0).", - ) - group_behavior.add_argument( - "--attempt", - default=1, - type=int, - help="Internal use only: define the initial value of the attempt " - "parameter (default: 1).", - ) - group_behavior.add_argument( - "--wrapper-prefix", - default="https://github.com/snakemake/snakemake-wrappers/raw/", - help="Prefix for URL created from wrapper directive (default: " - "https://github.com/snakemake/snakemake-wrappers/raw/). Set this to " - "a different URL to use your fork or a local clone of the repository, " - "e.g., use a git URL like 'git+file://path/to/your/local/clone@'.", - ) - group_behavior.add_argument( - "--default-remote-provider", - choices=[ - "S3", - "GS", - "FTP", - "SFTP", - "S3Mocked", - "gfal", - "gridftp", - "iRODS", - "AzBlob", - "XRootD", - ], - help="Specify default remote provider to be used for " - "all input and output files that don't yet specify " - "one.", - ) - group_behavior.add_argument( - "--default-remote-prefix", - default="", - help="Specify prefix for default remote provider. E.g. a bucket name.", - ) - group_behavior.add_argument( - "--no-shared-fs", - action="store_true", - help="Do not assume that jobs share a common file " - "system. When this flag is activated, Snakemake will " - "assume that the filesystem on a cluster node is not " - "shared with other nodes. For example, this will lead " - "to downloading remote files on each cluster node " - "separately. Further, it won't take special measures " - "to deal with filesystem latency issues. This option " - "will in most cases only make sense in combination with " - "--default-remote-provider. Further, when using --cluster " - "you will have to also provide --cluster-status. " - "Only activate this if you " - "know what you are doing.", - ) - group_behavior.add_argument( - "--greediness", - type=float, - default=None, - help="Set the greediness of scheduling. This value between 0 and 1 " - "determines how careful jobs are selected for execution. The default " - "value (1.0) provides the best speed and still acceptable scheduling " - "quality.", - ) - group_behavior.add_argument( - "--no-hooks", - action="store_true", - help="Do not invoke onstart, onsuccess or onerror hooks after execution.", - ) - group_behavior.add_argument( - "--overwrite-shellcmd", - help="Provide a shell command that shall be executed instead of those " - "given in the workflow. " - "This is for debugging purposes only.", - ) - group_behavior.add_argument( - "--debug", - action="store_true", - help="Allow to debug rules with e.g. PDB. This flag " - "allows to set breakpoints in run blocks.", - ) - group_behavior.add_argument( - "--runtime-profile", - metavar="FILE", - help="Profile Snakemake and write the output to FILE. 
This requires yappi " - "to be installed.", - ) - group_behavior.add_argument( - "--mode", - choices=[Mode.default, Mode.subprocess, Mode.cluster], - default=Mode.default, - type=int, - help="Set execution mode of Snakemake (internal use only).", - ) - group_behavior.add_argument( - "--show-failed-logs", - action="store_true", - help="Automatically display logs of failed jobs.", - ) - group_behavior.add_argument( - "--log-handler-script", - metavar="FILE", - default=None, - help="Provide a custom script containing a function 'def log_handler(msg):'. " - "Snakemake will call this function for every logging output (given as a dictionary msg) " - "allowing to e.g. send notifications in the form of e.g. slack messages or emails.", - ) - group_behavior.add_argument( - "--log-service", - default=None, - choices=["none", "slack", "wms"], - help="Set a specific messaging service for logging output. " - "Snakemake will notify the service on errors and completed execution. " - "Currently slack and workflow management system (wms) are supported.", - ) - - group_slurm = parser.add_argument_group("SLURM") - slurm_mode_group = group_slurm.add_mutually_exclusive_group() - - slurm_mode_group.add_argument( - "--slurm", - action="store_true", - help=( - "Execute snakemake rules as SLURM batch jobs according" - " to their 'resources' definition. SLURM resources as " - " 'partition', 'ntasks', 'cpus', etc. need to be defined" - " per rule within the 'resources' definition. Note, that" - " memory can only be defined as 'mem_mb' or 'mem_mb_per_cpu'" - " as analogous to the SLURM 'mem' and 'mem-per-cpu' flags" - " to sbatch, respectively. Here, the unit is always 'MiB'." - " In addition '--default_resources' should contain the" - " SLURM account." - ), - ), - slurm_mode_group.add_argument( - "--slurm-jobstep", - action="store_true", - help=configargparse.SUPPRESS, # this should be hidden and only be used - # for snakemake to be working in jobscript- - # mode - ) - - group_cluster = parser.add_argument_group("CLUSTER") - - # TODO extend below description to explain the wildcards that can be used - cluster_mode_group = group_cluster.add_mutually_exclusive_group() - cluster_mode_group.add_argument( - "--cluster", - metavar="CMD", - help=( - "Execute snakemake rules with the given submit command, " - "e.g. qsub. Snakemake compiles jobs into scripts that are " - "submitted to the cluster with the given command, once all input " - "files for a particular job are present.\n" - "The submit command can be decorated to make it aware of certain " - "job properties (name, rulename, input, output, params, wildcards, log, threads " - "and dependencies (see the argument below)), e.g.:\n" - "$ snakemake --cluster 'qsub -pe threaded {threads}'." - ), - ), - cluster_mode_group.add_argument( - "--cluster-sync", - metavar="CMD", - help=( - "cluster submission command will block, returning the remote exit " - "status upon remote termination (for example, this should be used " - "if the cluster command is 'qsub -sync y' (SGE)" - ), - ), - cluster_mode_group.add_argument( - "--drmaa", - nargs="?", - const="", - metavar="ARGS", - help="Execute snakemake on a cluster accessed via DRMAA, " - "Snakemake compiles jobs into scripts that are " - "submitted to the cluster with the given command, once all input " - "files for a particular job are present. 
ARGS can be used to " - "specify options of the underlying cluster system, " - "thereby using the job properties name, rulename, input, output, params, wildcards, log, " - "threads and dependencies, e.g.: " - "--drmaa ' -pe threaded {threads}'. Note that ARGS must be given in quotes and " - "with a leading whitespace.", - ) - - group_cluster.add_argument( - "--cluster-config", - "-u", - metavar="FILE", - default=[], - action="append", - help=( - "A JSON or YAML file that defines the wildcards used in 'cluster' " - "for specific rules, instead of having them specified in the Snakefile. " - "For example, for rule 'job' you may define: " - "{ 'job' : { 'time' : '24:00:00' } } to specify the time for rule 'job'. " - "You can specify more than one file. The configuration files are merged " - "with later values overriding earlier ones. This option is deprecated in favor " - "of using --profile, see docs." - ), - ), - group_cluster.add_argument( - "--immediate-submit", - "--is", - action="store_true", - help="Immediately submit all jobs to the cluster instead of waiting " - "for present input files. This will fail, unless you make " - "the cluster aware of job dependencies, e.g. via:\n" - "$ snakemake --cluster 'sbatch --dependency {dependencies}.\n" - "Assuming that your submit script (here sbatch) outputs the " - "generated job id to the first stdout line, {dependencies} will " - "be filled with space separated job ids this job depends on. " - "Does not work for workflows that contain checkpoint rules.", - ) - group_cluster.add_argument( - "--jobscript", - "--js", - metavar="SCRIPT", - help="Provide a custom job script for submission to the cluster. " - "The default script resides as 'jobscript.sh' in the " - "installation directory.", - ) - group_cluster.add_argument( - "--jobname", - "--jn", - default="snakejob.{name}.{jobid}.sh", - metavar="NAME", - help="Provide a custom name for the jobscript that is submitted to the " - 'cluster (see --cluster). NAME is "snakejob.{name}.{jobid}.sh" ' - "per default. The wildcard {jobid} has to be present in the name.", - ) - group_cluster.add_argument( - "--cluster-status", - help="Status command for cluster execution. This is only considered " - "in combination with the --cluster flag. If provided, Snakemake will " - "use the status command to determine if a job has finished successfully " - "or failed. For this it is necessary that the submit command provided " - "to --cluster returns the cluster job id. Then, the status command " - "will be invoked with the job id. Snakemake expects it to return " - "'success' if the job was successful, 'failed' if the job failed and " - "'running' if the job still runs.", - ) - group_cluster.add_argument( - "--cluster-cancel", - default=None, - help="Specify a command that allows to stop currently running jobs. " - "The command will be passed a single argument, the job id.", - ) - group_cluster.add_argument( - "--cluster-cancel-nargs", - type=int, - default=1000, - help="Specify maximal number of job ids to pass to --cluster-cancel " - "command, defaults to 1000.", - ) - group_cluster.add_argument( - "--cluster-sidecar", - default=None, - help="Optional command to start a sidecar process during cluster " - "execution. Only active when --cluster is given as well.", - ) - group_cluster.add_argument( - "--drmaa-log-dir", - metavar="DIR", - help="Specify a directory in which stdout and stderr files of DRMAA " - " jobs will be written. 
The value may be given as a relative path,"
-        " in which case Snakemake will use the current invocation directory"
-        " as the origin. If given, this will override any given '-o' and/or"
-        " '-e' native specification. If not given, all DRMAA stdout and"
-        " stderr files are written to the current working directory.",
-    )
-
-    group_cloud = parser.add_argument_group("CLOUD")
-    group_flux = parser.add_argument_group("FLUX")
-    group_kubernetes = parser.add_argument_group("KUBERNETES")
-    group_google_life_science = parser.add_argument_group("GOOGLE_LIFE_SCIENCE")
-    group_tes = parser.add_argument_group("TES")
-    group_tibanna = parser.add_argument_group("TIBANNA")
-
-    group_kubernetes.add_argument(
-        "--kubernetes",
-        metavar="NAMESPACE",
-        nargs="?",
-        const="default",
-        help="Execute workflow in a kubernetes cluster (in the cloud). "
-        "NAMESPACE is the namespace you want to use for your job (if nothing "
-        "specified: 'default'). "
-        "Usually, this requires --default-remote-provider and "
-        "--default-remote-prefix to be set to an S3 or GS bucket where your "
-        "data shall be stored. It is further advisable to activate conda "
-        "integration via --use-conda.",
-    )
-    group_kubernetes.add_argument(
-        "--container-image",
-        metavar="IMAGE",
-        help="Docker image to use, e.g., when submitting jobs to kubernetes. "
-        "Defaults to 'https://hub.docker.com/r/snakemake/snakemake', tagged with "
-        "the same version as the currently running Snakemake instance. "
-        "Note that overwriting this value is up to your responsibility. "
-        "Any used image has to contain a working snakemake installation "
-        "that is compatible with (or ideally the same as) the currently "
-        "running version.",
-    )
-    group_kubernetes.add_argument(
-        "--k8s-cpu-scalar",
-        metavar="FLOAT",
-        default=0.95,
-        type=float,
-        help="K8s reserves some proportion of available CPUs for its own use. "
-        "So, where an underlying node may have 8 CPUs, only e.g. 7600 milliCPUs "
-        "are allocatable to k8s pods (i.e. snakemake jobs). As 8 > 7.6, k8s can't "
-        "find a node with enough CPU resource to run such jobs. This argument acts "
-        "as a global scalar on each job's CPU request, so that e.g. a job whose "
-        "rule definition asks for 8 CPUs will request 7600m CPUs from k8s, "
-        "allowing it to utilise one entire node. N.B: the job itself would still "
-        "see the original value, i.e. as the value substituted in {threads}.",
-    )
-
-    group_kubernetes.add_argument(
-        "--k8s-service-account-name",
-        metavar="SERVICEACCOUNTNAME",
-        default=None,
-        help="This argument allows the use of custom service accounts for "
-        "kubernetes pods. If specified, serviceAccountName will be added to the "
-        "pod specs. This is needed when using workload identity, which is enforced "
-        "when using Google Cloud GKE Autopilot.",
-    )
-
-    group_tibanna.add_argument(
-        "--tibanna",
-        action="store_true",
-        help="Execute workflow on AWS cloud using Tibanna. This requires "
-        "--default-remote-prefix to be set to S3 bucket name and prefix"
-        " (e.g. 'bucketname/subdirectory') where input is already stored"
-        " and output will be sent to. Using --tibanna implies --default-resources"
-        " is set as default. Optionally, use --precommand to"
-        " specify any preparation command to run before snakemake command"
-        " on the cloud (inside snakemake container on Tibanna VM)."
-        " Also, --use-conda, --use-singularity, --config, --configfile are"
-        " supported and will be carried over.",
-    )
-    group_tibanna.add_argument(
-        "--tibanna-sfn",
-        help="Name of Tibanna Unicorn step function (e.g. tibanna_unicorn_monty). "
-        "This works as serverless scheduler/resource allocator and must be "
-        "deployed first using tibanna cli. (e.g. tibanna deploy_unicorn --usergroup="
-        "monty --buckets=bucketname)",
-    )
-    group_tibanna.add_argument(
-        "--precommand",
-        help="Any command to execute before snakemake command on AWS cloud "
-        "such as wget, git clone, unzip, etc. This is used with --tibanna. "
-        "Do not include input/output download/upload commands - file transfer"
-        " between S3 bucket and the run environment (container) is automatically"
-        " handled by Tibanna.",
-    )
-    group_tibanna.add_argument(
-        "--tibanna-config",
-        nargs="+",
-        help="Additional tibanna config e.g. --tibanna-config spot_instance=true subnet="
-        " security group=",
-    )
-    group_google_life_science.add_argument(
-        "--google-lifesciences",
-        action="store_true",
-        help="Execute workflow on Google Cloud using the Google Life "
-        "Sciences API. This requires default application credentials (json) "
-        " to be created and exported to the environment to use Google Cloud "
-        " Storage, Compute Engine, and Life Sciences. The credential file "
-        " should be exported as GOOGLE_APPLICATION_CREDENTIALS for snakemake "
-        " to discover. Also, --use-conda, --use-singularity, --config, "
-        "--configfile are supported and will be carried over.",
-    )
-    group_google_life_science.add_argument(
-        "--google-lifesciences-regions",
-        nargs="+",
-        default=["us-east1", "us-west1", "us-central1"],
-        help="Specify one or more valid instance regions (defaults to US)",
-    )
-    group_google_life_science.add_argument(
-        "--google-lifesciences-location",
-        help="The Life Sciences API service used to schedule the jobs. "
-        " E.g., us-central1 (Iowa) and europe-west2 (London). "
-        " Watch the terminal output to see all options found to be available. "
-        " If not specified, defaults to the first found with a matching prefix "
-        " from regions specified with --google-lifesciences-regions.",
-    )
-    group_google_life_science.add_argument(
-        "--google-lifesciences-keep-cache",
-        action="store_true",
-        help="Cache workflows in your Google Cloud Storage Bucket specified "
-        "by --default-remote-prefix/{source}/{cache}. Each workflow working "
-        "directory is compressed to a .tar.gz, named by the hash of the "
-        "contents, and kept in Google Cloud Storage. 
By default, the caches " - "are deleted at the shutdown step of the workflow.", - ) - group_google_life_science.add_argument( - "--google-lifesciences-service-account-email", - help="Specify a service account email address", - ) - group_google_life_science.add_argument( - "--google-lifesciences-network", - help="Specify a network for a Google Compute Engine VM instance", - ) - group_google_life_science.add_argument( - "--google-lifesciences-subnetwork", - help="Specify a subnetwork for a Google Compute Engine VM instance", - ) - - group_azure_batch = parser.add_argument_group("AZURE_BATCH") - - group_azure_batch.add_argument( - "--az-batch", - action="store_true", - help="Execute workflow on azure batch", - ) - - group_azure_batch.add_argument( - "--az-batch-enable-autoscale", - action="store_true", - help="Enable autoscaling of the azure batch pool nodes, this option will set the initial dedicated node count to zero, and requires five minutes to resize the cluster, so is only recommended for longer running jobs.", - ) - - group_azure_batch.add_argument( - "--az-batch-account-url", - nargs="?", - help="Azure batch account url, requires AZ_BATCH_ACCOUNT_KEY environment variable to be set.", - ) - - group_flux.add_argument( - "--flux", - action="store_true", - help="Execute your workflow on a flux cluster. " - "Flux can work with both a shared network filesystem (like NFS) or without. " - "If you don't have a shared filesystem, additionally specify --no-shared-fs.", - ) - - group_tes.add_argument( - "--tes", - metavar="URL", - help="Send workflow tasks to GA4GH TES server specified by url.", - ) - - group_conda = parser.add_argument_group("CONDA") - - group_conda.add_argument( - "--use-conda", - action="store_true", - help="If defined in the rule, run job in a conda environment. " - "If this flag is not set, the conda directive is ignored.", - ) - group_conda.add_argument( - "--conda-not-block-search-path-envvars", - action="store_true", - help="Do not block environment variables that modify the search path " - "(R_LIBS, PYTHONPATH, PERL5LIB, PERLLIB) when using conda environments.", - ) - group_conda.add_argument( - "--list-conda-envs", - action="store_true", - help="List all conda environments and their location on disk.", - ) - group_conda.add_argument( - "--conda-prefix", - metavar="DIR", - default=os.environ.get("SNAKEMAKE_CONDA_PREFIX", None), - help="Specify a directory in which the 'conda' and 'conda-archive' " - "directories are created. These are used to store conda environments " - "and their archives, respectively. If not supplied, the value is set " - "to the '.snakemake' directory relative to the invocation directory. " - "If supplied, the `--use-conda` flag must also be set. The value may " - "be given as a relative path, which will be extrapolated to the " - "invocation directory, or as an absolute path. The value can also be " - "provided via the environment variable $SNAKEMAKE_CONDA_PREFIX.", - ) - group_conda.add_argument( - "--conda-cleanup-envs", - action="store_true", - help="Cleanup unused conda environments.", - ) - - from snakemake.deployment.conda import CondaCleanupMode - - group_conda.add_argument( - "--conda-cleanup-pkgs", - type=CondaCleanupMode, - const=CondaCleanupMode.tarballs, - choices=list(CondaCleanupMode), - nargs="?", - help="Cleanup conda packages after creating environments. " - "In case of 'tarballs' mode, will clean up all downloaded package tarballs. " - "In case of 'cache' mode, will additionally clean up unused package caches. 
" - "If mode is omitted, will default to only cleaning up the tarballs.", - ) - group_conda.add_argument( - "--conda-create-envs-only", - action="store_true", - help="If specified, only creates the job-specific " - "conda environments then exits. The `--use-conda` " - "flag must also be set.", - ) - group_conda.add_argument( - "--conda-frontend", - default="mamba", - choices=["conda", "mamba"], - help="Choose the conda frontend for installing environments. " - "Mamba is much faster and highly recommended.", - ) - - group_singularity = parser.add_argument_group("SINGULARITY") - - group_singularity.add_argument( - "--use-singularity", - action="store_true", - help="If defined in the rule, run job within a singularity container. " - "If this flag is not set, the singularity directive is ignored.", - ) - group_singularity.add_argument( - "--singularity-prefix", - metavar="DIR", - help="Specify a directory in which singularity images will be stored. " - "If not supplied, the value is set " - "to the '.snakemake' directory relative to the invocation directory. " - "If supplied, the `--use-singularity` flag must also be set. The value " - "may be given as a relative path, which will be extrapolated to the " - "invocation directory, or as an absolute path.", - ) - group_singularity.add_argument( - "--singularity-args", - default="", - metavar="ARGS", - help="Pass additional args to singularity.", - ) - group_singularity.add_argument( - "--cleanup-containers", - action="store_true", - help="Remove unused (singularity) containers", - ) - - group_env_modules = parser.add_argument_group("ENVIRONMENT MODULES") - - group_env_modules.add_argument( - "--use-envmodules", - action="store_true", - help="If defined in the rule, run job within the given environment " - "modules, loaded in the given order. This can be combined with " - "--use-conda and --use-singularity, which will then be only used as a " - "fallback for rules which don't define environment modules.", - ) - - return parser - - -def generate_parser_metadata(parser, args): - """Given a populated parser, generate the original command along with - metadata that can be handed to a logger to use as needed. 
- """ - command = "snakemake %s" % " ".join( - parser._source_to_settings["command_line"][""][1] - ) - workdir = os.getcwd() - metadata = args.__dict__ - metadata.update({"command": command}) - return metadata - - -def main(argv=None): - """Main entry point.""" - - if sys.version_info < MIN_PY_VERSION: - print( - f"Snakemake requires at least Python {MIN_PY_VERSION}.", - file=sys.stderr, - ) - exit(1) - - parser = get_argument_parser() - args = parser.parse_args(argv) - - snakefile = args.snakefile - if snakefile is None: - for p in SNAKEFILE_CHOICES: - if os.path.exists(p): - snakefile = p - break - if snakefile is None: - print( - "Error: no Snakefile found, tried {}.".format( - ", ".join(SNAKEFILE_CHOICES) - ), - file=sys.stderr, - ) - sys.exit(1) - - workflow_profile = None - if args.workflow_profile != "none": - if args.workflow_profile: - workflow_profile = args.workflow_profile - else: - # checking for default profile - default_path = Path("profiles/default") - workflow_profile_candidates = [ - default_path, - Path(snakefile).parent.joinpath(default_path), - ] - for profile in workflow_profile_candidates: - if profile.exists(): - workflow_profile = profile - break - - if args.profile == "none": - args.profile = None - - if (args.profile or workflow_profile) and args.mode == Mode.default: - # Reparse args while inferring config file from profile. - # But only do this if the user has invoked Snakemake (Mode.default) - profiles = [] - if args.profile: - profiles.append(args.profile) - if workflow_profile: - workflow_profile_stmt = f" and workflow specific profile {workflow_profile}" - profiles.append(workflow_profile) - else: - workflow_profile_stmt = "" - - print( - f"Using profile{'s' if len(profiles) > 1 else ''} " - f"{' and '.join(map(str, profiles))}{workflow_profile_stmt} for setting default command line arguments.", - file=sys.stderr, - ) - - parser = get_argument_parser(profiles=profiles) - args = parser.parse_args(argv) - - def adjust_path(f): - if os.path.exists(f) or os.path.isabs(f): - return f - else: - return get_profile_file(args.profile, f, return_default=True) - - # update file paths to be relative to the profile - # (if they do not exist relative to CWD) - if args.jobscript: - args.jobscript = adjust_path(args.jobscript) - if args.cluster: - args.cluster = adjust_path(args.cluster) - if args.cluster_config: - if isinstance(args.cluster_config, list): - args.cluster_config = [adjust_path(cfg) for cfg in args.cluster_config] - else: - args.cluster_config = adjust_path(args.cluster_config) - if args.cluster_sync: - args.cluster_sync = adjust_path(args.cluster_sync) - for key in "cluster_status", "cluster_cancel", "cluster_sidecar": - if getattr(args, key): - setattr(args, key, adjust_path(getattr(args, key))) - if args.report_stylesheet: - args.report_stylesheet = adjust_path(args.report_stylesheet) - - if args.quiet is not None and len(args.quiet) == 0: - # default case, set quiet to progress and rule - args.quiet = ["progress", "rules"] - - if args.bash_completion: - cmd = b"complete -o bashdefault -C snakemake-bash-completion snakemake" - sys.stdout.buffer.write(cmd) - sys.exit(0) - - if args.batch is not None and args.forceall: - print( - "--batch may not be combined with --forceall, because recomputed upstream " - "jobs in subsequent batches may render already obtained results outdated." 
-        )
-
-    try:
-        resources = parse_resources(args.resources)
-        config = parse_config(args)
-
-        if args.default_resources is not None:
-            default_resources = DefaultResources(args.default_resources)
-        else:
-            default_resources = None
-
-        batch = parse_batch(args)
-        overwrite_threads = parse_set_threads(args)
-        overwrite_resources = parse_set_resources(args)
-        overwrite_resource_scopes = parse_set_resource_scope(args)
-
-        overwrite_scatter = parse_set_scatter(args)
-
-        overwrite_groups = parse_groups(args)
-        group_components = parse_group_components(args)
-    except ValueError as e:
-        print(e, file=sys.stderr)
-        print("", file=sys.stderr)
-        sys.exit(1)
-
-    non_local_exec = (
-        args.cluster
-        or args.slurm
-        or args.slurm_jobstep
-        or args.cluster_sync
-        or args.tibanna
-        or args.kubernetes
-        or args.tes
-        or args.az_batch
-        or args.google_lifesciences
-        or args.drmaa
-        or args.flux
-    )
-    no_exec = (
-        args.print_compilation
-        or args.list_code_changes
-        or args.list_conda_envs
-        or args.list_input_changes
-        or args.list_params_changes
-        or args.list
-        or args.list_target_rules
-        or args.list_untracked
-        or args.list_version_changes
-        or args.export_cwl
-        or args.generate_unit_tests
-        or args.dag
-        or args.d3dag
-        or args.filegraph
-        or args.rulegraph
-        or args.summary
-        or args.detailed_summary
-        or args.lint
-        or args.containerize
-        or args.report
-        or args.gui
-        or args.archive
-        or args.unlock
-        or args.cleanup_metadata
-    )
-
-    try:
-        cores, jobs = parse_cores_jobs(
-            args.cores, args.jobs, no_exec, non_local_exec, args.dryrun
-        )
-        args.cores = cores
-        args.jobs = jobs
-    except CliException as err:
-        print(err.msg, file=sys.stderr)
-        sys.exit(1)
-
-    if args.drmaa_log_dir is not None:
-        if not os.path.isabs(args.drmaa_log_dir):
-            args.drmaa_log_dir = os.path.abspath(os.path.expanduser(args.drmaa_log_dir))
-
-    if args.runtime_profile:
-        import yappi
-
-        yappi.start()
-
-    if args.immediate_submit and not args.notemp:
-        print(
-            "Error: --immediate-submit has to be combined with --notemp, "
-            "because temp file handling is not supported in this mode.",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if (args.conda_prefix or args.conda_create_envs_only) and not args.use_conda:
-        if args.conda_prefix and os.environ.get("SNAKEMAKE_CONDA_PREFIX", False):
-            print(
-                "Warning: The environment variable SNAKEMAKE_CONDA_PREFIX is set "
-                "but --use-conda is not. "
-                "Snakemake will ignore SNAKEMAKE_CONDA_PREFIX "
-                "and conda environments will not be used or created.",
-                file=sys.stderr,
-            )
-            args.conda_prefix = None
-        else:
-            print(
-                "Error: --use-conda must be set if --conda-prefix or "
-                "--conda-create-envs-only is set.",
-                file=sys.stderr,
-            )
-            sys.exit(1)
-
-    if args.singularity_prefix and not args.use_singularity:
-        print(
-            "Error: --use-singularity must be set if --singularity-prefix is set.",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if args.kubernetes and (
-        not args.default_remote_provider or not args.default_remote_prefix
-    ):
-        print(
-            "Error: --kubernetes must be combined with "
-            "--default-remote-provider and --default-remote-prefix, see "
-            "https://snakemake.readthedocs.io/en/stable/executing/cloud.html"
-            "#executing-a-snakemake-workflow-via-kubernetes",
-            file=sys.stderr,
-        )
-        sys.exit(1)
-
-    if args.tibanna:
-        if not args.default_remote_prefix:
-            print(
-                "Error: --tibanna must be combined with --default-remote-prefix "
-                "to provide bucket name and subdirectory (prefix) "
-                "(e.g. 
'bucketname/projectname'", - file=sys.stderr, - ) - sys.exit(1) - args.default_remote_prefix = args.default_remote_prefix.rstrip("/") - if not args.tibanna_sfn: - args.tibanna_sfn = os.environ.get("TIBANNA_DEFAULT_STEP_FUNCTION_NAME", "") - if not args.tibanna_sfn: - print( - "Error: to use --tibanna, either --tibanna-sfn or environment variable " - "TIBANNA_DEFAULT_STEP_FUNCTION_NAME must be set and exported " - "to provide name of the tibanna unicorn step function " - "(e.g. 'tibanna_unicorn_monty'). The step function must be deployed first " - "using tibanna cli (e.g. tibanna deploy_unicorn --usergroup=monty " - "--buckets=bucketname)", - file=sys.stderr, - ) - sys.exit(1) - - if args.az_batch: - if not args.default_remote_provider or not args.default_remote_prefix: - print( - "Error: --az-batch must be combined with " - "--default-remote-provider AzBlob and --default-remote-prefix to " - "provide a blob container name\n", - file=sys.stderr, - ) - sys.exit(1) - elif args.az_batch_account_url is None: - print( - "Error: --az-batch-account-url must be set when --az-batch is used\n", - file=sys.stderr, - ) - sys.exit(1) - elif not url_can_parse(args.az_batch_account_url): - print( - "Error: invalide azure batch account url, please use format: https://{account_name}.{location}.batch.azure.com." - ) - sys.exit(1) - elif os.getenv("AZ_BATCH_ACCOUNT_KEY") is None: - print( - "Error: environment variable AZ_BATCH_ACCOUNT_KEY must be set when --az-batch is used\n", - file=sys.stderr, - ) - sys.exit(1) - - if args.google_lifesciences: - if ( - not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS") - and not args.google_lifesciences_service_account_email - ): - print( - "Error: Either the GOOGLE_APPLICATION_CREDENTIALS environment variable " - "or --google-lifesciences-service-account-email must be available " - "for --google-lifesciences", - file=sys.stderr, - ) - sys.exit(1) - - if not args.default_remote_prefix: - print( - "Error: --google-lifesciences must be combined with " - " --default-remote-prefix to provide bucket name and " - "subdirectory (prefix) (e.g. 'bucketname/projectname'", - file=sys.stderr, - ) - sys.exit(1) - - if args.delete_all_output and args.delete_temp_output: - print( - "Error: --delete-all-output and --delete-temp-output are mutually exclusive.", - file=sys.stderr, - ) - sys.exit(1) - - if args.gui is not None: - try: - import snakemake.gui as gui - except ImportError: - print( - "Error: GUI needs Flask to be installed. 
Install " - "with easy_install or contact your administrator.", - file=sys.stderr, - ) - sys.exit(1) - - _logging.getLogger("werkzeug").setLevel(_logging.ERROR) - - _snakemake = partial(snakemake, os.path.abspath(snakefile)) - gui.register(_snakemake, args) - - if ":" in args.gui: - host, port = args.gui.split(":") - else: - port = args.gui - host = "127.0.0.1" - - url = f"http://{host}:{port}" - print(f"Listening on {url}.", file=sys.stderr) - - def open_browser(): - try: - webbrowser.open(url) - except: - pass - - print("Open this address in your browser to access the GUI.", file=sys.stderr) - threading.Timer(0.5, open_browser).start() - success = True - - try: - gui.app.run(debug=False, threaded=True, port=int(port), host=host) - - except (KeyboardInterrupt, SystemExit): - # silently close - pass - else: - log_handler = [] - if args.log_handler_script is not None: - if not os.path.exists(args.log_handler_script): - print( - "Error: no log handler script found, {}.".format( - args.log_handler_script - ), - file=sys.stderr, - ) - sys.exit(1) - log_script = SourceFileLoader("log", args.log_handler_script).load_module() - try: - log_handler.append(log_script.log_handler) - except: - print( - 'Error: Invalid log handler script, {}. Expect python function "log_handler(msg)".'.format( - args.log_handler_script - ), - file=sys.stderr, - ) - sys.exit(1) - - if args.log_service == "slack": - slack_logger = logging.SlackLogger() - log_handler.append(slack_logger.log_handler) - - elif args.wms_monitor or args.log_service == "wms": - # Generate additional metadata for server - metadata = generate_parser_metadata(parser, args) - wms_logger = logging.WMSLogger( - args.wms_monitor, args.wms_monitor_arg, metadata=metadata - ) - log_handler.append(wms_logger.log_handler) - - if args.draft_notebook: - from snakemake import notebook - - args.target = [args.draft_notebook] - args.edit_notebook = notebook.EditMode(draft_only=True) - elif args.edit_notebook: - from snakemake import notebook - - args.target = [args.edit_notebook] - args.force = True - args.edit_notebook = notebook.EditMode(args.notebook_listen) - - aggregated_wait_for_files = args.wait_for_files - if args.wait_for_files_file is not None: - wait_for_files([args.wait_for_files_file], latency_wait=args.latency_wait) - - with open(args.wait_for_files_file) as fd: - extra_wait_files = [line.strip() for line in fd.readlines()] - - if aggregated_wait_for_files is None: - aggregated_wait_for_files = extra_wait_files - else: - aggregated_wait_for_files.extend(extra_wait_files) - - success = snakemake( - snakefile, - batch=batch, - cache=args.cache, - report=args.report, - report_stylesheet=args.report_stylesheet, - lint=args.lint, - containerize=args.containerize, - generate_unit_tests=args.generate_unit_tests, - listrules=args.list, - list_target_rules=args.list_target_rules, - cores=args.cores, - local_cores=args.local_cores, - nodes=args.jobs, - resources=resources, - overwrite_threads=overwrite_threads, - max_threads=args.max_threads, - overwrite_scatter=overwrite_scatter, - default_resources=default_resources, - overwrite_resources=overwrite_resources, - overwrite_resource_scopes=overwrite_resource_scopes, - config=config, - configfiles=args.configfile, - config_args=args.config, - workdir=args.directory, - targets=args.target, - target_jobs=parse_target_jobs_cli_args(args), - dryrun=args.dryrun, - printshellcmds=args.printshellcmds, - printreason=True, # always display reason - debug_dag=args.debug_dag, - printdag=args.dag, - 
printrulegraph=args.rulegraph, - printfilegraph=args.filegraph, - printd3dag=args.d3dag, - touch=args.touch, - forcetargets=args.force, - forceall=args.forceall, - forcerun=args.forcerun, - prioritytargets=args.prioritize, - until=args.until, - omit_from=args.omit_from, - stats=args.stats, - nocolor=args.nocolor, - quiet=args.quiet, - keepgoing=args.keep_going, - slurm=args.slurm, - slurm_jobstep=args.slurm_jobstep, - rerun_triggers=args.rerun_triggers, - cluster=args.cluster, - cluster_config=args.cluster_config, - cluster_sync=args.cluster_sync, - drmaa=args.drmaa, - drmaa_log_dir=args.drmaa_log_dir, - kubernetes=args.kubernetes, - container_image=args.container_image, - k8s_cpu_scalar=args.k8s_cpu_scalar, - k8s_service_account_name=args.k8s_service_account_name, - flux=args.flux, - tibanna=args.tibanna, - tibanna_sfn=args.tibanna_sfn, - az_batch=args.az_batch, - az_batch_enable_autoscale=args.az_batch_enable_autoscale, - az_batch_account_url=args.az_batch_account_url, - google_lifesciences=args.google_lifesciences, - google_lifesciences_regions=args.google_lifesciences_regions, - google_lifesciences_location=args.google_lifesciences_location, - google_lifesciences_cache=args.google_lifesciences_keep_cache, - google_lifesciences_service_account_email=args.google_lifesciences_service_account_email, - google_lifesciences_network=args.google_lifesciences_network, - google_lifesciences_subnetwork=args.google_lifesciences_subnetwork, - tes=args.tes, - precommand=args.precommand, - preemption_default=args.preemption_default, - preemptible_rules=args.preemptible_rules, - tibanna_config=args.tibanna_config, - jobname=args.jobname, - immediate_submit=args.immediate_submit, - standalone=True, - ignore_ambiguity=args.allow_ambiguity, - lock=not args.nolock, - unlock=args.unlock, - cleanup_metadata=args.cleanup_metadata, - conda_cleanup_envs=args.conda_cleanup_envs, - cleanup_containers=args.cleanup_containers, - cleanup_shadow=args.cleanup_shadow, - force_incomplete=args.rerun_incomplete, - ignore_incomplete=args.ignore_incomplete, - list_version_changes=args.list_version_changes, - list_code_changes=args.list_code_changes, - list_input_changes=args.list_input_changes, - list_params_changes=args.list_params_changes, - list_untracked=args.list_untracked, - summary=args.summary, - detailed_summary=args.detailed_summary, - archive=args.archive, - delete_all_output=args.delete_all_output, - delete_temp_output=args.delete_temp_output, - print_compilation=args.print_compilation, - verbose=args.verbose, - debug=args.debug, - jobscript=args.jobscript, - notemp=args.notemp, - all_temp=args.all_temp, - keep_remote_local=args.keep_remote, - greediness=args.greediness, - no_hooks=args.no_hooks, - overwrite_shellcmd=args.overwrite_shellcmd, - latency_wait=args.latency_wait, - wait_for_files=aggregated_wait_for_files, - keep_target_files=args.keep_target_files, - allowed_rules=args.allowed_rules, - max_jobs_per_second=args.max_jobs_per_second, - max_status_checks_per_second=args.max_status_checks_per_second, - restart_times=args.retries, - attempt=args.attempt, - force_use_threads=args.force_use_threads, - use_conda=args.use_conda, - conda_frontend=args.conda_frontend, - conda_prefix=args.conda_prefix, - conda_cleanup_pkgs=args.conda_cleanup_pkgs, - list_conda_envs=args.list_conda_envs, - use_singularity=args.use_singularity, - use_env_modules=args.use_envmodules, - singularity_prefix=args.singularity_prefix, - shadow_prefix=args.shadow_prefix, - singularity_args=args.singularity_args, - 
scheduler=args.scheduler, - scheduler_ilp_solver=args.scheduler_ilp_solver, - conda_create_envs_only=args.conda_create_envs_only, - mode=args.mode, - wrapper_prefix=args.wrapper_prefix, - default_remote_provider=args.default_remote_provider, - default_remote_prefix=args.default_remote_prefix, - assume_shared_fs=not args.no_shared_fs, - cluster_status=args.cluster_status, - cluster_cancel=args.cluster_cancel, - cluster_cancel_nargs=args.cluster_cancel_nargs, - cluster_sidecar=args.cluster_sidecar, - export_cwl=args.export_cwl, - show_failed_logs=args.show_failed_logs, - keep_incomplete=args.keep_incomplete, - keep_metadata=not args.drop_metadata, - edit_notebook=args.edit_notebook, - envvars=args.envvars, - overwrite_groups=overwrite_groups, - group_components=group_components, - max_inventory_wait_time=args.max_inventory_time, - log_handler=log_handler, - execute_subworkflows=not args.no_subworkflows, - conda_not_block_search_path_envvars=args.conda_not_block_search_path_envvars, - scheduler_solver_path=args.scheduler_solver_path, - conda_base_path=args.conda_base_path, - local_groupid=args.local_groupid, - cleanup_scripts=not args.skip_script_cleanup, - ) - - if args.runtime_profile: - with open(args.runtime_profile, "w") as out: - profile = yappi.get_func_stats() - profile.sort("totaltime") - profile.print_all( - out=out, - columns={ - 0: ("name", 120), - 1: ("ncall", 10), - 2: ("tsub", 8), - 3: ("ttot", 8), - 4: ("tavg", 8), - }, - ) - - sys.exit(0 if success else 1) - - -def bash_completion(snakefile="Snakefile"): - """Entry point for bash completion.""" - if not len(sys.argv) >= 2: - print( - "Calculate bash completion for snakemake. This tool shall not be invoked by hand." - ) - sys.exit(1) - - def print_candidates(candidates): - if candidates: - candidates = sorted(set(candidates)) - ## Use bytes for avoiding '^M' under Windows. 
- sys.stdout.buffer.write(b"\n".join(s.encode() for s in candidates)) - - prefix = sys.argv[2] - - if prefix.startswith("-"): - print_candidates( - action.option_strings[0] - for action in get_argument_parser()._actions - if action.option_strings and action.option_strings[0].startswith(prefix) - ) - else: - candidates = [] - files = glob.glob(f"{prefix}*") - if files: - candidates.extend(files) - if os.path.exists(snakefile): - workflow = Workflow(snakefile=snakefile) - workflow.include(snakefile) - - candidates.extend( - [file for file in workflow.concrete_files if file.startswith(prefix)] - + [rule.name for rule in workflow.rules if rule.name.startswith(prefix)] - ) - if len(candidates) > 0: - print_candidates(candidates) - sys.exit(0) diff --git a/snakemake/__main__.py b/snakemake/__main__.py index a628905d4..1e11af197 100644 --- a/snakemake/__main__.py +++ b/snakemake/__main__.py @@ -1,4 +1,4 @@ # This script makes it possible to invoke snakemake with 'python3 -m snakemake' -from snakemake import main +from snakemake.cli import main main() diff --git a/snakemake/api.py b/snakemake/api.py new file mode 100644 index 000000000..6b9361efc --- /dev/null +++ b/snakemake/api.py @@ -0,0 +1,859 @@ +__author__ = "Johannes Köster" +__copyright__ = "Copyright 2022, Johannes Köster" +__email__ = "johannes.koester@uni-due.de" +__license__ = "MIT" + +import sys + +from snakemake.common import MIN_PY_VERSION + +if sys.version_info < MIN_PY_VERSION: + raise ValueError(f"Snakemake requires at least Python {'.'.join(MIN_PY_VERSION)}.") + +import os +from functools import partial +import importlib + +from snakemake_interface_executor_plugins.utils import ExecMode + +from snakemake.workflow import Workflow +from snakemake.exceptions import ( + print_exception, + WorkflowError, +) +from snakemake.logging import setup_logger, logger +from snakemake.io import load_configfile +from snakemake.shell import shell +from snakemake.utils import update_config +from snakemake.common import ( + MIN_PY_VERSION, + RERUN_TRIGGERS, + __version__, + dict_to_key_value_args, +) +from snakemake.resources import DefaultResources + + +def snakemake( + snakefile, + batch=None, + cache=None, + report=None, + report_stylesheet=None, + containerize=False, + lint=None, + generate_unit_tests=None, + listrules=False, + list_target_rules=False, + cores=1, + nodes=None, + local_cores=1, + max_threads=None, + resources=dict(), + overwrite_threads=None, + overwrite_scatter=None, + overwrite_resource_scopes=None, + default_resources=None, + overwrite_resources=None, + config=dict(), + configfiles=None, + config_args=None, + workdir=None, + targets=None, + target_jobs=None, + dryrun=False, + touch=False, + forcetargets=False, + forceall=False, + forcerun=[], + until=[], + omit_from=[], + prioritytargets=[], + stats=None, + printshellcmds=False, + debug_dag=False, + printdag=False, + printrulegraph=False, + printfilegraph=False, + printd3dag=False, + nocolor=False, + quiet=False, + keepgoing=False, + slurm=None, + slurm_jobstep=None, + rerun_triggers=RERUN_TRIGGERS, + cluster=None, + cluster_sync=None, + drmaa=None, + drmaa_log_dir=None, + jobname="snakejob.{rulename}.{jobid}.sh", + immediate_submit=False, + standalone=False, + ignore_ambiguity=False, + snakemakepath=None, + lock=True, + unlock=False, + cleanup_metadata=None, + conda_cleanup_envs=False, + cleanup_shadow=False, + cleanup_scripts=True, + cleanup_containers=False, + force_incomplete=False, + ignore_incomplete=False, + list_version_changes=False, + list_code_changes=False, + 
list_input_changes=False, + list_params_changes=False, + list_untracked=False, + list_resources=False, + summary=False, + archive=None, + delete_all_output=False, + delete_temp_output=False, + detailed_summary=False, + latency_wait=3, + wait_for_files=None, + print_compilation=False, + debug=False, + notemp=False, + all_temp=False, + keep_remote_local=False, + nodeps=False, + keep_target_files=False, + allowed_rules=None, + jobscript=None, + greediness=None, + no_hooks=False, + overwrite_shellcmd=None, + updated_files=None, + log_handler=[], + keep_logger=False, + max_jobs_per_second=None, + max_status_checks_per_second=100, + restart_times=0, + attempt=1, + verbose=False, + force_use_threads=False, + use_conda=False, + use_singularity=False, + use_env_modules=False, + singularity_args="", + conda_frontend="conda", + conda_prefix=None, + conda_cleanup_pkgs=None, + list_conda_envs=False, + singularity_prefix=None, + shadow_prefix=None, + scheduler="ilp", + scheduler_ilp_solver=None, + conda_create_envs_only=False, + mode=ExecMode.default, + wrapper_prefix=None, + kubernetes=None, + container_image=None, + k8s_cpu_scalar=1.0, + k8s_service_account_name=None, + flux=False, + tibanna=False, + tibanna_sfn=None, + az_batch=False, + az_batch_enable_autoscale=False, + az_batch_account_url=None, + google_lifesciences=False, + google_lifesciences_regions=None, + google_lifesciences_location=None, + google_lifesciences_cache=False, + google_lifesciences_service_account_email=None, + google_lifesciences_network=None, + google_lifesciences_subnetwork=None, + tes=None, + preemption_default=None, + preemptible_rules=None, + precommand="", + default_remote_provider=None, + default_remote_prefix="", + tibanna_config=False, + assume_shared_fs=True, + cluster_status=None, + cluster_cancel=None, + cluster_cancel_nargs=None, + cluster_sidecar=None, + export_cwl=None, + show_failed_logs=False, + keep_incomplete=False, + keep_metadata=True, + messaging=None, + edit_notebook=None, + envvars=None, + overwrite_groups=None, + group_components=None, + max_inventory_wait_time=20, + execute_subworkflows=True, + conda_not_block_search_path_envvars=False, + scheduler_solver_path=None, + conda_base_path=None, + local_groupid="local", + executor_args=None, +): + """Run snakemake on a given snakefile. + + This function provides access to the whole snakemake functionality. It is not thread-safe. + + Args: + snakefile (str): the path to the snakefile + batch (Batch): whether to compute only a partial DAG, defined by the given Batch object (default None) + report (str): create an HTML report for a previous run at the given path + lint (str): print lints instead of executing (None, "plain" or "json", default None) + listrules (bool): list rules (default False) + list_target_rules (bool): list target rules (default False) + cores (int): the number of provided cores (ignored when using cluster support) (default 1) + nodes (int): the number of provided cluster nodes (ignored without cluster support) (default 1) + local_cores (int): the number of provided local cores if in cluster mode (ignored without cluster support) (default 1) + resources (dict): provided resources, a dictionary assigning integers to resource names, e.g. {gpu=1, io=5} (default {}) + default_resources (DefaultResources): default values for resources not defined in rules (default None) + config (dict): override values for workflow config + workdir (str): path to the working directory (default None) + targets (list): list of targets, e.g. 
rule or file names (default None) + target_jobs (dict): list of snakemake.target_jobs.TargetSpec objects directly targeting specific jobs (default None) + dryrun (bool): only dry-run the workflow (default False) + touch (bool): only touch all output files if present (default False) + forcetargets (bool): force given targets to be re-created (default False) + forceall (bool): force all output files to be re-created (default False) + forcerun (list): list of files and rules that shall be re-created/re-executed (default []) + execute_subworkflows (bool): execute subworkflows if present (default True) + prioritytargets (list): list of targets that shall be run with maximum priority (default []) + stats (str): path to file that shall contain stats about the workflow execution (default None) + printshellcmds (bool): print the shell command of each job (default False) + printdag (bool): print the dag in the graphviz dot language (default False) + printrulegraph (bool): print the graph of rules in the graphviz dot language (default False) + printfilegraph (bool): print the graph of rules with their input and output files in the graphviz dot language (default False) + printd3dag (bool): print a D3.js compatible JSON representation of the DAG (default False) + nocolor (bool): do not print colored output (default False) + quiet (bool): do not print any default job information (default False) + keepgoing (bool): keep going upon errors (default False) + cluster (str): submission command of a cluster or batch system to use, e.g. qsub (default None) + cluster_sync (str): blocking cluster submission command (like SGE 'qsub -sync y') (default None) + drmaa (str): if not None use DRMAA for cluster support, str specifies native args passed to the cluster when submitting a job + drmaa_log_dir (str): the path to stdout and stderr output of DRMAA jobs (default None) + jobname (str): naming scheme for cluster job scripts (default "snakejob.{rulename}.{jobid}.sh") + immediate_submit (bool): immediately submit all cluster jobs, regardless of dependencies (default False) + standalone (bool): kill all processes very rudely in case of failure (do not use this if you use this API) (default False) (deprecated) + ignore_ambiguity (bool): ignore ambiguous rules and always take the first possible one (default False) + snakemakepath (str): deprecated parameter whose value is ignored. Do not use. 
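Given the size of the argument list documented here, a minimal programmatic invocation may be easier to digest. The following is only a sketch; the ``Snakefile`` path and the ``all`` target are placeholders:

.. code-block:: python

    import sys
    from snakemake.api import snakemake

    # dry-run with four local cores; drop dryrun=True to actually execute
    success = snakemake(
        "Snakefile",
        cores=4,
        targets=["all"],
        printshellcmds=True,
        dryrun=True,
    )
    # cluster execution would additionally set e.g. cluster="sbatch ..." and nodes=N
    sys.exit(0 if success else 1)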
+ lock (bool): lock the working directory when executing the workflow (default True) + unlock (bool): just unlock the working directory (default False) + cleanup_metadata (list): just cleanup metadata of given list of output files (default None) + drop_metadata (bool): drop metadata file tracking information after job finishes (--report and --list_x_changes information will be incomplete) (default False) + conda_cleanup_envs (bool): just cleanup unused conda environments (default False) + cleanup_shadow (bool): just cleanup old shadow directories (default False) + cleanup_scripts (bool): delete wrapper scripts used for execution (default True) + cleanup_containers (bool): delete unused (singularity) containers (default False) + force_incomplete (bool): force the re-creation of incomplete files (default False) + ignore_incomplete (bool): ignore incomplete files (default False) + list_version_changes (bool): list output files with changed rule version (default False) + list_code_changes (bool): list output files with changed rule code (default False) + list_input_changes (bool): list output files with changed input files (default False) + list_params_changes (bool): list output files with changed params (default False) + list_untracked (bool): list files in the workdir that are not used in the workflow (default False) + summary (bool): list summary of all output files and their status (default False) + archive (str): archive workflow into the given tarball + delete_all_output (bool): remove all files generated by the workflow (default False) + delete_temp_output (bool): remove all temporary files generated by the workflow (default False) + latency_wait (int): how many seconds to wait for an output file to appear after the execution of a job, e.g. to handle filesystem latency (default 3) + wait_for_files (list): wait for given files to be present before executing the workflow + list_resources (bool): list resources used in the workflow (default False) + summary (bool): list summary of all output files and their status (default False). If no option is specified a basic summary will be output. If 'detailed' is added as an option e.g --summary detailed, extra info about the input and shell commands will be included + detailed_summary (bool): list summary of all input and output files and their status (default False) + print_compilation (bool): print the compilation of the snakefile (default False) + debug (bool): allow to use the debugger within rules + notemp (bool): ignore temp file flags, e.g. do not delete output files marked as a temp after use (default False) + keep_remote_local (bool): keep local copies of remote files (default False) + nodeps (bool): ignore dependencies (default False) + keep_target_files (bool): do not adjust the paths of given target files relative to the working directory. + allowed_rules (set): restrict allowed rules to the given set. If None or empty, all rules are used. + jobscript (str): path to a custom shell script template for cluster jobs (default None) + greediness (float): set the greediness of scheduling. This value between 0 and 1 determines how careful jobs are selected for execution. The default value (0.5 if prioritytargets are used, 1.0 else) provides the best speed and still acceptable scheduling quality. + overwrite_shellcmd (str): a shell command that shall be executed instead of those given in the workflow. This is for debugging purposes only. 
+ updated_files(list): a list that will be filled with the files that are updated or created during the workflow execution + verbose (bool): show additional debug output (default False) + max_jobs_per_second (int): maximal number of cluster/drmaa jobs per second, None to impose no limit (default None) + restart_times (int): number of times to restart failing jobs (default 0) + attempt (int): initial value of Job.attempt. This is intended for internal use only (default 1). + force_use_threads: whether to force the use of threads over processes. helpful if shared memory is full or unavailable (default False) + use_conda (bool): use conda environments for each job (defined with conda directive of rules) + use_singularity (bool): run jobs in singularity containers (if defined with singularity directive) + use_env_modules (bool): load environment modules if defined in rules + singularity_args (str): additional arguments to pass to a singularity + conda_prefix (str): the directory in which conda environments will be created (default None) + conda_cleanup_pkgs (snakemake.deployment.conda.CondaCleanupMode): + whether to clean up conda tarballs after env creation (default None), valid values: "tarballs", "cache" + singularity_prefix (str): the directory to which singularity images will be pulled (default None) + shadow_prefix (str): prefix for shadow directories. The job-specific shadow directories will be created in $SHADOW_PREFIX/shadow/ (default None) + conda_create_envs_only (bool): if specified, only builds the conda environments specified for each job, then exits. + list_conda_envs (bool): list conda environments and their location on disk. + mode (snakemake.common.Mode): execution mode + wrapper_prefix (str): prefix for wrapper script URLs (default None) + kubernetes (str): submit jobs to Kubernetes, using the given namespace. + container_image (str): Docker image to use, e.g., for Kubernetes. + k8s_cpu_scalar (float): What proportion of each k8s node's CPUs are availabe to snakemake? + k8s_service_account_name (str): Custom k8s service account, needed for workload identity. + flux (bool): Launch workflow to flux cluster. + default_remote_provider (str): default remote provider to use instead of local files (e.g. S3, GS) + default_remote_prefix (str): prefix for default remote provider (e.g. name of the bucket). + tibanna (bool): submit jobs to AWS cloud using Tibanna. + tibanna_sfn (str): Step function (Unicorn) name of Tibanna (e.g. tibanna_unicorn_monty). This must be deployed first using tibanna cli. + az_batch (bool): Submit jobs to azure batch. + az_batch_enable_autoscale (bool): Enable autoscaling of the azure batch pool nodes. This sets the initial dedicated node pool count to zero and resizes the pool only after 5 minutes. So this flag is only recommended for relatively long running jobs., + az_batch_account_url (str): Azure batch account url. + google_lifesciences (bool): submit jobs to Google Cloud Life Sciences (pipelines API). + google_lifesciences_regions (list): a list of regions (e.g., us-east1) + google_lifesciences_location (str): Life Sciences API location (e.g., us-central1) + google_lifesciences_cache (bool): save a cache of the compressed working directories in Google Cloud Storage for later usage. + google_lifesciences_service_account_email (str): Service account to install on Google pipelines API VM instance. + google_lifesciences_network (str): Network name for Google VM instances. + google_lifesciences_subnetwork (str): Subnetwork name for Google VM instances. 
+ tes (str): Execute workflow tasks on GA4GH TES server given by URL. + precommand (str): commands to run on AWS cloud before the snakemake command (e.g. wget, git clone, unzip, etc). Use with --tibanna. + preemption_default (int): set a default number of preemptible instance retries (for Google Life Sciences executor only) + preemptible_rules (list): define custom preemptible instance retries for specific rules (for Google Life Sciences executor only) + tibanna_config (list): Additional tibanna config e.g. --tibanna-config spot_instance=true subnet= security group= + assume_shared_fs (bool): assume that cluster nodes share a common filesystem (default true). + cluster_status (str): status command for cluster execution. If None, Snakemake will rely on flag files. Otherwise, it expects the command to return "success", "failure" or "running" when executing with a cluster jobid as a single argument. + cluster_cancel (str): command to cancel multiple job IDs (like SLURM 'scancel') (default None) + cluster_cancel_nargs (int): maximal number of job ids to pass to cluster_cancel (default 1000) + cluster_sidecar (str): command that starts a sidecar process, see cluster documentation (default None) + export_cwl (str): Compile workflow to CWL and save to given file + log_handler (function): redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None). The log message dictionary for the log handler has to following entries: + keep_incomplete (bool): keep incomplete output files of failed jobs + edit_notebook (object): "notebook.EditMode" object to configure notebook server for interactive editing of a rule notebook. If None, do not edit. + scheduler (str): Select scheduling algorithm (default ilp) + scheduler_ilp_solver (str): Set solver for ilp scheduler. + overwrite_groups (dict): Rule to group assignments (default None) + group_components (dict): Number of connected components given groups shall span before being split up (1 by default if empty) + conda_not_block_search_path_envvars (bool): Do not block search path envvars (R_LIBS, PYTHONPATH, ...) when using conda environments. + scheduler_solver_path (str): Path to Snakemake environment (this can be used to e.g. overwrite the search path for the ILP solver used during scheduling). + conda_base_path (str): Path to conda base environment (this can be used to overwrite the search path for conda, mamba, and activate). + local_groupid (str): Local groupid to use as a placeholder for groupid-referrring input functions of local jobs (internal use only, default: local). + log_handler (list): redirect snakemake output to this list of custom log handlers, each a function that takes a log message dictionary (see below) as its only argument (default []). The log message dictionary for the log handler has to following entries: + executor_args (dataclasses.Dataclass): custom Data class to pass to custom executors for more flexibility + :level: + the log level ("info", "error", "debug", "progress", "job_info") + + :level="info", "error" or "debug": + :msg: + the log message + :level="progress": + :done: + number of already executed jobs + + :total: + number of total jobs + + :level="job_info": + :input: + list of input files of a job + + :output: + list of output files of a job + + :log: + path to log file of a job + + :local: + whether a job is executed locally (i.e. 
ignoring cluster) + + :msg: + the job message + + :reason: + the job reason + + :priority: + the job priority + + :threads: + the threads of the job + + + Returns: + bool: True if workflow execution was successful. + + """ + assert not immediate_submit or ( + immediate_submit and notemp + ), "immediate_submit has to be combined with notemp (it does not support temp file handling)" + + if tibanna: + assume_shared_fs = False + default_remote_provider = "S3" + default_remote_prefix = default_remote_prefix.rstrip("/") + assert ( + default_remote_prefix + ), "default_remote_prefix needed if tibanna is specified" + assert tibanna_sfn, "tibanna_sfn needed if tibanna is specified" + if tibanna_config: + tibanna_config_dict = dict() + for cf in tibanna_config: + k, v = cf.split("=") + if v == "true": + v = True + elif v == "false": + v = False + elif v.isnumeric(): + v = int(v) + else: + try: + v = float(v) + except ValueError: + pass + tibanna_config_dict.update({k: v}) + tibanna_config = tibanna_config_dict + + # Azure batch uses compute engine and storage + if az_batch: + assume_shared_fs = False + default_remote_provider = "AzBlob" + + # Google Cloud Life Sciences API uses compute engine and storage + if google_lifesciences: + assume_shared_fs = False + default_remote_provider = "GS" + default_remote_prefix = default_remote_prefix.rstrip("/") + if kubernetes: + assume_shared_fs = False + + # Currently preemptible instances only supported for Google LifeSciences Executor + if preemption_default or preemptible_rules and not google_lifesciences: + logger.warning( + "Preemptible instances are only available for the Google Life Sciences Executor." + ) + + if updated_files is None: + updated_files = list() + + run_local = not ( + cluster + or cluster_sync + or drmaa + or kubernetes + or tibanna + or az_batch + or google_lifesciences + or tes + or slurm + or slurm_jobstep + ) + if run_local: + if not dryrun: + # clean up all previously recorded jobids. + shell.cleanup() + else: + if default_resources is None: + # use full default resources if in cluster or cloud mode + default_resources = DefaultResources(mode="full") + if edit_notebook: + raise WorkflowError( + "Notebook edit mode is only allowed with local execution." 
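The ``log_handler`` contract described in the docstring above can be illustrated with a small custom handler. This is only a sketch; the filtering on particular levels is an arbitrary choice:

.. code-block:: python

    def my_log_handler(msg: dict):
        # every message carries a "level"; "job_info" messages additionally
        # provide entries such as "msg", "input" and "output" (see the docstring above)
        if msg.get("level") == "error":
            print("ERROR:", msg.get("msg"))
        elif msg.get("level") == "job_info":
            print("job:", msg.get("msg"), "->", msg.get("output"))

    # passed to the API as log_handler=[my_log_handler]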
+ ) + + shell.conda_block_conflicting_envvars = not conda_not_block_search_path_envvars + + # force thread use for any kind of cluster + use_threads = ( + force_use_threads + or (os.name not in ["posix", "nt"]) + or cluster + or cluster_sync + or drmaa + ) + + if not keep_logger: + stdout = ( + ( + dryrun + and not (printdag or printd3dag or printrulegraph or printfilegraph) + ) + or listrules + or list_target_rules + or list_resources + ) + + setup_logger( + handler=log_handler, + quiet=quiet, + printshellcmds=printshellcmds, + debug_dag=debug_dag, + nocolor=nocolor, + stdout=stdout, + debug=verbose, + use_threads=use_threads, + mode=mode, + show_failed_logs=show_failed_logs, + dryrun=dryrun, + ) + + if greediness is None: + greediness = 0.5 if prioritytargets else 1.0 + else: + if not (0 <= greediness <= 1.0): + logger.error("Error: greediness must be a float between 0 and 1.") + return False + + if not os.path.exists(snakefile): + logger.error(f'Error: Snakefile "{snakefile}" not found.') + return False + snakefile = os.path.abspath(snakefile) + + cluster_mode = ( + (cluster is not None) + (cluster_sync is not None) + (drmaa is not None) + ) + if cluster_mode > 1: + logger.error("Error: cluster and drmaa args are mutually exclusive") + return False + + if debug and (cluster_mode or cores is not None and cores > 1): + logger.error( + "Error: debug mode cannot be used with more than one core or cluster execution." + ) + return False + + overwrite_config = dict() + if configfiles is None: + configfiles = [] + for f in configfiles: + # get values to override. Later configfiles override earlier ones. + update_config(overwrite_config, load_configfile(f)) + # convert provided paths to absolute paths + configfiles = list(map(os.path.abspath, configfiles)) + + # directly specified elements override any configfiles + if config: + update_config(overwrite_config, config) + if config_args is None: + config_args = dict_to_key_value_args(config) + + if workdir: + olddir = os.getcwd() + if not os.path.exists(workdir): + logger.info(f"Creating specified working directory {workdir}.") + os.makedirs(workdir) + workdir = os.path.abspath(workdir) + os.chdir(workdir) + + logger.setup_logfile() + + try: + # handle default remote provider + _default_remote_provider = None + if default_remote_provider is not None: + try: + rmt = importlib.import_module( + "snakemake.remote." + default_remote_provider + ) + except ImportError as e: + raise WorkflowError("Unknown default remote provider.") + if rmt.RemoteProvider.supports_default: + _default_remote_provider = rmt.RemoteProvider( + keep_local=keep_remote_local, is_default=True + ) + else: + raise WorkflowError( + "Remote provider {} does not (yet) support to " + "be used as default provider." 
+ ) + + workflow = Workflow( + snakefile=snakefile, + rerun_triggers=rerun_triggers, + jobscript=jobscript, + overwrite_shellcmd=overwrite_shellcmd, + overwrite_config=overwrite_config, + overwrite_workdir=workdir, + overwrite_configfiles=configfiles, + overwrite_threads=overwrite_threads, + max_threads=max_threads, + overwrite_scatter=overwrite_scatter, + overwrite_groups=overwrite_groups, + overwrite_resources=overwrite_resources, + overwrite_resource_scopes=overwrite_resource_scopes, + group_components=group_components, + config_args=config_args, + debug=debug, + verbose=verbose, + use_conda=use_conda or list_conda_envs or conda_cleanup_envs, + use_singularity=use_singularity, + use_env_modules=use_env_modules, + conda_frontend=conda_frontend, + conda_prefix=conda_prefix, + conda_cleanup_pkgs=conda_cleanup_pkgs, + singularity_prefix=singularity_prefix, + shadow_prefix=shadow_prefix, + singularity_args=singularity_args, + scheduler_type=scheduler, + scheduler_ilp_solver=scheduler_ilp_solver, + mode=mode, + wrapper_prefix=wrapper_prefix, + printshellcmds=printshellcmds, + restart_times=restart_times, + attempt=attempt, + default_remote_provider=_default_remote_provider, + default_remote_prefix=default_remote_prefix, + run_local=run_local, + assume_shared_fs=assume_shared_fs, + default_resources=default_resources, + cache=cache, + cores=cores, + nodes=nodes, + resources=resources, + edit_notebook=edit_notebook, + envvars=envvars, + max_inventory_wait_time=max_inventory_wait_time, + conda_not_block_search_path_envvars=conda_not_block_search_path_envvars, + execute_subworkflows=execute_subworkflows, + scheduler_solver_path=scheduler_solver_path, + conda_base_path=conda_base_path, + check_envvars=not lint, # for linting, we do not need to check whether requested envvars exist + all_temp=all_temp, + local_groupid=local_groupid, + keep_metadata=keep_metadata, + latency_wait=latency_wait, + executor_args=executor_args, + cleanup_scripts=cleanup_scripts, + immediate_submit=immediate_submit, + quiet=quiet, + ) + success = True + + workflow.include( + snakefile, + overwrite_default_target=True, + print_compilation=print_compilation, + ) + workflow.check() + + if not print_compilation: + if lint: + success = not workflow.lint(json=lint == "json") + elif listrules: + workflow.list_rules() + elif list_target_rules: + workflow.list_rules(only_targets=True) + elif list_resources: + workflow.list_resources() + else: + # if not printdag and not printrulegraph: + # handle subworkflows + subsnakemake = partial( + snakemake, + local_cores=local_cores, + max_threads=max_threads, + cache=cache, + overwrite_threads=overwrite_threads, + overwrite_scatter=overwrite_scatter, + overwrite_resources=overwrite_resources, + overwrite_resource_scopes=overwrite_resource_scopes, + default_resources=default_resources, + dryrun=dryrun, + touch=touch, + printshellcmds=printshellcmds, + debug_dag=debug_dag, + nocolor=nocolor, + quiet=quiet, + keepgoing=keepgoing, + cluster=cluster, + cluster_sync=cluster_sync, + drmaa=drmaa, + drmaa_log_dir=drmaa_log_dir, + jobname=jobname, + immediate_submit=immediate_submit, + standalone=standalone, + ignore_ambiguity=ignore_ambiguity, + restart_times=restart_times, + attempt=attempt, + lock=lock, + unlock=unlock, + cleanup_metadata=cleanup_metadata, + conda_cleanup_envs=conda_cleanup_envs, + cleanup_containers=cleanup_containers, + cleanup_shadow=cleanup_shadow, + cleanup_scripts=cleanup_scripts, + force_incomplete=force_incomplete, + ignore_incomplete=ignore_incomplete, + 
latency_wait=latency_wait, + verbose=verbose, + notemp=notemp, + all_temp=all_temp, + keep_remote_local=keep_remote_local, + nodeps=nodeps, + jobscript=jobscript, + greediness=greediness, + no_hooks=no_hooks, + overwrite_shellcmd=overwrite_shellcmd, + config=config, + config_args=config_args, + keep_logger=True, + force_use_threads=use_threads, + use_conda=use_conda, + use_singularity=use_singularity, + use_env_modules=use_env_modules, + conda_prefix=conda_prefix, + conda_cleanup_pkgs=conda_cleanup_pkgs, + conda_frontend=conda_frontend, + singularity_prefix=singularity_prefix, + shadow_prefix=shadow_prefix, + singularity_args=singularity_args, + scheduler=scheduler, + scheduler_ilp_solver=scheduler_ilp_solver, + list_conda_envs=list_conda_envs, + kubernetes=kubernetes, + container_image=container_image, + k8s_cpu_scalar=k8s_cpu_scalar, + k8s_service_account_name=k8s_service_account_name, + conda_create_envs_only=conda_create_envs_only, + default_remote_provider=default_remote_provider, + default_remote_prefix=default_remote_prefix, + tibanna=tibanna, + tibanna_sfn=tibanna_sfn, + az_batch=az_batch, + az_batch_enable_autoscale=az_batch_enable_autoscale, + az_batch_account_url=az_batch_account_url, + google_lifesciences=google_lifesciences, + google_lifesciences_regions=google_lifesciences_regions, + google_lifesciences_location=google_lifesciences_location, + google_lifesciences_cache=google_lifesciences_cache, + google_lifesciences_service_account_email=google_lifesciences_service_account_email, + google_lifesciences_network=google_lifesciences_network, + google_lifesciences_subnetwork=google_lifesciences_subnetwork, + flux=flux, + tes=tes, + precommand=precommand, + preemption_default=preemption_default, + preemptible_rules=preemptible_rules, + tibanna_config=tibanna_config, + assume_shared_fs=assume_shared_fs, + cluster_status=cluster_status, + cluster_cancel=cluster_cancel, + cluster_cancel_nargs=cluster_cancel_nargs, + cluster_sidecar=cluster_sidecar, + max_jobs_per_second=max_jobs_per_second, + max_status_checks_per_second=max_status_checks_per_second, + overwrite_groups=overwrite_groups, + group_components=group_components, + max_inventory_wait_time=max_inventory_wait_time, + conda_not_block_search_path_envvars=conda_not_block_search_path_envvars, + local_groupid=local_groupid, + ) + success = workflow.execute( + targets=targets, + target_jobs=target_jobs, + dryrun=dryrun, + generate_unit_tests=generate_unit_tests, + touch=touch, + scheduler_type=scheduler, + scheduler_ilp_solver=scheduler_ilp_solver, + local_cores=local_cores, + forcetargets=forcetargets, + forceall=forceall, + forcerun=forcerun, + prioritytargets=prioritytargets, + until=until, + omit_from=omit_from, + keepgoing=keepgoing, + printrulegraph=printrulegraph, + printfilegraph=printfilegraph, + printdag=printdag, + slurm=slurm, + slurm_jobstep=slurm_jobstep, + cluster=cluster, + cluster_sync=cluster_sync, + jobname=jobname, + drmaa=drmaa, + drmaa_log_dir=drmaa_log_dir, + kubernetes=kubernetes, + container_image=container_image, + k8s_cpu_scalar=k8s_cpu_scalar, + k8s_service_account_name=k8s_service_account_name, + tibanna=tibanna, + tibanna_sfn=tibanna_sfn, + az_batch=az_batch, + az_batch_enable_autoscale=az_batch_enable_autoscale, + az_batch_account_url=az_batch_account_url, + google_lifesciences=google_lifesciences, + google_lifesciences_regions=google_lifesciences_regions, + google_lifesciences_location=google_lifesciences_location, + google_lifesciences_cache=google_lifesciences_cache, + 
google_lifesciences_service_account_email=google_lifesciences_service_account_email, + google_lifesciences_network=google_lifesciences_network, + google_lifesciences_subnetwork=google_lifesciences_subnetwork, + tes=tes, + flux=flux, + precommand=precommand, + preemption_default=preemption_default, + preemptible_rules=preemptible_rules, + tibanna_config=tibanna_config, + max_jobs_per_second=max_jobs_per_second, + max_status_checks_per_second=max_status_checks_per_second, + printd3dag=printd3dag, + ignore_ambiguity=ignore_ambiguity, + stats=stats, + force_incomplete=force_incomplete, + ignore_incomplete=ignore_incomplete, + list_version_changes=list_version_changes, + list_code_changes=list_code_changes, + list_input_changes=list_input_changes, + list_params_changes=list_params_changes, + list_untracked=list_untracked, + list_conda_envs=list_conda_envs, + summary=summary, + archive=archive, + delete_all_output=delete_all_output, + delete_temp_output=delete_temp_output, + wait_for_files=wait_for_files, + detailed_summary=detailed_summary, + nolock=not lock, + unlock=unlock, + notemp=notemp, + keep_remote_local=keep_remote_local, + nodeps=nodeps, + keep_target_files=keep_target_files, + cleanup_metadata=cleanup_metadata, + conda_cleanup_envs=conda_cleanup_envs, + cleanup_containers=cleanup_containers, + cleanup_shadow=cleanup_shadow, + subsnakemake=subsnakemake, + updated_files=updated_files, + allowed_rules=allowed_rules, + greediness=greediness, + no_hooks=no_hooks, + force_use_threads=use_threads, + conda_create_envs_only=conda_create_envs_only, + cluster_status=cluster_status, + cluster_cancel=cluster_cancel, + cluster_cancel_nargs=cluster_cancel_nargs, + cluster_sidecar=cluster_sidecar, + report=report, + report_stylesheet=report_stylesheet, + export_cwl=export_cwl, + batch=batch, + keepincomplete=keep_incomplete, + containerize=containerize, + ) + + except BrokenPipeError: + # ignore this exception and stop. It occurs if snakemake output is piped into less and less quits before reading the whole output. 
+ # in such a case, snakemake shall stop scheduling and quit with error 1 + success = False + except BaseException as ex: + if "workflow" in locals(): + print_exception(ex, workflow.linemaps) + else: + print_exception(ex, dict()) + success = False + + if workdir: + os.chdir(olddir) + if "workflow" in locals() and workflow.persistence: + workflow.persistence.unlock() + if not keep_logger: + logger.cleanup() + return success diff --git a/snakemake/cli.py b/snakemake/cli.py new file mode 100644 index 000000000..05560f17f --- /dev/null +++ b/snakemake/cli.py @@ -0,0 +1,2482 @@ +__author__ = "Johannes Köster" +__copyright__ = "Copyright 2023, Johannes Köster" +__email__ = "johannes.koester@uni-due.de" +__license__ = "MIT" + +import sys + +from snakemake import logging +from snakemake.api import snakemake + +import os +import glob +from argparse import ArgumentDefaultsHelpFormatter +import logging as _logging +from pathlib import Path +import re +import threading +import webbrowser +from functools import partial +import shlex +from importlib.machinery import SourceFileLoader + +from snakemake_interface_executor_plugins.utils import url_can_parse, ExecMode +from snakemake_interface_executor_plugins.registry import ExecutorPluginRegistry + +from snakemake.target_jobs import parse_target_jobs_cli_args +from snakemake.workflow import Workflow +from snakemake.dag import Batch +from snakemake.exceptions import ( + CliException, + ResourceScopesException, +) +from snakemake.io import wait_for_files +from snakemake.utils import update_config, available_cpu_count +from snakemake.common import ( + RERUN_TRIGGERS, + __version__, + MIN_PY_VERSION, + get_appdirs, + parse_key_value_arg, +) +from snakemake.resources import ResourceScopes, parse_resources, DefaultResources + +SNAKEFILE_CHOICES = [ + "Snakefile", + "snakefile", + "workflow/Snakefile", + "workflow/snakefile", +] + + +def parse_set_threads(args): + return parse_set_ints( + args.set_threads, + "Invalid threads definition: entries have to be defined as RULE=THREADS pairs " + "(with THREADS being a positive integer).", + ) + + +def parse_set_resources(args): + errmsg = ( + "Invalid resource definition: entries have to be defined as RULE:RESOURCE=VALUE, with " + "VALUE being a positive integer or a string." 
+ ) + + from collections import defaultdict + + assignments = defaultdict(dict) + if args.set_resources is not None: + for entry in args.set_resources: + key, value = parse_key_value_arg(entry, errmsg=errmsg) + key = key.split(":") + if len(key) != 2: + raise ValueError(errmsg) + rule, resource = key + try: + value = int(value) + except ValueError: + assignments[rule][resource] = value + continue + if value < 0: + raise ValueError(errmsg) + assignments[rule][resource] = value + return assignments + + +def parse_set_scatter(args): + return parse_set_ints( + args.set_scatter, + "Invalid scatter definition: entries have to be defined as NAME=SCATTERITEMS pairs " + "(with SCATTERITEMS being a positive integer).", + ) + + +def parse_set_resource_scope(args): + err_msg = ( + "Invalid resource scopes: entries must be defined as RESOURCE=SCOPE pairs, " + "where SCOPE is either 'local', 'global', or 'excluded'" + ) + if args.set_resource_scopes is not None: + try: + return ResourceScopes( + parse_key_value_arg(entry, errmsg=err_msg) + for entry in args.set_resource_scopes + ) + except ResourceScopesException as err: + invalid_resources = ", ".join( + f"'{res}={scope}'" for res, scope in err.invalid_resources.items() + ) + raise ValueError(f"{err.msg} (got {invalid_resources})") + + return ResourceScopes() + + +def parse_set_ints(arg, errmsg): + assignments = dict() + if arg is not None: + for entry in arg: + key, value = parse_key_value_arg(entry, errmsg=errmsg) + try: + value = int(value) + except ValueError: + raise ValueError(errmsg) + if value < 0: + raise ValueError(errmsg) + assignments[key] = value + return assignments + + +def parse_batch(args): + errmsg = "Invalid batch definition: batch entry has to be defined as RULE=BATCH/BATCHES (with integers BATCH <= BATCHES, BATCH >= 1)." 
+ if args.batch is not None: + rule, batchdef = parse_key_value_arg(args.batch, errmsg=errmsg) + try: + batch, batches = batchdef.split("/") + batch = int(batch) + batches = int(batches) + except ValueError: + raise ValueError(errmsg) + if batch > batches or batch < 1: + raise ValueError(errmsg) + return Batch(rule, batch, batches) + return None + + +def parse_groups(args): + errmsg = "Invalid groups definition: entries have to be defined as RULE=GROUP pairs" + overwrite_groups = dict() + if args.groups is not None: + for entry in args.groups: + rule, group = parse_key_value_arg(entry, errmsg=errmsg) + overwrite_groups[rule] = group + return overwrite_groups + + +def parse_group_components(args): + errmsg = "Invalid group components definition: entries have to be defined as GROUP=COMPONENTS pairs (with COMPONENTS being a positive integer)" + group_components = dict() + if args.group_components is not None: + for entry in args.group_components: + group, count = parse_key_value_arg(entry, errmsg=errmsg) + try: + count = int(count) + except ValueError: + raise ValueError(errmsg) + if count <= 0: + raise ValueError(errmsg) + group_components[group] = count + return group_components + + +def _bool_parser(value): + if value == "True": + return True + elif value == "False": + return False + raise ValueError + + +def parse_config(args): + """Parse config from args.""" + import yaml + + yaml_base_load = lambda s: yaml.load(s, Loader=yaml.loader.BaseLoader) + parsers = [int, float, _bool_parser, yaml_base_load, str] + config = dict() + if args.config is not None: + valid = re.compile(r"[a-zA-Z_]\w*$") + for entry in args.config: + key, val = parse_key_value_arg( + entry, + errmsg="Invalid config definition: Config entries have to be defined as name=value pairs.", + ) + if not valid.match(key): + raise ValueError( + "Invalid config definition: Config entry must start with a valid identifier." + ) + v = None + if val == "": + update_config(config, {key: v}) + continue + for parser in parsers: + try: + v = parser(val) + # avoid accidental interpretation as function + if not callable(v): + break + except: + pass + assert v is not None + update_config(config, {key: v}) + return config + + +def parse_cores(cores, allow_none=False): + if cores is None: + if allow_none: + return cores + raise CliException( + "Error: you need to specify the maximum number of CPU cores to " + "be used at the same time. If you want to use N cores, say --cores N " + "or -cN. For all cores on your system (be sure that this is " + "appropriate) use --cores all. For no parallelization use --cores 1 or " + "-c1." + ) + if cores == "all": + return available_cpu_count() + try: + return int(cores) + except ValueError: + raise CliException( + "Error parsing number of cores (--cores, -c, -j): must be integer, " + "empty, or 'all'." + ) + + +def parse_jobs(jobs, allow_none=False): + if jobs is None: + if allow_none: + return jobs + raise CliException( + "Error: you need to specify the maximum number of jobs to " + "be queued or executed at the same time with --jobs or -j." + ) + if jobs == "unlimited": + return sys.maxsize + try: + return int(jobs) + except ValueError: + raise CliException( + "Error parsing number of jobs (--jobs, -j): must be integer." 
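``parse_config`` above tries a sequence of parsers (int, float, bool, YAML, str) on each ``KEY=VALUE`` entry, so values keep their natural types, while ``parse_cores`` accepts an integer or ``all``. A short illustration with made-up keys:

.. code-block:: python

    from argparse import Namespace

    config = parse_config(Namespace(config=["threshold=0.05", "trim=False", "name=sample1"]))
    # -> {"threshold": 0.05, "trim": False, "name": "sample1"}

    parse_cores("8")    # -> 8
    parse_cores("all")  # -> available_cpu_count()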
+ ) + + +def parse_cores_jobs(cores, jobs, no_exec, non_local_exec, dryrun): + if no_exec or dryrun: + cores = parse_cores(cores, allow_none=True) or 1 + jobs = parse_jobs(jobs, allow_none=True) or 1 + elif non_local_exec: + cores = parse_cores(cores, allow_none=True) + jobs = parse_jobs(jobs) + else: + cores = parse_cores(cores or jobs) + jobs = None + + return cores, jobs + + +def get_profile_file(profile, file, return_default=False): + dirs = get_appdirs() + if os.path.exists(profile): + search_dirs = [os.path.dirname(profile)] + profile = os.path.basename(profile) + else: + search_dirs = [os.getcwd(), dirs.user_config_dir, dirs.site_config_dir] + get_path = lambda d: os.path.join(d, profile, file) + for d in search_dirs: + p = get_path(d) + # "file" can actually be a full command. If so, `p` won't exist as the + # below would check if e.g. '/path/to/profile/script --arg1 val --arg2' + # exists. To fix this, we use shlex.split() to get the path to the + # script. We check for both, in case the path contains spaces or some + # other thing that would cause shlex.split() to mangle the path + # inaccurately. + if os.path.exists(p) or os.path.exists(shlex.split(p)[0]): + return p + + if return_default: + return file + return None + + +def get_argument_parser(profiles=None): + """Generate and return argument parser.""" + import configargparse + from snakemake.profiles import ProfileConfigFileParser + + dirs = get_appdirs() + config_files = [] + if profiles: + for profile in profiles: + if profile == "": + print("Error: invalid profile name.", file=sys.stderr) + exit(1) + + config_file = get_profile_file(profile, "config.yaml") + if config_file is None: + print( + "Error: profile given but no config.yaml found. " + "Profile has to be given as either absolute path, relative " + "path or name of a directory available in either " + "{site} or {user}.".format( + site=dirs.site_config_dir, user=dirs.user_config_dir + ), + file=sys.stderr, + ) + exit(1) + config_files.append(config_file) + + parser = configargparse.ArgumentParser( + description="Snakemake is a Python based language and execution " + "environment for GNU Make-like workflows.", + formatter_class=ArgumentDefaultsHelpFormatter, + default_config_files=config_files, + config_file_parser_class=ProfileConfigFileParser, + ) + + group_exec = parser.add_argument_group("EXECUTION") + + group_exec.add_argument( + "target", + nargs="*", + default=None, + help="Targets to build. May be rules or files.", + ) + + group_exec.add_argument( + "--dry-run", + "--dryrun", + "-n", + dest="dryrun", + action="store_true", + help="Do not execute anything, and display what would be done. " + "If you have a very large workflow, use --dry-run --quiet to just " + "print a summary of the DAG of jobs.", + ) + + group_exec.add_argument( + "--profile", + help=f""" + Name of profile to use for configuring + Snakemake. Snakemake will search for a corresponding + folder in {dirs.site_config_dir} and {dirs.user_config_dir}. Alternatively, this can be an + absolute or relative path. + The profile folder has to contain a file 'config.yaml'. + This file can be used to set default values for command + line options in YAML format. For example, + '--cluster qsub' becomes 'cluster: qsub' in the YAML + file. Profiles can be obtained from + https://github.com/snakemake-profiles. + The profile can also be set via the environment variable $SNAKEMAKE_PROFILE. + To override this variable and use no profile at all, provide the value 'none' + to this argument. 
+ """, + env_var="SNAKEMAKE_PROFILE", + ) + + group_exec.add_argument( + "--workflow-profile", + help=""" + Path (relative to current directory) to workflow specific profile + folder to use for configuring Snakemake with parameters specific for this + workflow (like resources). + If this flag is not used, Snakemake will by default use + 'profiles/default' if present (searched both relative to current directory + and relative to Snakefile, in this order). + For skipping any workflow specific profile provide the special value 'none'. + Settings made in the workflow profile will override settings made in the + general profile (see --profile). + The profile folder has to contain a file 'config.yaml'. + This file can be used to set default values for command + line options in YAML format. For example, + '--cluster qsub' becomes 'cluster: qsub' in the YAML + file. It is advisable to use the workflow profile to set + or overwrite e.g. workflow specific resources like the amount of threads + of a particular rule or the amount of memory needed. + Note that in such cases, the arguments may be given as nested YAML mappings + in the profile, e.g. 'set-threads: myrule: 4' instead of 'set-threads: myrule=4'. + """, + ) + + group_exec.add_argument( + "--cache", + nargs="*", + metavar="RULE", + help="Store output files of given rules in a central cache given by the environment " + "variable $SNAKEMAKE_OUTPUT_CACHE. Likewise, retrieve output files of the given rules " + "from this cache if they have been created before (by anybody writing to the same cache), " + "instead of actually executing the rules. Output files are identified by hashing all " + "steps, parameters and software stack (conda envs or containers) needed to create them.", + ) + + group_exec.add_argument( + "--snakefile", + "-s", + metavar="FILE", + help=( + "The workflow definition in form of a snakefile." + "Usually, you should not need to specify this. " + "By default, Snakemake will search for {} " + "beneath the current working " + "directory, in this order. " + "Only if you definitely want a different layout, " + "you need to use this parameter." + ).format(", ".join(map("'{}'".format, SNAKEFILE_CHOICES))), + ) + group_exec.add_argument( + "--cores", + "-c", + action="store", + const=available_cpu_count(), + nargs="?", + metavar="N", + help=( + "Use at most N CPU cores/jobs in parallel. " + "If N is omitted or 'all', the limit is set to the number of " + "available CPU cores. " + "In case of cluster/cloud execution, this argument sets the maximum number " + "of cores requested from the cluster or cloud scheduler. (See " + "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" + "resources-remote-execution for more info)" + "This number is available to rules via workflow.cores." + ), + ) + group_exec.add_argument( + "--jobs", + "-j", + metavar="N", + nargs="?", + const=available_cpu_count(), + action="store", + help=( + "Use at most N CPU cluster/cloud jobs in parallel. For local execution this is " + "an alias for --cores. Note: Set to 'unlimited' in case, this does not play a role." + ), + ) + group_exec.add_argument( + "--local-cores", + action="store", + default=available_cpu_count(), + metavar="N", + type=int, + help=( + "In cluster/cloud mode, use at most N cores of the host machine in parallel " + "(default: number of CPU cores of the host). The cores are used to execute " + "local rules. This option is ignored when not in cluster/cloud mode." 
+ ), + ) + group_exec.add_argument( + "--resources", + "--res", + nargs="*", + metavar="NAME=INT", + help=( + "Define additional resources that shall constrain the scheduling " + "analogously to --cores (see above). A resource is defined as " + "a name and an integer value. E.g. --resources mem_mb=1000. Rules can " + "use resources by defining the resource keyword, e.g. " + "resources: mem_mb=600. If now two rules require 600 of the resource " + "'mem_mb' they won't be run in parallel by the scheduler. In " + "cluster/cloud mode, this argument will also constrain the amount of " + "resources requested from the server. (See " + "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" + "resources-remote-execution for more info)" + ), + ) + group_exec.add_argument( + "--set-threads", + metavar="RULE=THREADS", + nargs="+", + help="Overwrite thread usage of rules. This allows to fine-tune workflow " + "parallelization. In particular, this is helpful to target certain cluster nodes " + "by e.g. shifting a rule to use more, or less threads than defined in the workflow. " + "Thereby, THREADS has to be a positive integer, and RULE has to be the name of the rule.", + ) + group_exec.add_argument( + "--max-threads", + type=int, + help="Define a global maximum number of threads available to any rule. Rules " + "requesting more threads (via the threads keyword) will have their values " + "reduced to the maximum. This can be useful when you want to restrict the " + "maximum number of threads without modifying the workflow definition or " + "overwriting rules individually with --set-threads.", + ) + group_exec.add_argument( + "--set-resources", + metavar="RULE:RESOURCE=VALUE", + nargs="+", + help="Overwrite resource usage of rules. This allows to fine-tune workflow " + "resources. In particular, this is helpful to target certain cluster nodes " + "by e.g. defining a certain partition for a rule, or overriding a temporary directory. " + "Thereby, VALUE has to be a positive integer or a string, RULE has to be the name of the " + "rule, and RESOURCE has to be the name of the resource.", + ) + group_exec.add_argument( + "--set-scatter", + metavar="NAME=SCATTERITEMS", + nargs="+", + help="Overwrite number of scatter items of scattergather processes. This allows to fine-tune " + "workflow parallelization. Thereby, SCATTERITEMS has to be a positive integer, and NAME has to be " + "the name of the scattergather process defined via a scattergather directive in the workflow.", + ) + group_exec.add_argument( + "--set-resource-scopes", + metavar="RESOURCE=[global|local]", + nargs="+", + help="Overwrite resource scopes. A scope determines how a constraint is " + "reckoned in cluster execution. With RESOURCE=local, a constraint applied to " + "RESOURCE using --resources will be considered the limit for each group " + "submission. With RESOURCE=global, the constraint will apply across all groups " + "cumulatively. By default, only `mem_mb` and `disk_mb` are considered local, " + "all other resources are global. This may be modified in the snakefile using " + "the `resource_scopes:` directive. Note that number of threads, specified via " + "--cores, is always considered local. (See " + "https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#" + "resources-remote-execution for more info)", + ) + group_exec.add_argument( + "--default-resources", + "--default-res", + nargs="*", + metavar="NAME=INT", + help=( + "Define default values of resources for rules that do not define their own values. 
" + "In addition to plain integers, python expressions over inputsize are allowed (e.g. '2*input.size_mb'). " + "The inputsize is the sum of the sizes of all input files of a rule. " + "By default, Snakemake assumes a default for mem_mb, disk_mb, and tmpdir (see below). " + "This option allows to add further defaults (e.g. account and partition for slurm) or to overwrite these default values. " + "The defaults are 'mem_mb=max(2*input.size_mb, 1000)', " + "'disk_mb=max(2*input.size_mb, 1000)' " + "(i.e., default disk and mem usage is twice the input file size but at least 1GB), and " + "the system temporary directory (as given by $TMPDIR, $TEMP, or $TMP) is used for the tmpdir resource. " + "The tmpdir resource is automatically used by shell commands, scripts and wrappers to store temporary data (as it is " + "mirrored into $TMPDIR, $TEMP, and $TMP for the executed subprocesses). " + "If this argument is not specified at all, Snakemake just uses the tmpdir resource as outlined above." + ), + ) + + group_exec.add_argument( + "--preemption-default", + type=int, + default=None, + help=( + "A preemptible instance can be requested when using the Google Life Sciences API. If you set a --preemption-default," + "all rules will be subject to the default. Specifically, this integer is the number of restart attempts that will be " + "made given that the instance is killed unexpectedly. Note that preemptible instances have a maximum running time of 24 " + "hours. If you want to set preemptible instances for only a subset of rules, use --preemptible-rules instead." + ), + ) + + group_exec.add_argument( + "--preemptible-rules", + nargs="+", + default=None, + help=( + "A preemptible instance can be requested when using the Google Life Sciences API. If you want to use these instances " + "for a subset of your rules, you can use --preemptible-rules and then specify a list of rule and integer pairs, where " + "each integer indicates the number of restarts to use for the rule's instance in the case that the instance is " + "terminated unexpectedly. --preemptible-rules can be used in combination with --preemption-default, and will take " + "priority. Note that preemptible instances have a maximum running time of 24. If you want to apply a consistent " + "number of retries across all your rules, use --preemption-default instead. " + "Example: snakemake --preemption-default 10 --preemptible-rules map_reads=3 call_variants=0" + ), + ) + + group_exec.add_argument( + "--config", + "-C", + nargs="*", + metavar="KEY=VALUE", + help=( + "Set or overwrite values in the workflow config object. " + "The workflow config object is accessible as variable config inside " + "the workflow. Default values can be set by providing a JSON file " + "(see Documentation)." + ), + ) + group_exec.add_argument( + "--configfile", + "--configfiles", + nargs="+", + metavar="FILE", + help=( + "Specify or overwrite the config file of the workflow (see the docs). " + "Values specified in JSON or YAML format are available in the global config " + "dictionary inside the workflow. Multiple files overwrite each other in " + "the given order. Thereby missing keys in previous config files are extended by " + "following configfiles. Note that this order also includes a config file defined " + "in the workflow definition itself (which will come first)." 
+ ), + ) + group_exec.add_argument( + "--envvars", + nargs="+", + metavar="VARNAME", + help="Environment variables to pass to cloud jobs.", + ) + group_exec.add_argument( + "--directory", + "-d", + metavar="DIR", + action="store", + help=( + "Specify working directory (relative paths in " + "the snakefile will use this as their origin)." + ), + ) + group_exec.add_argument( + "--touch", + "-t", + action="store_true", + help=( + "Touch output files (mark them up to date without really " + "changing them) instead of running their commands. This is " + "used to pretend that the rules were executed, in order to " + "fool future invocations of snakemake. Fails if a file does " + "not yet exist. Note that this will only touch files that would " + "otherwise be recreated by Snakemake (e.g. because their input " + "files are newer). For enforcing a touch, combine this with " + "--force, --forceall, or --forcerun. Note however that you lose " + "the provenance information when the files have been created in " + "reality. Hence, this should be used only as a last resort." + ), + ) + group_exec.add_argument( + "--keep-going", + "-k", + action="store_true", + help="Go on with independent jobs if a job fails.", + ) + group_exec.add_argument( + "--rerun-triggers", + nargs="+", + choices=RERUN_TRIGGERS, + default=RERUN_TRIGGERS, + help="Define what triggers the rerunning of a job. By default, " + "all triggers are used, which guarantees that results are " + "consistent with the workflow code and configuration. If you " + "rather prefer the traditional way of just considering " + "file modification dates, use '--rerun-trigger mtime'.", + ) + group_exec.add_argument( + "--force", + "-f", + action="store_true", + help=( + "Force the execution of the selected target or the first rule " + "regardless of already created output." + ), + ) + group_exec.add_argument( + "--executor", + "-e", + help="Specify a custom executor, available via an executor plugin: snakemake_executor_", + choices=ExecutorPluginRegistry().plugins, + ) + group_exec.add_argument( + "--forceall", + "-F", + action="store_true", + help=( + "Force the execution of the selected (or the first) rule and " + "all rules it is dependent on regardless of already created " + "output." + ), + ) + group_exec.add_argument( + "--forcerun", + "-R", + nargs="*", + metavar="TARGET", + help=( + "Force the re-execution or creation of the given rules or files." + " Use this option if you changed a rule and want to have all its " + "output in your workflow updated." + ), + ) + group_exec.add_argument( + "--prioritize", + "-P", + nargs="+", + metavar="TARGET", + help=( + "Tell the scheduler to assign creation of given targets " + "(and all their dependencies) highest priority. (EXPERIMENTAL)" + ), + ) + group_exec.add_argument( + "--batch", + metavar="RULE=BATCH/BATCHES", + help=( + "Only create the given BATCH of the input files of the given RULE. " + "This can be used to iteratively run parts of very large workflows. " + "Only the execution plan of the relevant part of the workflow has to " + "be calculated, thereby speeding up DAG computation. " + "It is recommended to provide the most suitable rule for batching when " + "documenting a workflow. It should be some aggregating rule that " + "would be executed only once, and has a large number of input files. " + "For example, it can be a rule that aggregates over samples." 
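+            # The rule name 'aggregate' below is an illustrative placeholder.
+            " For instance, '--batch aggregate=1/3' restricts rule aggregate to the first "
+            "of three batches of its input files."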
+ ), + ) + group_exec.add_argument( + "--until", + "-U", + nargs="+", + metavar="TARGET", + help=( + "Runs the pipeline until it reaches the specified rules or " + "files. Only runs jobs that are dependencies of the specified " + "rule or files, does not run sibling DAGs. " + ), + ) + group_exec.add_argument( + "--omit-from", + "-O", + nargs="+", + metavar="TARGET", + help=( + "Prevent the execution or creation of the given rules or files " + "as well as any rules or files that are downstream of these targets " + "in the DAG. Also runs jobs in sibling DAGs that are independent of the " + "rules or files specified here." + ), + ) + group_exec.add_argument( + "--rerun-incomplete", + "--ri", + action="store_true", + help=("Re-run all jobs the output of which is recognized as incomplete."), + ) + group_exec.add_argument( + "--shadow-prefix", + metavar="DIR", + help=( + "Specify a directory in which the 'shadow' directory is created. " + "If not supplied, the value is set to the '.snakemake' directory relative " + "to the working directory." + ), + ) + + try: + import pulp + + lp_solvers = pulp.list_solvers(onlyAvailable=True) + except ImportError: + # Dummy list for the case that pulp is not available + # This only happened when building docs. + lp_solvers = ["COIN_CMD"] + recommended_lp_solver = "COIN_CMD" + + group_exec.add_argument( + "--scheduler", + default="greedy" if recommended_lp_solver not in lp_solvers else "ilp", + nargs="?", + choices=["ilp", "greedy"], + help=( + "Specifies if jobs are selected by a greedy algorithm or by solving an ilp. " + "The ilp scheduler aims to reduce runtime and hdd usage by best possible use of resources." + ), + ) + group_exec.add_argument( + "--wms-monitor", + action="store", + nargs="?", + help=( + "IP and port of workflow management system to monitor the execution of snakemake (e.g. http://127.0.0.1:5000)" + " Note that if your service requires an authorization token, you must export WMS_MONITOR_TOKEN in the environment." + ), + ) + group_exec.add_argument( + "--wms-monitor-arg", + nargs="*", + metavar="NAME=VALUE", + help=( + "If the workflow management service accepts extra arguments, provide." + " them in key value pairs with --wms-monitor-arg. For example, to run" + " an existing workflow using a wms monitor, you can provide the pair " + " id=12345 and the arguments will be provided to the endpoint to " + " first interact with the workflow" + ), + ) + group_exec.add_argument( + "--scheduler-ilp-solver", + default=recommended_lp_solver, + choices=lp_solvers, + help=("Specifies solver to be utilized when selecting ilp-scheduler."), + ) + group_exec.add_argument( + "--scheduler-solver-path", + help="Set the PATH to search for scheduler solver binaries (internal use only).", + ) + group_exec.add_argument( + "--conda-base-path", + help="Path of conda base installation (home of conda, mamba, activate) (internal use only).", + ) + + group_exec.add_argument( + "--no-subworkflows", + "--nosw", + action="store_true", + help=("Do not evaluate or execute subworkflows."), + ) + + # TODO add group_partitioning, allowing to define --group rulename=groupname. + # i.e. setting groups via the CLI for improving cluster performance given + # available resources. + # TODO add an additional flag --group-components groupname=3, allowing to set the + # number of connected components a group is allowed to span. By default, this is 1 + # (as now), but the flag allows to extend this. This can be used to run e.g. 
+ # 3 jobs of the same rule in the same group, although they are not connected. + # Can be helpful for putting together many small jobs or benefitting of shared memory + # setups. + + group_group = parser.add_argument_group("GROUPING") + group_group.add_argument( + "--groups", + nargs="+", + help="Assign rules to groups (this overwrites any " + "group definitions from the workflow).", + ) + group_group.add_argument( + "--group-components", + nargs="+", + help="Set the number of connected components a group is " + "allowed to span. By default, this is 1, but this flag " + "allows to extend this. This can be used to run e.g. 3 " + "jobs of the same rule in the same group, although they " + "are not connected. It can be helpful for putting together " + "many small jobs or benefitting of shared memory setups.", + ) + group_report = parser.add_argument_group("REPORTS") + + group_report.add_argument( + "--report", + nargs="?", + const="report.html", + metavar="FILE", + help="Create an HTML report with results and statistics. " + "This can be either a .html file or a .zip file. " + "In the former case, all results are embedded into the .html (this only works for small data). " + "In the latter case, results are stored along with a file report.html in the zip archive. " + "If no filename is given, an embedded report.html is the default.", + ) + group_report.add_argument( + "--report-stylesheet", + metavar="CSSFILE", + help="Custom stylesheet to use for report. In particular, this can be used for " + "branding the report with e.g. a custom logo, see docs.", + ) + + group_notebooks = parser.add_argument_group("NOTEBOOKS") + + group_notebooks.add_argument( + "--draft-notebook", + metavar="TARGET", + help="Draft a skeleton notebook for the rule used to generate the given target file. This notebook " + "can then be opened in a jupyter server, executed and implemented until ready. After saving, it " + "will automatically be reused in non-interactive mode by Snakemake for subsequent jobs.", + ) + group_notebooks.add_argument( + "--edit-notebook", + metavar="TARGET", + help="Interactively edit the notebook associated with the rule used to generate the given target file. " + "This will start a local jupyter notebook server. " + "Any changes to the notebook should be saved, and the server has to be stopped by " + "closing the notebook and hitting the 'Quit' button on the jupyter dashboard. " + "Afterwards, the updated notebook will be automatically stored in the path defined in the rule. " + "If the notebook is not yet present, this will create an empty draft. ", + ) + group_notebooks.add_argument( + "--notebook-listen", + metavar="IP:PORT", + default="localhost:8888", + help="The IP address and PORT the notebook server used for editing the notebook (--edit-notebook) will listen on.", + ) + + group_utils = parser.add_argument_group("UTILITIES") + group_utils.add_argument( + "--lint", + nargs="?", + const="text", + choices=["text", "json"], + help="Perform linting on the given workflow. This will print snakemake " + "specific suggestions to improve code quality (work in progress, more lints " + "to be added in the future). If no argument is provided, plain text output is used.", + ) + group_utils.add_argument( + "--generate-unit-tests", + nargs="?", + const=".tests/unit", + metavar="TESTPATH", + help="Automatically generate unit tests for each workflow rule. " + "This assumes that all input files of each job are already present. 
" + "Rules without a job with present input files will be skipped (a warning will be issued). " + "For each rule, one test case will be " + "created in the specified test folder (.tests/unit by default). After " + "successful execution, tests can be run with " + "'pytest TESTPATH'.", + ) + group_utils.add_argument( + "--containerize", + action="store_true", + help="Print a Dockerfile that provides an execution environment for the workflow, including all " + "conda environments.", + ) + group_utils.add_argument( + "--export-cwl", + action="store", + metavar="FILE", + help="Compile workflow to CWL and store it in given FILE.", + ) + group_utils.add_argument( + "--list", + "-l", + action="store_true", + help="Show available rules in given Snakefile.", + ) + group_utils.add_argument( + "--list-target-rules", + "--lt", + action="store_true", + help="Show available target rules in given Snakefile.", + ) + group_utils.add_argument( + "--dag", + action="store_true", + help="Do not execute anything and print the directed " + "acyclic graph of jobs in the dot language. Recommended " + "use on Unix systems: snakemake --dag | dot | display. " + "Note print statements in your Snakefile may interfere " + "with visualization.", + ) + group_utils.add_argument( + "--rulegraph", + action="store_true", + help="Do not execute anything and print the dependency graph " + "of rules in the dot language. This will be less " + "crowded than above DAG of jobs, but also show less information. " + "Note that each rule is displayed once, hence the displayed graph will be " + "cyclic if a rule appears in several steps of the workflow. " + "Use this if above option leads to a DAG that is too large. " + "Recommended use on Unix systems: snakemake --rulegraph | dot | display. " + "Note print statements in your Snakefile may interfere " + "with visualization.", + ) + group_utils.add_argument( + "--filegraph", + action="store_true", + help="Do not execute anything and print the dependency graph " + "of rules with their input and output files in the dot language. " + "This is an intermediate solution between above DAG of jobs and the rule graph. " + "Note that each rule is displayed once, hence the displayed graph will be " + "cyclic if a rule appears in several steps of the workflow. " + "Use this if above option leads to a DAG that is too large. " + "Recommended use on Unix systems: snakemake --filegraph | dot | display. " + "Note print statements in your Snakefile may interfere " + "with visualization.", + ) + group_utils.add_argument( + "--d3dag", + action="store_true", + help="Print the DAG in D3.js compatible JSON format.", + ) + group_utils.add_argument( + "--summary", + "-S", + action="store_true", + help="Print a summary of all files created by the workflow. The " + "has the following columns: filename, modification time, " + "rule version, status, plan.\n" + "Thereby rule version contains the version" + "the file was created with (see the version keyword of rules), and " + "status denotes whether the file is missing, its input files are " + "newer or if version or implementation of the rule changed since " + "file creation. Finally the last column denotes whether the file " + "will be updated or created during the next workflow execution.", + ) + group_utils.add_argument( + "--detailed-summary", + "-D", + action="store_true", + help="Print a summary of all files created by the workflow. 
The " + "has the following columns: filename, modification time, " + "rule version, input file(s), shell command, status, plan.\n" + "Thereby rule version contains the version " + "the file was created with (see the version keyword of rules), and " + "status denotes whether the file is missing, its input files are " + "newer or if version or implementation of the rule changed since " + "file creation. The input file and shell command columns are self " + "explanatory. Finally the last column denotes whether the file " + "will be updated or created during the next workflow execution.", + ) + group_utils.add_argument( + "--archive", + metavar="FILE", + help="Archive the workflow into the given tar archive FILE. The archive " + "will be created such that the workflow can be re-executed on a vanilla " + "system. The function needs conda and git to be installed. " + "It will archive every file that is under git version control. " + "Note that it is best practice to have the Snakefile, config files, and " + "scripts under version control. Hence, they will be included in the archive. " + "Further, it will add input files that are not generated by " + "by the workflow itself and conda environments. Note that symlinks are " + "dereferenced. Supported " + "formats are .tar, .tar.gz, .tar.bz2 and .tar.xz.", + ) + group_utils.add_argument( + "--cleanup-metadata", + "--cm", + nargs="+", + metavar="FILE", + help="Cleanup the metadata " + "of given files. That means that snakemake removes any tracked " + "version info, and any marks that files are incomplete.", + ) + group_utils.add_argument( + "--cleanup-shadow", + action="store_true", + help="Cleanup old shadow directories which have not been deleted due " + "to failures or power loss.", + ) + group_utils.add_argument( + "--skip-script-cleanup", + action="store_true", + help="Don't delete wrapper scripts used for execution", + ) + group_utils.add_argument( + "--unlock", action="store_true", help="Remove a lock on the working directory." + ) + group_utils.add_argument( + "--list-version-changes", + "--lv", + action="store_true", + help="List all output files that have been created with " + "a different version (as determined by the version keyword).", + ) + group_utils.add_argument( + "--list-code-changes", + "--lc", + action="store_true", + help="List all output files for which the rule body (run or shell) have " + "changed in the Snakefile.", + ) + group_utils.add_argument( + "--list-input-changes", + "--li", + action="store_true", + help="List all output files for which the defined input files have changed " + "in the Snakefile (e.g. new input files were added in the rule " + "definition or files were renamed). For listing input file " + "modification in the filesystem, use --summary.", + ) + group_utils.add_argument( + "--list-params-changes", + "--lp", + action="store_true", + help="List all output files for which the defined params have changed " + "in the Snakefile.", + ) + group_utils.add_argument( + "--list-untracked", + "--lu", + action="store_true", + help="List all files in the working directory that are not used in the " + "workflow. This can be used e.g. for identifying leftover files. Hidden files " + "and directories are ignored.", + ) + group_utils.add_argument( + "--delete-all-output", + action="store_true", + help="Remove all files generated by the workflow. Use together with --dry-run " + "to list files without actually deleting anything. Note that this will " + "not recurse into subworkflows. Write-protected files are not removed. 
" + "Nevertheless, use with care!", + ) + group_utils.add_argument( + "--delete-temp-output", + action="store_true", + help="Remove all temporary files generated by the workflow. Use together " + "with --dry-run to list files without actually deleting anything. Note " + "that this will not recurse into subworkflows.", + ) + group_utils.add_argument( + "--bash-completion", + action="store_true", + help="Output code to register bash completion for snakemake. Put the " + "following in your .bashrc (including the accents): " + "`snakemake --bash-completion` or issue it in an open terminal " + "session.", + ) + group_utils.add_argument( + "--keep-incomplete", + action="store_true", + help="Do not remove incomplete output files by failed jobs.", + ) + group_utils.add_argument( + "--drop-metadata", + action="store_true", + help="Drop metadata file tracking information after job finishes. " + "Provenance-information based reports (e.g. --report and the " + "--list_x_changes functions) will be empty or incomplete.", + ) + group_utils.add_argument("--version", "-v", action="version", version=__version__) + + group_output = parser.add_argument_group("OUTPUT") + group_output.add_argument( + "--gui", + nargs="?", + const="8000", + metavar="PORT", + type=str, + help="Serve an HTML based user interface to the given network and " + "port e.g. 168.129.10.15:8000. By default Snakemake is only " + "available in the local network (default port: 8000). To make " + "Snakemake listen to all ip addresses add the special host address " + "0.0.0.0 to the url (0.0.0.0:8000). This is important if Snakemake " + "is used in a virtualised environment like Docker. If possible, a " + "browser window is opened.", + ) + group_output.add_argument( + "--printshellcmds", + "-p", + action="store_true", + help="Print out the shell commands that will be executed.", + ) + group_output.add_argument( + "--debug-dag", + action="store_true", + help="Print candidate and selected jobs (including their wildcards) while " + "inferring DAG. This can help to debug unexpected DAG topology or errors.", + ) + group_output.add_argument( + "--stats", + metavar="FILE", + help="Write stats about Snakefile execution in JSON format to the given file.", + ) + group_output.add_argument( + "--nocolor", action="store_true", help="Do not use a colored output." + ) + group_output.add_argument( + "--quiet", + "-q", + nargs="*", + choices=["progress", "rules", "all"], + default=None, + help="Do not output certain information. " + "If used without arguments, do not output any progress or rule " + "information. Defining 'all' results in no information being " + "printed at all.", + ) + group_output.add_argument( + "--print-compilation", + action="store_true", + help="Print the python representation of the workflow.", + ) + + group_output.add_argument( + "--verbose", action="store_true", help="Print debugging output." + ) + + group_behavior = parser.add_argument_group("BEHAVIOR") + group_behavior.add_argument( + "--force-use-threads", + dest="force_use_threads", + action="store_true", + help="Force threads rather than processes. Helpful if shared memory (/dev/shm) is full or unavailable.", + ) + group_behavior.add_argument( + "--allow-ambiguity", + "-a", + action="store_true", + help=( + "Don't check for ambiguous rules and simply use the first if " + "several can produce the same file. This allows the user to " + "prioritize rules by their order in the snakefile." 
+ ), + ) + group_behavior.add_argument( + "--nolock", action="store_true", help="Do not lock the working directory" + ) + group_behavior.add_argument( + "--ignore-incomplete", + "--ii", + action="store_true", + help="Do not check for incomplete output files.", + ) + group_behavior.add_argument( + "--max-inventory-time", + type=int, + default=20, + metavar="SECONDS", + help="Spend at most SECONDS seconds to create a file inventory for the working directory. " + "The inventory vastly speeds up file modification and existence checks when computing " + "which jobs need to be executed. However, creating the inventory itself can be slow, e.g. on " + "network file systems. Hence, we do not spend more than a given amount of time and fall back " + "to individual checks for the rest.", + ) + group_behavior.add_argument( + "--latency-wait", + "--output-wait", + "-w", + type=int, + default=5, + metavar="SECONDS", + help="Wait given seconds if an output file of a job is not present after " + "the job finished. This helps if your filesystem " + "suffers from latency (default 5).", + ) + group_behavior.add_argument( + "--wait-for-files", + nargs="*", + metavar="FILE", + help="Wait --latency-wait seconds for these " + "files to be present before executing the workflow. " + "This option is used internally to handle filesystem latency in cluster " + "environments.", + ) + group_behavior.add_argument( + "--wait-for-files-file", + metavar="FILE", + help="Same behaviour as --wait-for-files, but file list is " + "stored in file instead of being passed on the commandline. " + "This is useful when the list of files is too long to be " + "passed on the commandline.", + ) + group_behavior.add_argument( + "--notemp", + "--nt", + action="store_true", + help="Ignore temp() declarations. This is useful when running only " + "a part of the workflow, since temp() would lead to deletion of " + "probably needed files by other parts of the workflow.", + ) + group_behavior.add_argument( + "--all-temp", + action="store_true", + help="Mark all output files as temp files. This can be useful for CI testing, " + "in order to save space.", + ) + group_behavior.add_argument( + "--keep-remote", + action="store_true", + help="Keep local copies of remote input files.", + ) + group_behavior.add_argument( + "--keep-target-files", + action="store_true", + help="Do not adjust the paths of given target files relative to the working directory.", + ) + group_behavior.add_argument( + "--allowed-rules", + nargs="+", + help="Only consider given rules. If omitted, all rules in Snakefile are " + "used. Note that this is intended primarily for internal use and may " + "lead to unexpected results otherwise.", + ) + group_behavior.add_argument( + "--target-jobs", + nargs="+", + help="Target particular jobs by RULE:WILDCARD1=VALUE,WILDCARD2=VALUE,... 
" + "This is meant for internal use by Snakemake itself only.", + ) + group_behavior.add_argument( + "--local-groupid", + default="local", + help="Name for local groupid, meant for internal use only.", + ) + group_behavior.add_argument( + "--max-jobs-per-second", + default=10, + type=float, + help="Maximal number of cluster/drmaa jobs per second, default is 10, " + "fractions allowed.", + ) + group_behavior.add_argument( + "--max-status-checks-per-second", + default=10, + type=float, + help="Maximal number of job status checks per second, default is 10, " + "fractions allowed.", + ) + group_behavior.add_argument( + "-T", + "--retries", + "--restart-times", + default=0, + type=int, + help="Number of times to restart failing jobs (defaults to 0).", + ) + group_behavior.add_argument( + "--attempt", + default=1, + type=int, + help="Internal use only: define the initial value of the attempt " + "parameter (default: 1).", + ) + group_behavior.add_argument( + "--wrapper-prefix", + default="https://github.com/snakemake/snakemake-wrappers/raw/", + help="Prefix for URL created from wrapper directive (default: " + "https://github.com/snakemake/snakemake-wrappers/raw/). Set this to " + "a different URL to use your fork or a local clone of the repository, " + "e.g., use a git URL like 'git+file://path/to/your/local/clone@'.", + ) + group_behavior.add_argument( + "--default-remote-provider", + choices=[ + "S3", + "GS", + "FTP", + "SFTP", + "S3Mocked", + "gfal", + "gridftp", + "iRODS", + "AzBlob", + "XRootD", + ], + help="Specify default remote provider to be used for " + "all input and output files that don't yet specify " + "one.", + ) + group_behavior.add_argument( + "--default-remote-prefix", + default="", + help="Specify prefix for default remote provider. E.g. a bucket name.", + ) + group_behavior.add_argument( + "--no-shared-fs", + action="store_true", + help="Do not assume that jobs share a common file " + "system. When this flag is activated, Snakemake will " + "assume that the filesystem on a cluster node is not " + "shared with other nodes. For example, this will lead " + "to downloading remote files on each cluster node " + "separately. Further, it won't take special measures " + "to deal with filesystem latency issues. This option " + "will in most cases only make sense in combination with " + "--default-remote-provider. Further, when using --cluster " + "you will have to also provide --cluster-status. " + "Only activate this if you " + "know what you are doing.", + ) + group_behavior.add_argument( + "--greediness", + type=float, + default=None, + help="Set the greediness of scheduling. This value between 0 and 1 " + "determines how careful jobs are selected for execution. The default " + "value (1.0) provides the best speed and still acceptable scheduling " + "quality.", + ) + group_behavior.add_argument( + "--no-hooks", + action="store_true", + help="Do not invoke onstart, onsuccess or onerror hooks after execution.", + ) + group_behavior.add_argument( + "--overwrite-shellcmd", + help="Provide a shell command that shall be executed instead of those " + "given in the workflow. " + "This is for debugging purposes only.", + ) + group_behavior.add_argument( + "--debug", + action="store_true", + help="Allow to debug rules with e.g. PDB. This flag " + "allows to set breakpoints in run blocks.", + ) + group_behavior.add_argument( + "--runtime-profile", + metavar="FILE", + help="Profile Snakemake and write the output to FILE. 
This requires yappi "
+        "to be installed.",
+    )
+    group_behavior.add_argument(
+        "--mode",
+        choices=[ExecMode.default, ExecMode.subprocess, ExecMode.remote],
+        default=ExecMode.default,
+        type=int,
+        help="Set execution mode of Snakemake (internal use only).",
+    )
+    group_behavior.add_argument(
+        "--show-failed-logs",
+        action="store_true",
+        help="Automatically display logs of failed jobs.",
+    )
+    group_behavior.add_argument(
+        "--log-handler-script",
+        metavar="FILE",
+        default=None,
+        help="Provide a custom script containing a function 'def log_handler(msg):'. "
+        "Snakemake will call this function for every logging output (given as a dictionary msg), "
+        "allowing to e.g. send notifications in the form of e.g. slack messages or emails.",
+    )
+    group_behavior.add_argument(
+        "--log-service",
+        default=None,
+        choices=["none", "slack", "wms"],
+        help="Set a specific messaging service for logging output. "
+        "Snakemake will notify the service on errors and completed execution. "
+        "Currently slack and workflow management system (wms) are supported.",
+    )
+
+    group_slurm = parser.add_argument_group("SLURM")
+    slurm_mode_group = group_slurm.add_mutually_exclusive_group()
+
+    slurm_mode_group.add_argument(
+        "--slurm",
+        action="store_true",
+        help=(
+            "Execute snakemake rules as SLURM batch jobs according"
+            " to their 'resources' definition. SLURM resources such as"
+            " 'partition', 'ntasks', 'cpus', etc. need to be defined"
+            " per rule within the 'resources' definition. Note that"
+            " memory can only be defined as 'mem_mb' or 'mem_mb_per_cpu'"
+            " as analogous to the SLURM 'mem' and 'mem-per-cpu' flags"
+            " to sbatch, respectively. Here, the unit is always 'MiB'."
+            " In addition '--default-resources' should contain the"
+            " SLURM account."
+        ),
+    )
+    slurm_mode_group.add_argument(
+        "--slurm-jobstep",
+        action="store_true",
+        help=configargparse.SUPPRESS,  # this should be hidden and only be used
+        # for snakemake to be working in jobscript-
+        # mode
+    )
+
+    group_cluster = parser.add_argument_group("CLUSTER")
+
+    # TODO extend below description to explain the wildcards that can be used
+    cluster_mode_group = group_cluster.add_mutually_exclusive_group()
+    cluster_mode_group.add_argument(
+        "--cluster",
+        metavar="CMD",
+        help=(
+            "Execute snakemake rules with the given submit command, "
+            "e.g. qsub. Snakemake compiles jobs into scripts that are "
+            "submitted to the cluster with the given command, once all input "
+            "files for a particular job are present.\n"
+            "The submit command can be decorated to make it aware of certain "
+            "job properties (name, rulename, input, output, params, wildcards, log, threads "
+            "and dependencies (see the argument below)), e.g.:\n"
+            "$ snakemake --cluster 'qsub -pe threaded {threads}'."
+        ),
+    )
+    cluster_mode_group.add_argument(
+        "--cluster-sync",
+        metavar="CMD",
+        help=(
+            "The cluster submission command will block, returning the remote exit "
+            "status upon remote termination (for example, this should be used "
+            "if the cluster command is 'qsub -sync y' (SGE))."
+        ),
+    )
+    cluster_mode_group.add_argument(
+        "--drmaa",
+        nargs="?",
+        const="",
+        metavar="ARGS",
+        help="Execute snakemake on a cluster accessed via DRMAA, "
+        "Snakemake compiles jobs into scripts that are "
+        "submitted to the cluster with the given command, once all input "
+        "files for a particular job are present. 
ARGS can be used to " + "specify options of the underlying cluster system, " + "thereby using the job properties name, rulename, input, output, params, wildcards, log, " + "threads and dependencies, e.g.: " + "--drmaa ' -pe threaded {threads}'. Note that ARGS must be given in quotes and " + "with a leading whitespace.", + ) + + group_cluster.add_argument( + "--immediate-submit", + "--is", + action="store_true", + help="Immediately submit all jobs to the cluster instead of waiting " + "for present input files. This will fail, unless you make " + "the cluster aware of job dependencies, e.g. via:\n" + "$ snakemake --cluster 'sbatch --dependency {dependencies}.\n" + "Assuming that your submit script (here sbatch) outputs the " + "generated job id to the first stdout line, {dependencies} will " + "be filled with space separated job ids this job depends on. " + "Does not work for workflows that contain checkpoint rules.", + ) + group_cluster.add_argument( + "--jobscript", + "--js", + metavar="SCRIPT", + help="Provide a custom job script for submission to the cluster. " + "The default script resides as 'jobscript.sh' in the " + "installation directory.", + ) + group_cluster.add_argument( + "--jobname", + "--jn", + default="snakejob.{name}.{jobid}.sh", + metavar="NAME", + help="Provide a custom name for the jobscript that is submitted to the " + 'cluster (see --cluster). NAME is "snakejob.{name}.{jobid}.sh" ' + "per default. The wildcard {jobid} has to be present in the name.", + ) + group_cluster.add_argument( + "--cluster-status", + help="Status command for cluster execution. This is only considered " + "in combination with the --cluster flag. If provided, Snakemake will " + "use the status command to determine if a job has finished successfully " + "or failed. For this it is necessary that the submit command provided " + "to --cluster returns the cluster job id. Then, the status command " + "will be invoked with the job id. Snakemake expects it to return " + "'success' if the job was successful, 'failed' if the job failed and " + "'running' if the job still runs.", + ) + group_cluster.add_argument( + "--cluster-cancel", + default=None, + help="Specify a command that allows to stop currently running jobs. " + "The command will be passed a single argument, the job id.", + ) + group_cluster.add_argument( + "--cluster-cancel-nargs", + type=int, + default=1000, + help="Specify maximal number of job ids to pass to --cluster-cancel " + "command, defaults to 1000.", + ) + group_cluster.add_argument( + "--cluster-sidecar", + default=None, + help="Optional command to start a sidecar process during cluster " + "execution. Only active when --cluster is given as well.", + ) + group_cluster.add_argument( + "--drmaa-log-dir", + metavar="DIR", + help="Specify a directory in which stdout and stderr files of DRMAA" + " jobs will be written. The value may be given as a relative path," + " in which case Snakemake will use the current invocation directory" + " as the origin. If given, this will override any given '-o' and/or" + " '-e' native specification. 
If not given, all DRMAA stdout and"
+        " stderr files are written to the current working directory.",
+    )
+
+    group_flux = parser.add_argument_group("FLUX")
+    group_kubernetes = parser.add_argument_group("KUBERNETES")
+    group_google_life_science = parser.add_argument_group("GOOGLE_LIFE_SCIENCE")
+    group_tes = parser.add_argument_group("TES")
+    group_tibanna = parser.add_argument_group("TIBANNA")
+
+    group_kubernetes.add_argument(
+        "--kubernetes",
+        metavar="NAMESPACE",
+        nargs="?",
+        const="default",
+        help="Execute workflow in a kubernetes cluster (in the cloud). "
+        "NAMESPACE is the namespace you want to use for your job (if nothing "
+        "specified: 'default'). "
+        "Usually, this requires --default-remote-provider and "
+        "--default-remote-prefix to be set to a S3 or GS bucket where your "
+        "data shall be stored. It is further advisable to activate conda "
+        "integration via --use-conda.",
+    )
+    group_kubernetes.add_argument(
+        "--container-image",
+        metavar="IMAGE",
+        help="Docker image to use, e.g., when submitting jobs to kubernetes. "
+        "Defaults to 'https://hub.docker.com/r/snakemake/snakemake', tagged with "
+        "the same version as the currently running Snakemake instance. "
+        "Note that overwriting this value is up to your responsibility. "
+        "Any used image has to contain a working snakemake installation "
+        "that is compatible with (or ideally the same as) the currently "
+        "running version.",
+    )
+    group_kubernetes.add_argument(
+        "--k8s-cpu-scalar",
+        metavar="FLOAT",
+        default=0.95,
+        type=float,
+        help="K8s reserves some proportion of available CPUs for its own use. "
+        "So, where an underlying node may have 8 CPUs, only e.g. 7600 milliCPUs "
+        "are allocatable to k8s pods (i.e. snakemake jobs). As 8 > 7.6, k8s can't "
+        "find a node with enough CPU resource to run such jobs. This argument acts "
+        "as a global scalar on each job's CPU request, so that e.g. a job whose "
+        "rule definition asks for 8 CPUs will request 7600m CPUs from k8s, "
+        "allowing it to utilise one entire node. N.B: the job itself would still "
+        "see the original value, i.e. as the value substituted in {threads}.",
+    )
+
+    group_kubernetes.add_argument(
+        "--k8s-service-account-name",
+        metavar="SERVICEACCOUNTNAME",
+        default=None,
+        help="This argument allows the use of custom service accounts for "
+        "kubernetes pods. If specified, serviceAccountName will be added to the "
+        "pod specs. This is needed when using workload identity which is enforced "
+        "when using Google Cloud GKE Autopilot.",
+    )
+
+    group_tibanna.add_argument(
+        "--tibanna",
+        action="store_true",
+        help="Execute workflow on AWS cloud using Tibanna. This requires "
+        "--default-remote-prefix to be set to S3 bucket name and prefix"
+        " (e.g. 'bucketname/subdirectory') where input is already stored"
+        " and output will be sent to. Using --tibanna implies --default-resources"
+        " is set as default. Optionally, use --precommand to"
+        " specify any preparation command to run before snakemake command"
+        " on the cloud (inside snakemake container on Tibanna VM)."
+        " Also, --use-conda, --use-singularity, --config, --configfile are"
+        " supported and will be carried over.",
+    )
+    group_tibanna.add_argument(
+        "--tibanna-sfn",
+        help="Name of Tibanna Unicorn step function (e.g. tibanna_unicorn_monty). "
+        "This works as serverless scheduler/resource allocator and must be "
+        "deployed first using tibanna cli. (e.g. tibanna deploy_unicorn --usergroup="
+        "monty --buckets=bucketname)",
+    )
+    group_tibanna.add_argument(
+        "--precommand",
+        help="Any command to execute before snakemake command on AWS cloud "
+        "such as wget, git clone, unzip, etc. This is used with --tibanna. "
+        "Do not include input/output download/upload commands - file transfer"
+        " between S3 bucket and the run environment (container) is automatically"
+        " handled by Tibanna.",
+    )
+    group_tibanna.add_argument(
+        "--tibanna-config",
+        nargs="+",
+        help="Additional tibanna config e.g. --tibanna-config spot_instance=true subnet="
+        " security group=",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences",
+        action="store_true",
+        help="Execute workflow on Google Cloud using the Google Life "
+        "Sciences API. This requires default application credentials (json) "
+        "to be created and exported to the environment to use Google Cloud "
+        "Storage, Compute Engine, and Life Sciences. The credential file "
+        "should be exported as GOOGLE_APPLICATION_CREDENTIALS for snakemake "
+        "to discover. Also, --use-conda, --use-singularity, --config, "
+        "--configfile are supported and will be carried over.",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-regions",
+        nargs="+",
+        default=["us-east1", "us-west1", "us-central1"],
+        help="Specify one or more valid instance regions (defaults to US)",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-location",
+        help="The Life Sciences API service used to schedule the jobs. "
+        "E.g., us-central1 (Iowa) and europe-west2 (London). "
+        "Watch the terminal output to see all options found to be available. "
+        "If not specified, defaults to the first found with a matching prefix "
+        "from regions specified with --google-lifesciences-regions.",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-keep-cache",
+        action="store_true",
+        help="Cache workflows in your Google Cloud Storage Bucket specified "
+        "by --default-remote-prefix/{source}/{cache}. Each workflow working "
+        "directory is compressed to a .tar.gz, named by the hash of the "
+        "contents, and kept in Google Cloud Storage. By default, the caches "
+        "are deleted at the shutdown step of the workflow.",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-service-account-email",
+        help="Specify a service account email address",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-network",
+        help="Specify a network for a Google Compute Engine VM instance",
+    )
+    group_google_life_science.add_argument(
+        "--google-lifesciences-subnetwork",
+        help="Specify a subnetwork for a Google Compute Engine VM instance",
+    )
+
+    group_azure_batch = parser.add_argument_group("AZURE_BATCH")
+
+    group_azure_batch.add_argument(
+        "--az-batch",
+        action="store_true",
+        help="Execute workflow on azure batch",
+    )
+
+    group_azure_batch.add_argument(
+        "--az-batch-enable-autoscale",
+        action="store_true",
+        help="Enable autoscaling of the azure batch pool nodes, this option will set the initial dedicated node count to zero, and requires five minutes to resize the cluster, so is only recommended for longer running jobs.",
+    )
+
+    group_azure_batch.add_argument(
+        "--az-batch-account-url",
+        nargs="?",
+        help="Azure batch account url, requires AZ_BATCH_ACCOUNT_KEY environment variable to be set.",
+    )
+
+    group_flux.add_argument(
+        "--flux",
+        action="store_true",
+        help="Execute your workflow on a flux cluster. 
" + "Flux can work with both a shared network filesystem (like NFS) or without. " + "If you don't have a shared filesystem, additionally specify --no-shared-fs.", + ) + + group_tes.add_argument( + "--tes", + metavar="URL", + help="Send workflow tasks to GA4GH TES server specified by url.", + ) + + group_conda = parser.add_argument_group("CONDA") + + group_conda.add_argument( + "--use-conda", + action="store_true", + help="If defined in the rule, run job in a conda environment. " + "If this flag is not set, the conda directive is ignored.", + ) + group_conda.add_argument( + "--conda-not-block-search-path-envvars", + action="store_true", + help="Do not block environment variables that modify the search path " + "(R_LIBS, PYTHONPATH, PERL5LIB, PERLLIB) when using conda environments.", + ) + group_conda.add_argument( + "--list-conda-envs", + action="store_true", + help="List all conda environments and their location on disk.", + ) + group_conda.add_argument( + "--conda-prefix", + metavar="DIR", + default=os.environ.get("SNAKEMAKE_CONDA_PREFIX", None), + help="Specify a directory in which the 'conda' and 'conda-archive' " + "directories are created. These are used to store conda environments " + "and their archives, respectively. If not supplied, the value is set " + "to the '.snakemake' directory relative to the invocation directory. " + "If supplied, the `--use-conda` flag must also be set. The value may " + "be given as a relative path, which will be extrapolated to the " + "invocation directory, or as an absolute path. The value can also be " + "provided via the environment variable $SNAKEMAKE_CONDA_PREFIX.", + ) + group_conda.add_argument( + "--conda-cleanup-envs", + action="store_true", + help="Cleanup unused conda environments.", + ) + + from snakemake.deployment.conda import CondaCleanupMode + + group_conda.add_argument( + "--conda-cleanup-pkgs", + type=CondaCleanupMode, + const=CondaCleanupMode.tarballs, + choices=list(CondaCleanupMode), + nargs="?", + help="Cleanup conda packages after creating environments. " + "In case of 'tarballs' mode, will clean up all downloaded package tarballs. " + "In case of 'cache' mode, will additionally clean up unused package caches. " + "If mode is omitted, will default to only cleaning up the tarballs.", + ) + group_conda.add_argument( + "--conda-create-envs-only", + action="store_true", + help="If specified, only creates the job-specific " + "conda environments then exits. The `--use-conda` " + "flag must also be set.", + ) + group_conda.add_argument( + "--conda-frontend", + default="mamba", + choices=["conda", "mamba"], + help="Choose the conda frontend for installing environments. " + "Mamba is much faster and highly recommended.", + ) + + group_singularity = parser.add_argument_group("SINGULARITY") + + group_singularity.add_argument( + "--use-singularity", + action="store_true", + help="If defined in the rule, run job within a singularity container. " + "If this flag is not set, the singularity directive is ignored.", + ) + group_singularity.add_argument( + "--singularity-prefix", + metavar="DIR", + help="Specify a directory in which singularity images will be stored." + "If not supplied, the value is set " + "to the '.snakemake' directory relative to the invocation directory. " + "If supplied, the `--use-singularity` flag must also be set. 
The value " + "may be given as a relative path, which will be extrapolated to the " + "invocation directory, or as an absolute path.", + ) + group_singularity.add_argument( + "--singularity-args", + default="", + metavar="ARGS", + help="Pass additional args to singularity.", + ) + group_singularity.add_argument( + "--cleanup-containers", + action="store_true", + help="Remove unused (singularity) containers", + ) + + group_env_modules = parser.add_argument_group("ENVIRONMENT MODULES") + + group_env_modules.add_argument( + "--use-envmodules", + action="store_true", + help="If defined in the rule, run job within the given environment " + "modules, loaded in the given order. This can be combined with " + "--use-conda and --use-singularity, which will then be only used as a " + "fallback for rules which don't define environment modules.", + ) + + # Add namespaced arguments to parser for each plugin + ExecutorPluginRegistry().register_cli_args(parser) + return parser + + +def generate_parser_metadata(parser, args): + """Given a populated parser, generate the original command along with + metadata that can be handed to a logger to use as needed. + """ + command = "snakemake %s" % " ".join( + parser._source_to_settings["command_line"][""][1] + ) + metadata = args.__dict__ + metadata.update({"command": command}) + return metadata + + +def main(argv=None): + """Main entry point.""" + + if sys.version_info < MIN_PY_VERSION: + print( + f"Snakemake requires at least Python {MIN_PY_VERSION}.", + file=sys.stderr, + ) + exit(1) + + parser = get_argument_parser() + args = parser.parse_args(argv) + + snakefile = args.snakefile + if snakefile is None: + for p in SNAKEFILE_CHOICES: + if os.path.exists(p): + snakefile = p + break + if snakefile is None: + print( + "Error: no Snakefile found, tried {}.".format( + ", ".join(SNAKEFILE_CHOICES) + ), + file=sys.stderr, + ) + sys.exit(1) + + # Custom argument parsing based on chosen executor + # We also only validate an executor plugin when it's selected + executor_args = None + if args.executor: + plugin = ExecutorPluginRegistry().plugins[args.executor] + + # This is the dataclass prepared by the executor + executor_args = plugin.get_executor_settings(args) + + # Hold a handle to the plugin class + executor_args._executor = plugin + + workflow_profile = None + if args.workflow_profile != "none": + if args.workflow_profile: + workflow_profile = args.workflow_profile + else: + # checking for default profile + default_path = Path("profiles/default") + workflow_profile_candidates = [ + default_path, + Path(snakefile).parent.joinpath(default_path), + ] + for profile in workflow_profile_candidates: + if profile.exists(): + workflow_profile = profile + break + + if args.profile == "none": + args.profile = None + + if (args.profile or workflow_profile) and args.mode == ExecMode.default: + # Reparse args while inferring config file from profile. 
+ # But only do this if the user has invoked Snakemake (ExecMode.default) + profiles = [] + if args.profile: + profiles.append(args.profile) + if workflow_profile: + workflow_profile_stmt = f" and workflow specific profile {workflow_profile}" + profiles.append(workflow_profile) + else: + workflow_profile_stmt = "" + + print( + f"Using profile{'s' if len(profiles) > 1 else ''} " + f"{' and '.join(map(str, profiles))}{workflow_profile_stmt} for setting default command line arguments.", + file=sys.stderr, + ) + + parser = get_argument_parser(profiles=profiles) + args = parser.parse_args(argv) + + def adjust_path(f): + if os.path.exists(f) or os.path.isabs(f): + return f + else: + return get_profile_file(args.profile, f, return_default=True) + + # update file paths to be relative to the profile + # (if they do not exist relative to CWD) + if args.jobscript: + args.jobscript = adjust_path(args.jobscript) + if args.cluster: + args.cluster = adjust_path(args.cluster) + if args.cluster_sync: + args.cluster_sync = adjust_path(args.cluster_sync) + for key in "cluster_status", "cluster_cancel", "cluster_sidecar": + if getattr(args, key): + setattr(args, key, adjust_path(getattr(args, key))) + if args.report_stylesheet: + args.report_stylesheet = adjust_path(args.report_stylesheet) + + if args.quiet is not None and len(args.quiet) == 0: + # default case, set quiet to progress and rule + args.quiet = ["progress", "rules"] + + if args.bash_completion: + cmd = b"complete -o bashdefault -C snakemake-bash-completion snakemake" + sys.stdout.buffer.write(cmd) + sys.exit(0) + + if args.batch is not None and args.forceall: + print( + "--batch may not be combined with --forceall, because recomputed upstream " + "jobs in subsequent batches may render already obtained results outdated." 
+        )
+
+    try:
+        resources = parse_resources(args.resources)
+        config = parse_config(args)
+
+        if args.default_resources is not None:
+            default_resources = DefaultResources(args.default_resources)
+        else:
+            default_resources = None
+
+        batch = parse_batch(args)
+        overwrite_threads = parse_set_threads(args)
+        overwrite_resources = parse_set_resources(args)
+        overwrite_resource_scopes = parse_set_resource_scope(args)
+
+        overwrite_scatter = parse_set_scatter(args)
+
+        overwrite_groups = parse_groups(args)
+        group_components = parse_group_components(args)
+    except ValueError as e:
+        print(e, file=sys.stderr)
+        print("", file=sys.stderr)
+        sys.exit(1)
+
+    non_local_exec = (
+        args.cluster
+        or args.slurm
+        or args.slurm_jobstep
+        or args.cluster_sync
+        or args.tibanna
+        or args.kubernetes
+        or args.tes
+        or args.az_batch
+        or args.google_lifesciences
+        or args.drmaa
+        or args.flux
+    )
+    no_exec = (
+        args.print_compilation
+        or args.list_code_changes
+        or args.list_conda_envs
+        or args.list_input_changes
+        or args.list_params_changes
+        or args.list
+        or args.list_target_rules
+        or args.list_untracked
+        or args.list_version_changes
+        or args.export_cwl
+        or args.generate_unit_tests
+        or args.dag
+        or args.d3dag
+        or args.filegraph
+        or args.rulegraph
+        or args.summary
+        or args.detailed_summary
+        or args.lint
+        or args.containerize
+        or args.report
+        or args.gui
+        or args.archive
+        or args.unlock
+        or args.cleanup_metadata
+    )
+
+    try:
+        cores, jobs = parse_cores_jobs(
+            args.cores, args.jobs, no_exec, non_local_exec, args.dryrun
+        )
+        args.cores = cores
+        args.jobs = jobs
+    except CliException as err:
+        print(err.msg, file=sys.stderr)
+        sys.exit(1)
+
+    if args.drmaa_log_dir is not None and not os.path.isabs(args.drmaa_log_dir):
+        args.drmaa_log_dir = os.path.abspath(os.path.expanduser(args.drmaa_log_dir))
+
+    if args.runtime_profile:
+        import yappi
+
+        yappi.start()
+
+    if args.immediate_submit and not args.notemp:
+        print(
+            "Error: --immediate-submit has to be combined with --notemp, "
+            "because temp file handling is not supported in this mode.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if (args.conda_prefix or args.conda_create_envs_only) and not args.use_conda:
+        if args.conda_prefix and os.environ.get("SNAKEMAKE_CONDA_PREFIX", False):
+            print(
+                "Warning: The environment variable SNAKEMAKE_CONDA_PREFIX is set "
+                "but --use-conda is not. "
+                "Snakemake will ignore SNAKEMAKE_CONDA_PREFIX "
+                "and conda environments will not be used or created.",
+                file=sys.stderr,
+            )
+            args.conda_prefix = None
+        else:
+            print(
+                "Error: --use-conda must be set if --conda-prefix or "
+                "--conda-create-envs-only is set.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    if args.singularity_prefix and not args.use_singularity:
+        print(
+            "Error: --use-singularity must be set if --singularity-prefix is set.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if args.kubernetes and (
+        not args.default_remote_provider or not args.default_remote_prefix
+    ):
+        print(
+            "Error: --kubernetes must be combined with "
+            "--default-remote-provider and --default-remote-prefix, see "
+            "https://snakemake.readthedocs.io/en/stable/executing/cloud.html"
+            "#executing-a-snakemake-workflow-via-kubernetes",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if args.tibanna:
+        if not args.default_remote_prefix:
+            print(
+                "Error: --tibanna must be combined with --default-remote-prefix "
+                "to provide bucket name and subdirectory (prefix) "
+                "(e.g. 'bucketname/projectname').",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        args.default_remote_prefix = args.default_remote_prefix.rstrip("/")
+        if not args.tibanna_sfn:
+            args.tibanna_sfn = os.environ.get("TIBANNA_DEFAULT_STEP_FUNCTION_NAME", "")
+        if not args.tibanna_sfn:
+            print(
+                "Error: to use --tibanna, either --tibanna-sfn or environment variable "
+                "TIBANNA_DEFAULT_STEP_FUNCTION_NAME must be set and exported "
+                "to provide name of the tibanna unicorn step function "
+                "(e.g. 'tibanna_unicorn_monty'). The step function must be deployed first "
+                "using tibanna cli (e.g. tibanna deploy_unicorn --usergroup=monty "
+                "--buckets=bucketname)",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    if args.az_batch:
+        if not args.default_remote_provider or not args.default_remote_prefix:
+            print(
+                "Error: --az-batch must be combined with "
+                "--default-remote-provider AzBlob and --default-remote-prefix to "
+                "provide a blob container name\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        elif args.az_batch_account_url is None:
+            print(
+                "Error: --az-batch-account-url must be set when --az-batch is used\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        elif not url_can_parse(args.az_batch_account_url):
+            print(
+                "Error: invalid azure batch account url, please use format: https://{account_name}.{location}.batch.azure.com.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        elif os.getenv("AZ_BATCH_ACCOUNT_KEY") is None:
+            print(
+                "Error: environment variable AZ_BATCH_ACCOUNT_KEY must be set when --az-batch is used\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    if args.google_lifesciences:
+        if (
+            not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
+            and not args.google_lifesciences_service_account_email
+        ):
+            print(
+                "Error: Either the GOOGLE_APPLICATION_CREDENTIALS environment variable "
+                "or --google-lifesciences-service-account-email must be available "
+                "for --google-lifesciences",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+        if not args.default_remote_prefix:
+            print(
+                "Error: --google-lifesciences must be combined with "
+                "--default-remote-prefix to provide bucket name and "
+                "subdirectory (prefix) (e.g. 'bucketname/projectname').",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    if args.delete_all_output and args.delete_temp_output:
+        print(
+            "Error: --delete-all-output and --delete-temp-output are mutually exclusive.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    if args.gui is not None:
+        try:
+            import snakemake.gui as gui
+        except ImportError:
+            print(
+                "Error: GUI needs Flask to be installed. 
Install " + "with easy_install or contact your administrator.", + file=sys.stderr, + ) + sys.exit(1) + + _logging.getLogger("werkzeug").setLevel(_logging.ERROR) + + _snakemake = partial(snakemake, os.path.abspath(snakefile)) + gui.register(_snakemake, args) + + if ":" in args.gui: + host, port = args.gui.split(":") + else: + port = args.gui + host = "127.0.0.1" + + url = f"http://{host}:{port}" + print(f"Listening on {url}.", file=sys.stderr) + + def open_browser(): + try: + webbrowser.open(url) + except: + pass + + print("Open this address in your browser to access the GUI.", file=sys.stderr) + threading.Timer(0.5, open_browser).start() + success = True + + try: + gui.app.run(debug=False, threaded=True, port=int(port), host=host) + + except (KeyboardInterrupt, SystemExit): + # silently close + pass + else: + log_handler = [] + if args.log_handler_script is not None: + if not os.path.exists(args.log_handler_script): + print( + "Error: no log handler script found, {}.".format( + args.log_handler_script + ), + file=sys.stderr, + ) + sys.exit(1) + log_script = SourceFileLoader("log", args.log_handler_script).load_module() + try: + log_handler.append(log_script.log_handler) + except: + print( + 'Error: Invalid log handler script, {}. Expect python function "log_handler(msg)".'.format( + args.log_handler_script + ), + file=sys.stderr, + ) + sys.exit(1) + + if args.log_service == "slack": + slack_logger = logging.SlackLogger() + log_handler.append(slack_logger.log_handler) + + elif args.wms_monitor or args.log_service == "wms": + # Generate additional metadata for server + metadata = generate_parser_metadata(parser, args) + wms_logger = logging.WMSLogger( + args.wms_monitor, args.wms_monitor_arg, metadata=metadata + ) + log_handler.append(wms_logger.log_handler) + + if args.draft_notebook: + from snakemake import notebook + + args.target = [args.draft_notebook] + args.edit_notebook = notebook.EditMode(draft_only=True) + elif args.edit_notebook: + from snakemake import notebook + + args.target = [args.edit_notebook] + args.force = True + args.edit_notebook = notebook.EditMode(args.notebook_listen) + + aggregated_wait_for_files = args.wait_for_files + if args.wait_for_files_file is not None: + wait_for_files([args.wait_for_files_file], latency_wait=args.latency_wait) + + with open(args.wait_for_files_file) as fd: + extra_wait_files = [line.strip() for line in fd.readlines()] + + if aggregated_wait_for_files is None: + aggregated_wait_for_files = extra_wait_files + else: + aggregated_wait_for_files.extend(extra_wait_files) + + success = snakemake( + snakefile, + batch=batch, + cache=args.cache, + report=args.report, + report_stylesheet=args.report_stylesheet, + lint=args.lint, + containerize=args.containerize, + generate_unit_tests=args.generate_unit_tests, + listrules=args.list, + list_target_rules=args.list_target_rules, + cores=args.cores, + local_cores=args.local_cores, + nodes=args.jobs, + resources=resources, + overwrite_threads=overwrite_threads, + max_threads=args.max_threads, + overwrite_scatter=overwrite_scatter, + default_resources=default_resources, + overwrite_resources=overwrite_resources, + overwrite_resource_scopes=overwrite_resource_scopes, + config=config, + configfiles=args.configfile, + config_args=args.config, + workdir=args.directory, + targets=args.target, + target_jobs=parse_target_jobs_cli_args(args), + dryrun=args.dryrun, + printshellcmds=args.printshellcmds, + debug_dag=args.debug_dag, + printdag=args.dag, + printrulegraph=args.rulegraph, + printfilegraph=args.filegraph, 
+ printd3dag=args.d3dag, + touch=args.touch, + forcetargets=args.force, + forceall=args.forceall, + forcerun=args.forcerun, + prioritytargets=args.prioritize, + until=args.until, + omit_from=args.omit_from, + stats=args.stats, + nocolor=args.nocolor, + quiet=args.quiet, + keepgoing=args.keep_going, + slurm=args.slurm, + slurm_jobstep=args.slurm_jobstep, + rerun_triggers=args.rerun_triggers, + cluster=args.cluster, + cluster_sync=args.cluster_sync, + drmaa=args.drmaa, + drmaa_log_dir=args.drmaa_log_dir, + kubernetes=args.kubernetes, + container_image=args.container_image, + k8s_cpu_scalar=args.k8s_cpu_scalar, + k8s_service_account_name=args.k8s_service_account_name, + flux=args.flux, + tibanna=args.tibanna, + tibanna_sfn=args.tibanna_sfn, + az_batch=args.az_batch, + az_batch_enable_autoscale=args.az_batch_enable_autoscale, + az_batch_account_url=args.az_batch_account_url, + google_lifesciences=args.google_lifesciences, + google_lifesciences_regions=args.google_lifesciences_regions, + google_lifesciences_location=args.google_lifesciences_location, + google_lifesciences_cache=args.google_lifesciences_keep_cache, + google_lifesciences_service_account_email=args.google_lifesciences_service_account_email, + google_lifesciences_network=args.google_lifesciences_network, + google_lifesciences_subnetwork=args.google_lifesciences_subnetwork, + tes=args.tes, + precommand=args.precommand, + preemption_default=args.preemption_default, + preemptible_rules=args.preemptible_rules, + tibanna_config=args.tibanna_config, + jobname=args.jobname, + immediate_submit=args.immediate_submit, + standalone=True, + ignore_ambiguity=args.allow_ambiguity, + lock=not args.nolock, + unlock=args.unlock, + cleanup_metadata=args.cleanup_metadata, + conda_cleanup_envs=args.conda_cleanup_envs, + cleanup_containers=args.cleanup_containers, + cleanup_shadow=args.cleanup_shadow, + force_incomplete=args.rerun_incomplete, + ignore_incomplete=args.ignore_incomplete, + list_version_changes=args.list_version_changes, + list_code_changes=args.list_code_changes, + list_input_changes=args.list_input_changes, + list_params_changes=args.list_params_changes, + list_untracked=args.list_untracked, + summary=args.summary, + detailed_summary=args.detailed_summary, + archive=args.archive, + delete_all_output=args.delete_all_output, + delete_temp_output=args.delete_temp_output, + print_compilation=args.print_compilation, + verbose=args.verbose, + debug=args.debug, + jobscript=args.jobscript, + notemp=args.notemp, + all_temp=args.all_temp, + keep_remote_local=args.keep_remote, + greediness=args.greediness, + no_hooks=args.no_hooks, + overwrite_shellcmd=args.overwrite_shellcmd, + latency_wait=args.latency_wait, + wait_for_files=aggregated_wait_for_files, + keep_target_files=args.keep_target_files, + allowed_rules=args.allowed_rules, + max_jobs_per_second=args.max_jobs_per_second, + max_status_checks_per_second=args.max_status_checks_per_second, + restart_times=args.retries, + attempt=args.attempt, + force_use_threads=args.force_use_threads, + use_conda=args.use_conda, + conda_frontend=args.conda_frontend, + conda_prefix=args.conda_prefix, + conda_cleanup_pkgs=args.conda_cleanup_pkgs, + list_conda_envs=args.list_conda_envs, + use_singularity=args.use_singularity, + use_env_modules=args.use_envmodules, + singularity_prefix=args.singularity_prefix, + shadow_prefix=args.shadow_prefix, + singularity_args=args.singularity_args, + scheduler=args.scheduler, + scheduler_ilp_solver=args.scheduler_ilp_solver, + 
conda_create_envs_only=args.conda_create_envs_only, + mode=args.mode, + wrapper_prefix=args.wrapper_prefix, + default_remote_provider=args.default_remote_provider, + default_remote_prefix=args.default_remote_prefix, + assume_shared_fs=not args.no_shared_fs, + cluster_status=args.cluster_status, + cluster_cancel=args.cluster_cancel, + cluster_cancel_nargs=args.cluster_cancel_nargs, + cluster_sidecar=args.cluster_sidecar, + export_cwl=args.export_cwl, + show_failed_logs=args.show_failed_logs, + keep_incomplete=args.keep_incomplete, + keep_metadata=not args.drop_metadata, + edit_notebook=args.edit_notebook, + envvars=args.envvars, + overwrite_groups=overwrite_groups, + group_components=group_components, + max_inventory_wait_time=args.max_inventory_time, + log_handler=log_handler, + execute_subworkflows=not args.no_subworkflows, + conda_not_block_search_path_envvars=args.conda_not_block_search_path_envvars, + scheduler_solver_path=args.scheduler_solver_path, + conda_base_path=args.conda_base_path, + local_groupid=args.local_groupid, + executor_args=executor_args, + cleanup_scripts=not args.skip_script_cleanup, + ) + + if args.runtime_profile: + with open(args.runtime_profile, "w") as out: + profile = yappi.get_func_stats() + profile.sort("totaltime") + profile.print_all( + out=out, + columns={ + 0: ("name", 120), + 1: ("ncall", 10), + 2: ("tsub", 8), + 3: ("ttot", 8), + 4: ("tavg", 8), + }, + ) + + sys.exit(0 if success else 1) + + +def bash_completion(snakefile="Snakefile"): + """Entry point for bash completion.""" + if len(sys.argv) < 2: + print( + "Calculate bash completion for snakemake. This tool shall not be invoked by hand." + ) + sys.exit(1) + + def print_candidates(candidates): + if candidates: + candidates = sorted(set(candidates)) + ## Use bytes for avoiding '^M' under Windows. + sys.stdout.buffer.write(b"\n".join(s.encode() for s in candidates)) + + prefix = sys.argv[2] + + if prefix.startswith("-"): + print_candidates( + action.option_strings[0] + for action in get_argument_parser()._actions + if action.option_strings and action.option_strings[0].startswith(prefix) + ) + else: + candidates = [] + files = glob.glob(f"{prefix}*") + if files: + candidates.extend(files) + if os.path.exists(snakefile): + workflow = Workflow(snakefile=snakefile) + workflow.include(snakefile) + + candidates.extend( + [file for file in workflow.concrete_files if file.startswith(prefix)] + + [rule.name for rule in workflow.rules if rule.name.startswith(prefix)] + ) + if len(candidates) > 0: + print_candidates(candidates) + sys.exit(0) diff --git a/snakemake/common/__init__.py b/snakemake/common/__init__.py index f7988298c..84452fc0b 100644 --- a/snakemake/common/__init__.py +++ b/snakemake/common/__init__.py @@ -1,5 +1,5 @@ __author__ = "Johannes Köster" -__copyright__ = "Copyright 2022, Johannes Köster" +__copyright__ = "Copyright 2023, Johannes Köster" __email__ = "johannes.koester@protonmail.com" __license__ = "MIT" @@ -7,9 +7,11 @@ import contextlib import itertools import math +import operator import platform import hashlib import inspect +import sys import threading import uuid import os @@ -26,11 +28,11 @@ MIN_PY_VERSION = (3, 7) DYNAMIC_FILL = "__snakemake_dynamic__" -SNAKEMAKE_SEARCHPATH = str(Path(__file__).parent.parent.parent) UUID_NAMESPACE = uuid.uuid5(uuid.NAMESPACE_URL, "https://snakemake.readthedocs.io") NOTHING_TO_BE_DONE_MSG = ( "Nothing to be done (all requested files are present and up to date)." 
) +RERUN_TRIGGERS = ["mtime", "params", "input", "software-env", "code"] ON_WINDOWS = platform.system() == "Windows" # limit the number of input/output files list in job properties @@ -38,6 +40,13 @@ IO_PROP_LIMIT = 100 +def get_snakemake_searchpaths(): + paths = [str(Path(__file__).parent.parent.parent)] + [ + path for path in sys.path if path.endswith("site-packages") + ] + return list(unique_justseen(paths)) + + def mb_to_mib(mb): return int(math.ceil(mb * 0.95367431640625)) @@ -185,40 +194,6 @@ def bytesto(bytes, to, bsize=1024): return answer -class Mode: - """ - Enum for execution mode of Snakemake. - This handles the behavior of e.g. the logger. - """ - - default = 0 - subprocess = 1 - cluster = 2 - - -class lazy_property(property): - __slots__ = ["method", "cached", "__doc__"] - - @staticmethod - def clean(instance, method): - delattr(instance, method) - - def __init__(self, method): - self.method = method - self.cached = f"_{method.__name__}" - super().__init__(method, doc=method.__doc__) - - def __get__(self, instance, owner): - cached = ( - getattr(instance, self.cached) if hasattr(instance, self.cached) else None - ) - if cached is not None: - return cached - value = self.method(instance) - setattr(instance, self.cached, value) - return value - - def strip_prefix(text, prefix): if text.startswith(prefix): return text[len(prefix) :] @@ -306,3 +281,14 @@ async def async_lock(_lock: threading.Lock): yield # the lock is held finally: _lock.release() + + +def unique_justseen(iterable, key=None): + """ + List unique elements, preserving order. Remember only the element just seen. + + From https://docs.python.org/3/library/itertools.html#itertools-recipes + """ + # unique_justseen('AAAABBBCCDAABBB') --> A B C D A B + # unique_justseen('ABBcCAD', str.lower) --> A B c A D + return map(next, map(operator.itemgetter(1), itertools.groupby(iterable, key))) diff --git a/snakemake/cwl.py b/snakemake/cwl.py index 12cedeff1..0389e9b37 100644 --- a/snakemake/cwl.py +++ b/snakemake/cwl.py @@ -13,7 +13,7 @@ from snakemake.utils import format from snakemake.exceptions import WorkflowError from snakemake.shell import shell -from snakemake.common import get_container_image, Mode +from snakemake.common import get_container_image def cwl( diff --git a/snakemake/dag.py b/snakemake/dag.py index 6c792f081..5e9dbb123 100755 --- a/snakemake/dag.py +++ b/snakemake/dag.py @@ -12,12 +12,15 @@ import textwrap import time import uuid +import subprocess from collections import Counter, defaultdict, deque, namedtuple from functools import partial from itertools import chain, filterfalse, groupby from operator import attrgetter from pathlib import Path +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface + from snakemake import workflow from snakemake import workflow as _workflow from snakemake.common import DYNAMIC_FILL, ON_WINDOWS, group_into_chunks, is_local_file @@ -37,7 +40,6 @@ WildcardError, WorkflowError, ) -from snakemake.interfaces import DAGExecutorInterface from snakemake.io import ( PeriodicityDetector, get_flag_value, @@ -2372,7 +2374,7 @@ def add(path): for env in envs: add(env) - except (Exception, BaseException) as e: + except BaseException as e: os.remove(path) raise e diff --git a/snakemake/deployment/conda.py b/snakemake/deployment/conda.py index 5e8107694..5a6f53479 100644 --- a/snakemake/deployment/conda.py +++ b/snakemake/deployment/conda.py @@ -31,7 +31,6 @@ from snakemake.logging import logger from snakemake.common import ( is_local_file, - lazy_property, parse_uri, 
ON_WINDOWS, ) @@ -42,6 +41,7 @@ contains_wildcard, _IOFile, ) +from snakemake_interface_executor_plugins.utils import lazy_property class CondaCleanupMode(Enum): @@ -318,7 +318,7 @@ def create_archive(self): ) as e: shutil.rmtree(env_archive) raise WorkflowError(f"Error downloading conda package {pkg_url}.") - except (Exception, BaseException) as e: + except BaseException as e: shutil.rmtree(env_archive) raise e return env_archive diff --git a/snakemake/deployment/singularity.py b/snakemake/deployment/singularity.py index bc4b5acb4..4c0a8c2f5 100644 --- a/snakemake/deployment/singularity.py +++ b/snakemake/deployment/singularity.py @@ -3,24 +3,31 @@ __email__ = "johannes.koester@uni-due.de" __license__ = "MIT" +from pathlib import Path import subprocess import shutil import os import hashlib from snakemake.common import ( + get_snakemake_searchpaths, is_local_file, parse_uri, - lazy_property, - SNAKEMAKE_SEARCHPATH, ) from snakemake.exceptions import WorkflowError from snakemake.logging import logger +from snakemake_interface_executor_plugins.utils import lazy_property SNAKEMAKE_MOUNTPOINT = "/mnt/snakemake" +def get_snakemake_searchpath_mountpoints(): + paths = get_snakemake_searchpaths() + base = Path("/mnt/snakemake_searchpaths") + return [str(base / f"item_{i}") for i in range(len(paths))] + + class Image: def __init__(self, url, dag, is_containerized): if " " in url: @@ -110,7 +117,12 @@ def shellcmd( if is_python_script: # mount host snakemake module into container - args += f" --bind {repr(SNAKEMAKE_SEARCHPATH)}:{repr(SNAKEMAKE_MOUNTPOINT)}" + args += " ".join( + f" --bind {repr(searchpath)}:{repr(mountpoint)}" + for searchpath, mountpoint in zip( + get_snakemake_searchpaths(), get_snakemake_searchpath_mountpoints() + ) + ) if container_workdir: args += f" --pwd {repr(container_workdir)}" diff --git a/snakemake/exceptions.py b/snakemake/exceptions.py index 081a67de7..91dd612cc 100644 --- a/snakemake/exceptions.py +++ b/snakemake/exceptions.py @@ -8,6 +8,7 @@ import textwrap from tokenize import TokenError from snakemake.logging import logger +from snakemake_interface_executor_plugins.exceptions import WorkflowError def format_error( @@ -147,40 +148,6 @@ def print_exception(ex, linemaps): traceback.print_exception(type(ex), ex, ex.__traceback__) -class WorkflowError(Exception): - @staticmethod - def format_arg(arg): - if isinstance(arg, str): - return arg - elif isinstance(arg, WorkflowError): - spec = "" - if arg.rule is not None: - spec += f"rule {arg.rule}" - if arg.snakefile is not None: - if spec: - spec += ", " - spec += f"line {arg.lineno}, {arg.snakefile}" - - if spec: - spec = f" ({spec})" - - return "{}{}:\n{}".format( - arg.__class__.__name__, spec, textwrap.indent(str(arg), " ") - ) - else: - return f"{arg.__class__.__name__}: {arg}" - - def __init__(self, *args, lineno=None, snakefile=None, rule=None): - super().__init__("\n".join(self.format_arg(arg) for arg in args)) - if rule is not None: - self.lineno = rule.lineno - self.snakefile = rule.snakefile - else: - self.lineno = lineno - self.snakefile = snakefile - self.rule = rule - - class SourceFileError(WorkflowError): def __init__(self, msg): super().__init__(f"Error in source file definition: {msg}") diff --git a/snakemake/executors/__init__.py b/snakemake/executors/__init__.py index 62fb37123..5dfa909ba 100644 --- a/snakemake/executors/__init__.py +++ b/snakemake/executors/__init__.py @@ -24,15 +24,21 @@ import uuid import re import math -from snakemake.interfaces import ( - DAGExecutorInterface, + +from 
snakemake_interface_executor_plugins.executors.base import AbstractExecutor +from snakemake_interface_executor_plugins.executors.real import RealExecutor +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface +from snakemake_interface_executor_plugins.jobs import ( ExecutorJobInterface, - GroupJobExecutorInterface, SingleJobExecutorInterface, - WorkflowExecutorInterface, + GroupJobExecutorInterface, ) -from snakemake.target_jobs import encode_target_jobs_cli_args -from fractions import Fraction +from snakemake_interface_executor_plugins.utils import sleep +from snakemake_interface_executor_plugins.utils import ExecMode from snakemake.shell import shell from snakemake.logging import logger @@ -47,163 +53,16 @@ CacheMissException, ) from snakemake.common import ( - Mode, get_container_image, get_uuid, - lazy_property, async_lock, ) -from snakemake.executors.common import format_cli_arg, join_cli_args - - -# TODO move each executor into a separate submodule - - -async def sleep(): - # do not sleep on CI. In that case we just want to quickly test everything. - if os.environ.get("CI") != "true": - await asyncio.sleep(10) - else: - await asyncio.sleep(1) - - -class AbstractExecutor(ABC): - def __init__( - self, - workflow: WorkflowExecutorInterface, - dag: DAGExecutorInterface, - printreason=False, - quiet=False, - printshellcmds=False, - printthreads=True, - keepincomplete=False, - ): - self.workflow = workflow - self.dag = dag - self.quiet = quiet - self.printreason = printreason - self.printshellcmds = printshellcmds - self.printthreads = printthreads - self.latency_wait = workflow.latency_wait - self.keepincomplete = keepincomplete - - def get_default_remote_provider_args(self): - return join_cli_args( - [ - self.workflow_property_to_arg("default_remote_prefix"), - self.workflow_property_to_arg("default_remote_provider", attr="name"), - ] - ) - - def get_set_resources_args(self): - return format_cli_arg( - "--set-resources", - [ - f"{rule}:{name}={value}" - for rule, res in self.workflow.overwrite_resources.items() - for name, value in res.items() - ], - skip=not self.workflow.overwrite_resources, - ) - - def get_default_resources_args(self, default_resources=None): - default_resources = default_resources or self.workflow.default_resources - return format_cli_arg("--default-resources", default_resources.args) - - def get_resource_scopes_args(self): - return format_cli_arg( - "--set-resource-scopes", self.workflow.overwrite_resource_scopes - ) - - def get_resource_declarations_dict(self, job: ExecutorJobInterface): - def isdigit(i): - s = str(i) - # Adapted from https://stackoverflow.com/a/1265696 - if s[0] in ("-", "+"): - return s[1:].isdigit() - return s.isdigit() - - excluded_resources = self.workflow.resource_scopes.excluded.union( - {"_nodes", "_cores"} - ) - return { - resource: value - for resource, value in job.resources.items() - if isinstance(value, int) - # need to check bool seperately because bool is a subclass of int - and isdigit(value) and resource not in excluded_resources - } - - def get_resource_declarations(self, job: ExecutorJobInterface): - resources = [ - f"{resource}={value}" - for resource, value in 
self.get_resource_declarations_dict(job).items() - ] - return format_cli_arg("--resources", resources) - - def run_jobs( - self, - jobs: List[ExecutorJobInterface], - callback=None, - submit_callback=None, - error_callback=None, - ): - """Run a list of jobs that is ready at a given point in time. - - By default, this method just runs each job individually. - This method can be overwritten to submit many jobs in a more efficient way than one-by-one. - Note that in any case, for each job, the callback functions have to be called individually! - """ - for job in jobs: - self.run( - job, - callback=callback, - submit_callback=submit_callback, - error_callback=error_callback, - ) - - def run( - self, - job: ExecutorJobInterface, - callback=None, - submit_callback=None, - error_callback=None, - ): - """Run a specific job or group job.""" - self._run(job) - callback(job) - - @abstractmethod - def shutdown(self): - ... - - @abstractmethod - def cancel(self): - ... - - def _run(self, job: ExecutorJobInterface): - job.check_protected_output() - self.printjob(job) - - def rule_prefix(self, job: ExecutorJobInterface): - return "local " if job.is_local else "" - - def printjob(self, job: ExecutorJobInterface): - job.log_info(skip_dynamic=True) - - def print_job_error(self, job: ExecutorJobInterface, msg=None, **kwargs): - job.log_error(msg, **kwargs) - - @abstractmethod - def handle_job_success(self, job: ExecutorJobInterface): - ... - - @abstractmethod - def handle_job_error(self, job: ExecutorJobInterface): - ... class DryrunExecutor(AbstractExecutor): + def get_exec_mode(self): + raise NotImplementedError() + def printjob(self, job: ExecutorJobInterface): super().printjob(job) if job.is_group(): @@ -241,232 +100,22 @@ def handle_job_error(self, job: ExecutorJobInterface): pass -class RealExecutor(AbstractExecutor): +class TouchExecutor(RealExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - printreason=False, - quiet=False, - printshellcmds=False, - assume_shared_fs=True, - keepincomplete=False, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, ): super().__init__( workflow, dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - keepincomplete=keepincomplete, + stats, + logger, + executor_settings=None, ) - self.assume_shared_fs = assume_shared_fs - self.stats = Stats() - self.snakefile = workflow.main_snakefile - def register_job(self, job: ExecutorJobInterface): - job.register() - - def _run(self, job: ExecutorJobInterface, callback=None, error_callback=None): - super()._run(job) - self.stats.report_job_start(job) - - try: - self.register_job(job) - except IOError as e: - logger.info( - "Failed to set marker file for job started ({}). " - "Snakemake will work, but cannot ensure that output files " - "are complete in case of a kill signal or power loss. 
" - "Please ensure write permissions for the " - "directory {}".format(e, self.workflow.persistence.path) - ) - - def handle_job_success( - self, - job: ExecutorJobInterface, - upload_remote=True, - handle_log=True, - handle_touch=True, - ignore_missing_output=False, - ): - job.postprocess( - upload_remote=upload_remote, - handle_log=handle_log, - handle_touch=handle_touch, - ignore_missing_output=ignore_missing_output, - latency_wait=self.latency_wait, - assume_shared_fs=self.assume_shared_fs, - keep_metadata=self.workflow.keep_metadata, - ) - self.stats.report_job_end(job) - - def handle_job_error(self, job: ExecutorJobInterface, upload_remote=True): - job.postprocess( - error=True, - assume_shared_fs=self.assume_shared_fs, - latency_wait=self.latency_wait, - ) - - def workflow_property_to_arg( - self, property, flag=None, quote=True, skip=False, invert=False, attr=None - ): - if skip: - return "" - - value = getattr(self.workflow, property) - - if value is not None and attr is not None: - value = getattr(value, attr) - - if flag is None: - flag = f"--{property.replace('_', '-')}" - - if invert and isinstance(value, bool): - value = not value - - return format_cli_arg(flag, value, quote=quote) - - @lazy_property - def general_args(self): - """Return a string to add to self.exec_job that includes additional - arguments from the command line. This is currently used in the - ClusterExecutor and CPUExecutor, as both were using the same - code. Both have base class of the RealExecutor. - """ - w2a = self.workflow_property_to_arg - - return join_cli_args( - [ - "--force", - "--keep-target-files", - "--keep-remote", - "--max-inventory-time 0", - "--nocolor", - "--notemp", - "--no-hooks", - "--nolock", - "--ignore-incomplete", - format_cli_arg("--keep-incomplete", self.keepincomplete), - w2a("rerun_triggers"), - w2a("cleanup_scripts", flag="--skip-script-cleanup"), - w2a("shadow_prefix"), - w2a("use_conda"), - w2a("conda_frontend"), - w2a("conda_prefix"), - w2a("conda_base_path", skip=not self.assume_shared_fs), - w2a("use_singularity"), - w2a("singularity_prefix"), - w2a("singularity_args"), - w2a("execute_subworkflows", flag="--no-subworkflows", invert=True), - w2a("max_threads"), - w2a("use_env_modules", flag="--use-envmodules"), - w2a("keep_metadata", flag="--drop-metadata", invert=True), - w2a("wrapper_prefix"), - w2a("overwrite_threads", flag="--set-threads"), - w2a("overwrite_scatter", flag="--set-scatter"), - w2a("local_groupid", skip=self.job_specific_local_groupid), - w2a("conda_not_block_search_path_envvars"), - w2a("overwrite_configfiles", flag="--configfiles"), - w2a("config_args", flag="--config"), - w2a("printshellcmds"), - w2a("latency_wait"), - w2a("scheduler_type", flag="--scheduler"), - format_cli_arg( - "--scheduler-solver-path", - os.path.dirname(sys.executable), - skip=not self.assume_shared_fs, - ), - self.get_set_resources_args(), - self.get_default_remote_provider_args(), - self.get_default_resources_args(), - self.get_resource_scopes_args(), - self.get_workdir_arg(), - join_cli_args(self.additional_general_args()), - format_cli_arg("--mode", self.get_exec_mode()), - ] - ) - - def additional_general_args(self): - """Inherit this method to add stuff to the general args. - - A list must be returned. 
- """ - return [] - - def get_workdir_arg(self): - return self.workflow_property_to_arg("overwrite_workdir", flag="--directory") - - def get_job_args(self, job: ExecutorJobInterface, **kwargs): - return join_cli_args( - [ - format_cli_arg( - "--target-jobs", encode_target_jobs_cli_args(job.get_target_spec()) - ), - # Restrict considered rules for faster DAG computation. - # This does not work for updated jobs because they need - # to be updated in the spawned process as well. - format_cli_arg( - "--allowed-rules", - job.rules, - quote=False, - skip=job.is_branched or job.is_updated, - ), - # Ensure that a group uses its proper local groupid. - format_cli_arg("--local-groupid", job.jobid, skip=not job.is_group()), - format_cli_arg("--cores", kwargs.get("cores", self.cores)), - format_cli_arg("--attempt", job.attempt), - format_cli_arg("--force-use-threads", not job.is_group()), - self.get_resource_declarations(job), - ] - ) - - @property - def job_specific_local_groupid(self): - return True - - def get_snakefile(self): - return self.snakefile - - @abstractmethod - def get_python_executable(self): - ... - - @abstractmethod - def get_exec_mode(self): - ... - - def get_envvar_declarations(self): - return "" - - def get_job_exec_prefix(self, job: ExecutorJobInterface): - return "" - - def get_job_exec_suffix(self, job: ExecutorJobInterface): - return "" - - def format_job_exec(self, job: ExecutorJobInterface): - prefix = self.get_job_exec_prefix(job) - if prefix: - prefix += " &&" - suffix = self.get_job_exec_suffix(job) - if suffix: - suffix = f"&& {suffix}" - return join_cli_args( - [ - prefix, - self.get_envvar_declarations(), - self.get_python_executable(), - "-m snakemake", - format_cli_arg("--snakefile", self.get_snakefile()), - self.get_job_args(job), - self.general_args, - suffix, - ] - ) - - -class TouchExecutor(RealExecutor): def run( self, job: ExecutorJobInterface, @@ -483,6 +132,9 @@ def run( print_exception(ex, self.workflow.linemaps) error_callback(job) + def get_exec_mode(self): + raise NotImplementedError() + def handle_job_success(self, job: ExecutorJobInterface): super().handle_job_success(job, ignore_missing_output=True) @@ -492,9 +144,6 @@ def cancel(self): def shutdown(self): pass - def get_exec_mode(self): - raise NotImplementedError() - def get_python_executable(self): raise NotImplementedError() @@ -513,32 +162,31 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - workers, - printreason=False, - quiet=False, - printshellcmds=False, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, + cores: int, use_threads=False, - cores=1, - keepincomplete=False, ): super().__init__( workflow, dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - keepincomplete=keepincomplete, + stats, + logger, + executor_settings=None, + job_core_limit=cores, ) self.use_threads = use_threads - self.cores = cores # Zero thread jobs do not need a thread, but they occupy additional workers. # Hence we need to reserve additional workers for them. 
- workers = workers + 5 if workers is not None else 5 + workers = cores + 5 if cores is not None else 5 self.workers = workers self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=self.workers) + def get_exec_mode(self): + return ExecMode.subprocess + @property def job_specific_local_groupid(self): return False @@ -546,9 +194,6 @@ def job_specific_local_groupid(self): def get_job_exec_prefix(self, job: ExecutorJobInterface): return f"cd {shlex.quote(self.workflow.workdir_init)}" - def get_exec_mode(self): - return Mode.subprocess - def get_python_executable(self): return sys.executable @@ -720,7 +365,7 @@ def _callback( except SpawnedJobError: # don't print error message, this is done by the spawned subprocess error_callback(job) - except (Exception, BaseException) as ex: + except BaseException as ex: self.print_job_error(job) if self.workflow.verbose or (not job.is_group() and not job.is_shell): print_exception(ex, self.workflow.linemaps) @@ -736,315 +381,29 @@ def handle_job_error(self, job: ExecutorJobInterface): self.workflow.persistence.cleanup(job) -class ClusterExecutor(RealExecutor): - """Backend for distributed execution. - - The key idea is that a job is converted into a script that invokes Snakemake again, in whatever environment is targeted. The script is submitted to some job management platform (e.g. a cluster scheduler like slurm). - This class can be specialized to generate more specific backends, also for the cloud. - """ - - default_jobscript = "jobscript.sh" - - def __init__( - self, - workflow: WorkflowExecutorInterface, - dag: DAGExecutorInterface, - cores, - jobname="snakejob.{name}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, - cluster_config=None, - local_input=None, - restart_times=None, - assume_shared_fs=True, - max_status_checks_per_second=1, - disable_default_remote_provider_args=False, - disable_default_resources_args=False, - disable_envvar_declarations=False, - keepincomplete=False, - ): - from throttler import Throttler - - local_input = local_input or [] - super().__init__( - workflow, - dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - assume_shared_fs=assume_shared_fs, - keepincomplete=keepincomplete, - ) - self.max_status_checks_per_second = max_status_checks_per_second - - if not self.assume_shared_fs: - # use relative path to Snakefile - self.snakefile = os.path.relpath(workflow.main_snakefile) - - self.is_default_jobscript = False - jobscript = workflow.jobscript - if jobscript is None: - jobscript = os.path.join(os.path.dirname(__file__), self.default_jobscript) - self.is_default_jobscript = True - try: - with open(jobscript) as f: - self.jobscript = f.read() - except IOError as e: - raise WorkflowError(e) - - if not "jobid" in get_wildcard_names(jobname): - raise WorkflowError( - 'Defined jobname ("{}") has to contain the wildcard {jobid}.' 
- ) - - self.jobname = jobname - self._tmpdir = None - self.cores = cores if cores else "all" - self.cluster_config = cluster_config if cluster_config else dict() - - self.restart_times = restart_times - - self.active_jobs = list() - self.lock = threading.Lock() - self.wait = True - self.wait_thread = threading.Thread(target=self._wait_thread) - self.wait_thread.daemon = True - self.wait_thread.start() - - self.disable_default_remote_provider_args = disable_default_remote_provider_args - self.disable_default_resources_args = disable_default_resources_args - self.disable_envvar_declarations = disable_envvar_declarations - - max_status_checks_frac = Fraction( - max_status_checks_per_second - ).limit_denominator() - self.status_rate_limiter = Throttler( - rate_limit=max_status_checks_frac.numerator, - period=max_status_checks_frac.denominator, - ) - - def get_default_remote_provider_args(self): - if not self.disable_default_remote_provider_args: - return super().get_default_remote_provider_args() - else: - return "" - - def get_default_resources_args(self, default_resources=None): - if not self.disable_default_resources_args: - return super().get_default_resources_args(default_resources) - else: - return "" - - def get_workdir_arg(self): - if self.assume_shared_fs: - return super().get_workdir_arg() - return "" - - def get_envvar_declarations(self): - if not self.disable_envvar_declarations: - return " ".join( - f"{var}={repr(os.environ[var])}" for var in self.workflow.envvars - ) - else: - return "" - - def get_python_executable(self): - return sys.executable if self.assume_shared_fs else "python" - - def get_exec_mode(self): - return Mode.cluster - - def get_job_args(self, job: ExecutorJobInterface): - waitfiles_parameter = "" - if self.assume_shared_fs: - wait_for_files = [] - wait_for_files.append(self.tmpdir) - wait_for_files.extend(job.get_wait_for_files()) - - # Only create extra file if we have more than 20 input files. - # This should not require the file creation in most cases. - if len(wait_for_files) > 20: - wait_for_files_file = self.get_jobscript(job) + ".waitforfilesfile.txt" - with open(wait_for_files_file, "w") as fd: - print(*wait_for_files, sep="\n", file=fd) - - waitfiles_parameter = format_cli_arg( - "--wait-for-files-file", wait_for_files_file - ) - else: - waitfiles_parameter = format_cli_arg("--wait-for-files", wait_for_files) - - return f"{super().get_job_args(job)} {waitfiles_parameter}" - - @abstractmethod - async def _wait_for_jobs(self): - ... - - def _wait_thread(self): - try: - asyncio.run(self._wait_for_jobs()) - except Exception as e: - print(e) - self.workflow.scheduler.executor_error_callback(e) - - def shutdown(self): - with self.lock: - self.wait = False - self.wait_thread.join() - if not self.workflow.immediate_submit: - # Only delete tmpdir (containing jobscripts) if not using - # immediate_submit. With immediate_submit, jobs can be scheduled - # after this method is completed. Hence we have to keep the - # directory. 
- shutil.rmtree(self.tmpdir) - - def cancel(self): - self.shutdown() - - def _run(self, job: ExecutorJobInterface, callback=None, error_callback=None): - if self.assume_shared_fs: - job.remove_existing_output() - job.download_remote_input() - super()._run(job, callback=callback, error_callback=error_callback) - - @property - def tmpdir(self): - if self._tmpdir is None: - self._tmpdir = tempfile.mkdtemp(dir=".snakemake", prefix="tmp.") - return os.path.abspath(self._tmpdir) - - def get_jobname(self, job: ExecutorJobInterface): - return job.format_wildcards(self.jobname, cluster=self.cluster_wildcards(job)) - - def get_jobscript(self, job: ExecutorJobInterface): - f = self.get_jobname(job) - - if os.path.sep in f: - raise WorkflowError( - "Path separator ({}) found in job name {}. " - "This is not supported.".format(os.path.sep, f) - ) - - return os.path.join(self.tmpdir, f) - - def write_jobscript(self, job: ExecutorJobInterface, jobscript): - exec_job = self.format_job_exec(job) - - try: - content = self.jobscript.format( - properties=job.properties(cluster=self.cluster_params(job)), - exec_job=exec_job, - ) - except KeyError as e: - if self.is_default_jobscript: - raise e - else: - raise WorkflowError( - f"Error formatting custom jobscript {self.workflow.jobscript}: value for {e} not found.\n" - "Make sure that your custom jobscript is defined as expected." - ) - - logger.debug(f"Jobscript:\n{content}") - with open(jobscript, "w") as f: - print(content, file=f) - os.chmod(jobscript, os.stat(jobscript).st_mode | stat.S_IXUSR | stat.S_IRUSR) - - def cluster_params(self, job: ExecutorJobInterface): - """Return wildcards object for job from cluster_config.""" - cluster = self.cluster_config.get("__default__", dict()).copy() - cluster.update(self.cluster_config.get(job.name, dict())) - # Format values with available parameters from the job. - for key, value in list(cluster.items()): - if isinstance(value, str): - try: - cluster[key] = job.format_wildcards(value) - except NameError as e: - if job.is_group(): - msg = ( - "Failed to format cluster config for group job. " - "You have to ensure that your default entry " - "does not contain any items that group jobs " - "cannot provide, like {rule}, {wildcards}." - ) - else: - msg = ( - "Failed to format cluster config " - "entry for job {}.".format(job.rule.name) - ) - raise WorkflowError(msg, e) - - return cluster - - def cluster_wildcards(self, job: ExecutorJobInterface): - return Wildcards(fromdict=self.cluster_params(job)) - - def handle_job_success(self, job: ExecutorJobInterface): - super().handle_job_success( - job, upload_remote=False, handle_log=False, handle_touch=False - ) - - def handle_job_error(self, job: ExecutorJobInterface): - # TODO what about removing empty remote dirs?? This cannot be decided - # on the cluster node. - super().handle_job_error(job, upload_remote=False) - logger.debug("Cleanup job metadata.") - # We have to remove metadata here as well. - # It will be removed by the CPUExecutor in case of a shared FS, - # but we might not see the removal due to filesystem latency. - # By removing it again, we make sure that it is gone on the host FS. - if not self.keepincomplete: - self.workflow.persistence.cleanup(job) - # Also cleanup the jobs output files, in case the remote job - # was not able to, due to e.g. timeout. 
- logger.debug("Cleanup failed jobs output files.") - job.cleanup() - - def print_cluster_job_error(self, job_info, jobid): - job = job_info.job - kind = ( - f"rule {job.rule.name}" - if not job.is_group() - else f"group job {job.groupid}" - ) - logger.error( - "Error executing {} on cluster (jobid: {}, external: " - "{}, jobscript: {}). For error details see the cluster " - "log and the log files of the involved rule(s).".format( - kind, jobid, job_info.jobid, job_info.jobscript - ) - ) - - GenericClusterJob = namedtuple( "GenericClusterJob", "job jobid callback error_callback jobscript jobfinished jobfailed", ) -class GenericClusterExecutor(ClusterExecutor): +class GenericClusterExecutor(RemoteExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, submitcmd="qsub", statuscmd=None, cancelcmd=None, cancelnargs=None, sidecarcmd=None, - cluster_config=None, jobname="snakejob.{rulename}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, - restart_times=0, - assume_shared_fs=True, max_status_checks_per_second=1, - keepincomplete=False, ): self.submitcmd = submitcmd - if not assume_shared_fs and statuscmd is None: + if not workflow.assume_shared_fs and statuscmd is None: raise WorkflowError( "When no shared filesystem can be assumed, a " "status command must be given." @@ -1062,23 +421,18 @@ def __init__( super().__init__( workflow, dag, - cores, + stats, + logger, + None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - restart_times=restart_times, - assume_shared_fs=assume_shared_fs, max_status_checks_per_second=max_status_checks_per_second, - keepincomplete=keepincomplete, ) self.sidecar_vars = None if self.sidecarcmd: self._launch_sidecar() - if not statuscmd and not assume_shared_fs: + if not statuscmd and not self.assume_shared_fs: raise WorkflowError( "If no shared filesystem is used, you have to " "specify a cluster status command." 
@@ -1240,9 +594,7 @@ def run( self.external_jobid[f] for f in job.input if f in self.external_jobid ) try: - submitcmd = job.format_wildcards( - self.submitcmd, dependencies=deps, cluster=self.cluster_wildcards(job) - ) + submitcmd = job.format_wildcards(self.submitcmd, dependencies=deps) except AttributeError as e: raise WorkflowError(str(e), rule=job.rule if not job.is_group() else None) @@ -1403,7 +755,7 @@ def job_status(job): ) -class SynchronousClusterExecutor(ClusterExecutor): +class SynchronousClusterExecutor(RemoteExecutor): """ invocations like "qsub -sync y" (SGE) or "bsub -K" (LSF) are synchronous, blocking the foreground thread and returning the @@ -1414,30 +766,19 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, submitcmd="qsub", - cluster_config=None, jobname="snakejob.{rulename}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, - restart_times=0, - assume_shared_fs=True, - keepincomplete=False, ): super().__init__( workflow, dag, - cores, + stats, + logger, + None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - restart_times=restart_times, - assume_shared_fs=assume_shared_fs, max_status_checks_per_second=10, - keepincomplete=keepincomplete, ) self.submitcmd = submitcmd self.external_jobid = dict() @@ -1468,9 +809,7 @@ def run( self.external_jobid[f] for f in job.input if f in self.external_jobid ) try: - submitcmd = job.format_wildcards( - self.submitcmd, dependencies=deps, cluster=self.cluster_wildcards(job) - ) + submitcmd = job.format_wildcards(self.submitcmd, dependencies=deps) except AttributeError as e: raise WorkflowError(str(e), rule=job.rule if not job.is_group() else None) @@ -1525,37 +864,26 @@ async def _wait_for_jobs(self): ) -class DRMAAExecutor(ClusterExecutor): +class DRMAAExecutor(RemoteExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob.{rulename}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, drmaa_args="", drmaa_log_dir=None, - cluster_config=None, - restart_times=0, - assume_shared_fs=True, max_status_checks_per_second=1, - keepincomplete=False, ): super().__init__( workflow, dag, - cores, + stats, + logger, + None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - restart_times=restart_times, - assume_shared_fs=assume_shared_fs, max_status_checks_per_second=max_status_checks_per_second, - keepincomplete=keepincomplete, ) try: import drmaa @@ -1602,9 +930,7 @@ def run( self.write_jobscript(job, jobscript) try: - drmaa_args = job.format_wildcards( - self.drmaa_args, cluster=self.cluster_wildcards(job) - ) + drmaa_args = job.format_wildcards(self.drmaa_args) except AttributeError as e: raise WorkflowError(str(e), rule=job.rule) @@ -1733,38 +1059,28 @@ def change_working_directory(directory=None): ) -class KubernetesExecutor(ClusterExecutor): +class KubernetesExecutor(RemoteExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, namespace, container_image=None, k8s_cpu_scalar=1.0, k8s_service_account_name=None, jobname="{rulename}.{jobid}", - printreason=False, - quiet=False, - printshellcmds=False, - 
cluster_config=None, - local_input=None, - restart_times=None, - keepincomplete=False, ): self.workflow = workflow super().__init__( workflow, dag, + stats, + logger, None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - local_input=local_input, - restart_times=restart_times, - assume_shared_fs=False, max_status_checks_per_second=10, disable_envvar_declarations=True, ) @@ -2201,24 +1517,30 @@ async def _wait_for_jobs(self): ) -class TibannaExecutor(ClusterExecutor): +class TibannaExecutor(RemoteExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, tibanna_sfn, precommand="", tibanna_config=False, container_image=None, - printreason=False, - quiet=False, - printshellcmds=False, - local_input=None, - restart_times=None, max_status_checks_per_second=1, - keepincomplete=False, ): + super().__init__( + workflow, + dag, + stats, + logger, + None, + max_status_checks_per_second=max_status_checks_per_second, + disable_default_remote_provider_args=True, + disable_default_resources_args=True, + disable_envvar_declarations=True, + ) self.workflow = workflow self.workflow_sources = [] for wfs in dag.get_sources(): @@ -2250,23 +1572,8 @@ def __init__( logger.debug("precommand= " + self.precommand) logger.debug("bucket=" + self.s3_bucket) logger.debug("subdir=" + self.s3_subdir) - self.quiet = quiet + self.quiet = workflow.quiet - super().__init__( - workflow, - dag, - cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - local_input=local_input, - restart_times=restart_times, - assume_shared_fs=False, - max_status_checks_per_second=max_status_checks_per_second, - disable_default_remote_provider_args=True, - disable_default_resources_args=True, - disable_envvar_declarations=True, - ) self.container_image = container_image or get_container_image() logger.info(f"Using {self.container_image} for Tibanna jobs.") self.tibanna_config = tibanna_config @@ -2683,7 +1990,7 @@ def run_wrapper( # Re-raise the keyboard interrupt in order to record an error in the # scheduler but ignore it raise e - except (Exception, BaseException) as ex: + except BaseException as ex: # this ensures that exception can be re-raised in the parent thread origin = get_exception_origin(ex, linemaps) if origin is not None: @@ -2701,5 +2008,5 @@ def run_wrapper( if benchmark is not None: try: write_benchmark_records(bench_records, benchmark) - except (Exception, BaseException) as ex: + except BaseException as ex: raise WorkflowError(ex) diff --git a/snakemake/executors/azure_batch.py b/snakemake/executors/azure_batch.py index d46ddd6e8..cc2077fd0 100644 --- a/snakemake/executors/azure_batch.py +++ b/snakemake/executors/azure_batch.py @@ -18,16 +18,18 @@ from typing import Optional from urllib.parse import urlparse +from snakemake_interface_executor_plugins.executors import RemoteExecutor +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.utils import sleep + +from snakemake.exceptions import WorkflowError import msrest.authentication as msa from snakemake.common import async_lock, bytesto, get_container_image, get_file_hash from snakemake.exceptions import WorkflowError -from 
snakemake.executors import ClusterExecutor, sleep -from snakemake.interfaces import ( - DAGExecutorInterface, - ExecutorJobInterface, - WorkflowExecutorInterface, -) +from snakemake.executors import sleep from snakemake.logging import logger AzBatchJob = namedtuple("AzBatchJob", "job jobid task_id callback error_callback") @@ -221,24 +223,20 @@ def signed_session(self, session=None): return super(AzureIdentityCredentialAdapter, self).signed_session(session) -class AzBatchExecutor(ClusterExecutor): +class AzBatchExecutor(RemoteExecutor): "Azure Batch Executor" def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob.{name}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, container_image=None, regions=None, location=None, cache=False, - local_input=None, - restart_times=None, max_status_checks_per_second=1, az_batch_account_url=None, az_batch_enable_autoscale=False, @@ -246,14 +244,11 @@ def __init__( super().__init__( workflow, dag, + stats, + logger, None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - restart_times=restart_times, - assume_shared_fs=False, - max_status_checks_per_second=1, + max_status_checks_per_second=max_status_checks_per_second, ) try: diff --git a/snakemake/executors/common.py b/snakemake/executors/common.py deleted file mode 100644 index 8f1f61df8..000000000 --- a/snakemake/executors/common.py +++ /dev/null @@ -1,34 +0,0 @@ -from collections import UserDict -from snakemake.io import not_iterable -from urllib.parse import urlparse - - -def format_cli_arg(flag, value, quote=True, skip=False): - if not skip and value: - if isinstance(value, bool): - value = "" - else: - value = format_cli_pos_arg(value, quote=quote) - return f"{flag} {value}" - return "" - - -def format_cli_pos_arg(value, quote=True): - if isinstance(value, (dict, UserDict)): - return join_cli_args(repr(f"{key}={val}") for key, val in value.items()) - elif not_iterable(value): - return repr(value) - else: - return join_cli_args(repr(v) for v in value) - - -def join_cli_args(args): - return " ".join(arg for arg in args if arg) - - -def url_can_parse(url: str) -> bool: - """ - returns true if urllib.parse.urlparse can parse - scheme and netloc - """ - return all(list(urlparse(url))[:2]) diff --git a/snakemake/executors/flux.py b/snakemake/executors/flux.py index 3e9c6b1a9..6b0ec1575 100644 --- a/snakemake/executors/flux.py +++ b/snakemake/executors/flux.py @@ -6,14 +6,16 @@ import os import shlex from collections import namedtuple -from snakemake.exceptions import WorkflowError -from snakemake.executors import ClusterExecutor, sleep -from snakemake.interfaces import ( - DAGExecutorInterface, - ExecutorJobInterface, - WorkflowExecutorInterface, -) +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.utils import sleep +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface + +from snakemake.exceptions import WorkflowError from snakemake.logging import logger from snakemake.resources import DefaultResources 
from snakemake.common import async_lock @@ -31,7 +33,7 @@ ) -class FluxExecutor(ClusterExecutor): +class FluxExecutor(RemoteExecutor): """ The Flux executor deploys workflows to a flux cluster. """ @@ -40,21 +42,17 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob.{name}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, ): super().__init__( workflow, dag, + stats, + logger, None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - assume_shared_fs=False, max_status_checks_per_second=10, ) diff --git a/snakemake/executors/ga4gh_tes.py b/snakemake/executors/ga4gh_tes.py index 446ddbe95..5588f303f 100644 --- a/snakemake/executors/ga4gh_tes.py +++ b/snakemake/executors/ga4gh_tes.py @@ -8,14 +8,16 @@ import os from collections import namedtuple -from snakemake.interfaces import ( - DAGExecutorInterface, - ExecutorJobInterface, - WorkflowExecutorInterface, -) +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.utils import sleep +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface + from snakemake.logging import logger from snakemake.exceptions import WorkflowError -from snakemake.executors import ClusterExecutor from snakemake.common import get_container_image, async_lock TaskExecutionServiceJob = namedtuple( @@ -23,24 +25,27 @@ ) -class TaskExecutionServiceExecutor(ClusterExecutor): +class TaskExecutionServiceExecutor(RemoteExecutor): def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob.{name}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, - cluster_config=None, - local_input=None, - restart_times=None, - assume_shared_fs=False, max_status_checks_per_second=0.5, tes_url=None, container_image=None, ): + super().__init__( + workflow, + dag, + stats, + logger, + None, + jobname=jobname, + max_status_checks_per_second=max_status_checks_per_second, + ) try: import tes except ImportError: @@ -59,24 +64,8 @@ def __init__( user=os.environ.get("FUNNEL_SERVER_USER"), password=os.environ.get("FUNNEL_SERVER_PASSWORD"), ) - logger.info(f"[TES] Job execution on TES: {self.tes_url}") - super().__init__( - workflow, - dag, - None, - jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - local_input=local_input, - restart_times=restart_times, - assume_shared_fs=assume_shared_fs, - max_status_checks_per_second=max_status_checks_per_second, - ) - def get_job_exec_prefix(self, job: ExecutorJobInterface): return "mkdir /tmp/conda && cd /tmp" diff --git a/snakemake/executors/google_lifesciences.py b/snakemake/executors/google_lifesciences.py index 84ad7b075..9b3bf4185 100644 --- a/snakemake/executors/google_lifesciences.py +++ b/snakemake/executors/google_lifesciences.py @@ -15,16 +15,17 @@ import re import math -from snakemake.interfaces import ( - DAGExecutorInterface, - 
ExecutorJobInterface, - WorkflowExecutorInterface, -) +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface + from snakemake.logging import logger from snakemake.exceptions import print_exception from snakemake.exceptions import log_verbose_traceback from snakemake.exceptions import WorkflowError -from snakemake.executors import ClusterExecutor, sleep from snakemake.common import bytesto, get_container_image, get_file_hash, async_lock from snakemake.resources import DefaultResources @@ -51,7 +52,7 @@ def check_source_size(filename, warning_size_gb=0.2): return filename -class GoogleLifeSciencesExecutor(ClusterExecutor): +class GoogleLifeSciencesExecutor(RemoteExecutor): """ The GoogleLifeSciences executor uses Google Cloud Storage, and Compute Engine paired with the Google Life Sciences API. @@ -62,11 +63,9 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob.{name}.{jobid}.sh", - printreason=False, - quiet=False, - printshellcmds=False, container_image=None, regions=None, location=None, @@ -74,29 +73,24 @@ def __init__( service_account_email=None, network=None, subnetwork=None, - local_input=None, - restart_times=None, - max_status_checks_per_second=1, + max_status_checks_per_second=10, preemption_default=None, preemptible_rules=None, ): super().__init__( workflow, dag, + stats, + logger, None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - restart_times=restart_times, - assume_shared_fs=False, - max_status_checks_per_second=10, + max_status_checks_per_second=max_status_checks_per_second, ) # Prepare workflow sources for build package self._set_workflow_sources() # Attach variables for easy access - self.quiet = quiet + self.quiet = workflow.quiet self.workdir = os.path.realpath(os.path.dirname(self.workflow.persistence.path)) self._save_storage_cache = cache diff --git a/snakemake/executors/slurm/slurm_jobstep.py b/snakemake/executors/slurm/slurm_jobstep.py index 0d44f8067..1c19404a8 100644 --- a/snakemake/executors/slurm/slurm_jobstep.py +++ b/snakemake/executors/slurm/slurm_jobstep.py @@ -1,15 +1,15 @@ import os import subprocess -from snakemake.common.tbdstring import TBDString -from snakemake.executors import ClusterExecutor -from snakemake.interfaces import ( - DAGExecutorInterface, - ExecutorJobInterface, - WorkflowExecutorInterface, -) +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface -class SlurmJobstepExecutor(ClusterExecutor): + +class SlurmJobstepExecutor(RemoteExecutor): """ executes SLURM jobsteps and is *only* instaniated in a SLURM job context @@ 
-19,13 +19,9 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - printreason=False, - quiet=False, - printshellcmds=False, - cluster_config=None, - restart_times=0, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, max_status_checks_per_second=0.5, - **kwargs, ): # overwrite the command to execute a single snakemake job if necessary # exec_job = "..." @@ -33,13 +29,9 @@ def __init__( super().__init__( workflow, dag, + stats, + logger, None, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - restart_times=restart_times, - assume_shared_fs=True, max_status_checks_per_second=max_status_checks_per_second, disable_envvar_declarations=True, ) @@ -49,11 +41,6 @@ def __init__( self.cpus_on_node = os.getenv("SLURM_CPUS_ON_NODE") self.jobid = os.getenv("SLURM_JOB_ID") - self.context = dict(kwargs) - self.env_modules = self.context.get("env_modules", None) - - # if not self.mem_per_node - async def _wait_for_jobs(self): pass diff --git a/snakemake/executors/slurm/slurm_submit.py b/snakemake/executors/slurm/slurm_submit.py index 5412b01a6..182846344 100644 --- a/snakemake/executors/slurm/slurm_submit.py +++ b/snakemake/executors/slurm/slurm_submit.py @@ -7,15 +7,16 @@ import shlex import subprocess import uuid -from snakemake.interfaces import ( - DAGExecutorInterface, - ExecutorJobInterface, - WorkflowExecutorInterface, -) + +from snakemake_interface_executor_plugins.dag import DAGExecutorInterface +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor +from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface +from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface from snakemake.logging import logger from snakemake.exceptions import WorkflowError -from snakemake.executors import ClusterExecutor from snakemake.common import async_lock SlurmJob = namedtuple("SlurmJob", "job jobid callback error_callback slurm_logfile") @@ -86,7 +87,7 @@ def get_default_partition(job): return "" -class SlurmExecutor(ClusterExecutor): +class SlurmExecutor(RemoteExecutor): """ the SLURM_Executor abstracts execution on SLURM clusters using snakemake resource string @@ -96,26 +97,18 @@ def __init__( self, workflow: WorkflowExecutorInterface, dag: DAGExecutorInterface, - cores, + stats: StatsExecutorInterface, + logger: LoggerExecutorInterface, jobname="snakejob_{name}_{jobid}", - printreason=False, - quiet=False, - printshellcmds=False, - restart_times=0, max_status_checks_per_second=0.5, - cluster_config=None, ): super().__init__( workflow, dag, - cores, + stats, + logger, + None, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - restart_times=restart_times, - assume_shared_fs=True, max_status_checks_per_second=max_status_checks_per_second, ) self.run_uuid = str(uuid.uuid4()) diff --git a/snakemake/interfaces.py b/snakemake/interfaces.py deleted file mode 100644 index 18b9ac609..000000000 --- a/snakemake/interfaces.py +++ /dev/null @@ -1,489 +0,0 @@ -__author__ = "Johannes Köster" -__copyright__ = "Copyright 2023, Johannes Köster" -__email__ = "johannes.koester@uni-due.de" -__license__ = "MIT" - -import sys - -from abc import ABC, abstractmethod -from typing import Optional, List, Dict - - -class 
ExecutorJobInterface(ABC): - @property - @abstractmethod - def name(self): - ... - - @property - @abstractmethod - def jobid(self): - ... - - @abstractmethod - def logfile_suggestion(self, prefix: str) -> str: - ... - - @abstractmethod - def is_group(self): - ... - - @abstractmethod - def log_info(self, skip_dynamic=False): - ... - - @abstractmethod - def log_error(self, msg=None, **kwargs): - ... - - @abstractmethod - def remove_existing_output(self): - ... - - @abstractmethod - def download_remote_input(self): - ... - - @abstractmethod - def properties(self, omit_resources=["_cores", "_nodes"], **aux_properties): - ... - - @property - @abstractmethod - def resources(self): - ... - - @abstractmethod - def check_protected_output(self): - ... - - @property - @abstractmethod - def is_local(self): - ... - - @property - @abstractmethod - def is_branched(self): - ... - - @property - @abstractmethod - def is_updated(self): - ... - - @property - @abstractmethod - def output(self): - ... - - @abstractmethod - def register(self): - ... - - @abstractmethod - def postprocess(self): - ... - - @abstractmethod - def get_target_spec(self): - ... - - @abstractmethod - def rules(self): - ... - - @property - @abstractmethod - def attempt(self): - ... - - @property - @abstractmethod - def input(self): - ... - - @property - @abstractmethod - def threads(self) -> int: - ... - - @property - @abstractmethod - def resources(self): - ... - - @property - @abstractmethod - def log(self): - ... - - @abstractmethod - def cleanup(self): - ... - - @abstractmethod - def get_wait_for_files(self): - ... - - @abstractmethod - def format_wildcards(self, string, **variables): - ... - - @property - @abstractmethod - def needs_singularity(self): - ... - - -class SingleJobExecutorInterface(ABC): - @property - @abstractmethod - def rule(self): - ... - - @abstractmethod - def prepare(self): - ... - - @property - @abstractmethod - def conda_env(self): - ... - - @property - @abstractmethod - def container_img_path(self): - ... - - @property - @abstractmethod - def env_modules(self): - ... - - @property - @abstractmethod - def benchmark_repeats(self): - ... - - @property - @abstractmethod - def benchmark(self): - ... - - @property - @abstractmethod - def params(self): - ... - - @property - @abstractmethod - def wildcards(self): - ... - - @property - @abstractmethod - def shadow_dir(self): - ... - - @property - @abstractmethod - def is_shadow(self): - ... - - @property - @abstractmethod - def is_run(self): - ... - - @property - @abstractmethod - def is_template_engine(self): - ... - - @property - @abstractmethod - def message(self): - ... - - -class GroupJobExecutorInterface(ABC): - @property - @abstractmethod - def jobs(self): - ... - - @property - @abstractmethod - def groupid(self): - ... - - @property - @abstractmethod - def toposorted(self): - ... - - -class DAGExecutorInterface(ABC): - @abstractmethod - def is_edit_notebook_job(self, job: ExecutorJobInterface): - ... - - @abstractmethod - def incomplete_external_jobid(self, job: ExecutorJobInterface): - ... - - @abstractmethod - def jobid(self, job: ExecutorJobInterface): - ... - - @abstractmethod - def get_sources(self): - ... - - -class JobSchedulerExecutorInterface(ABC): - @abstractmethod - def executor_error_callback(self, exception): - ... - - -class PersistenceExecutorInterface(ABC): - @abstractmethod - def cleanup(self): - ... - - @property - @abstractmethod - def path(self): - ... - - @property - @abstractmethod - def aux_path(self): - ... 
- - -class WorkflowExecutorInterface(ABC): - @property - @abstractmethod - def latency_wait(self) -> int: - ... - - @property - @abstractmethod - def rerun_triggers(self) -> Optional[List[str]]: - ... - - @property - @abstractmethod - def shadow_prefix(self) -> Optional[str]: - ... - - @property - @abstractmethod - def conda_frontend(self) -> Optional[str]: - ... - - @property - @abstractmethod - def conda_prefix(self) -> Optional[str]: - ... - - @property - @abstractmethod - def conda_base_path(self) -> Optional[str]: - ... - - @property - @abstractmethod - def singularity_args(self) -> Optional[str]: - ... - - @property - @abstractmethod - def execute_subworkflows(self) -> bool: - ... - - @property - @abstractmethod - def max_threads(self) -> Optional[int]: - ... - - @property - @abstractmethod - def keep_metadata(self) -> bool: - ... - - @property - @abstractmethod - def wrapper_prefix(self) -> Optional[str]: - ... - - @property - @abstractmethod - def overwrite_threads(self) -> Dict[str, int]: - ... - - @property - @abstractmethod - def overwrite_scatter(self) -> Dict[str, int]: - ... - - @property - @abstractmethod - def local_groupid(self): - ... - - @property - @abstractmethod - def conda_not_block_search_path_envvars(self): - ... - - @property - @abstractmethod - def overwrite_configfiles(self): - ... - - @property - @abstractmethod - def config_args(self): - ... - - @property - @abstractmethod - def printshellcmds(self): - ... - - @property - @abstractmethod - def scheduler_type(self): - ... - - @property - @abstractmethod - def overwrite_resources(self): - ... - - @property - @abstractmethod - def default_resources(self): - ... - - @property - @abstractmethod - def overwrite_resource_scopes(self): - ... - - @property - @abstractmethod - def resource_scopes(self): - ... - - @abstractmethod - def get_cache_mode(self, rule): - ... - - @property - @abstractmethod - def output_file_cache(self): - ... - - @property - @abstractmethod - def main_snakefile(self): - ... - - @property - @abstractmethod - def persistence(self) -> PersistenceExecutorInterface: - ... - - @property - @abstractmethod - def keep_metadata(self): - ... - - @property - @abstractmethod - def linemaps(self): - ... - - @property - @abstractmethod - def workdir_init(self): - ... - - @property - @abstractmethod - def use_conda(self): - ... - - @property - @abstractmethod - def use_singularity(self): - ... - - @property - @abstractmethod - def use_env_modules(self): - ... - - @property - @abstractmethod - def debug(self): - ... - - @property - @abstractmethod - def cleanup_scripts(self): - ... - - @property - @abstractmethod - def edit_notebook(self): - ... - - @property - @abstractmethod - def sourcecache(self): - ... - - @property - @abstractmethod - def verbose(self): - ... - - @property - @abstractmethod - def jobscript(self): - ... - - @property - @abstractmethod - def envvars(self): - ... - - @property - @abstractmethod - def scheduler(self) -> JobSchedulerExecutorInterface: - ... - - @property - @abstractmethod - def immediate_submit(self): - ... - - @property - @abstractmethod - def default_remote_prefix(self): - ... - - @property - @abstractmethod - def rules(self): - ... - - @abstractmethod - def get_rule(self, name): - ... 
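The deletion of snakemake/interfaces.py above, together with the constructor changes in the executor hunks, converges on one common shape: the interfaces now come from the snakemake-interface-executor-plugins package, and every remote executor receives (workflow, dag, stats, logger) up front. The following is a minimal sketch of that shape, assuming only the module paths and the super().__init__ call pattern visible in this diff; the class name MyExecutor and the method bodies are hypothetical and are not part of the patch.

    # Illustrative skeleton only, not part of this patch.
    # Assumes the constructor shape shown in the executors above.
    from snakemake_interface_executor_plugins.dag import DAGExecutorInterface
    from snakemake_interface_executor_plugins.executors.remote import RemoteExecutor
    from snakemake_interface_executor_plugins.logging import LoggerExecutorInterface
    from snakemake_interface_executor_plugins.persistence import StatsExecutorInterface
    from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface


    class MyExecutor(RemoteExecutor):
        def __init__(
            self,
            workflow: WorkflowExecutorInterface,
            dag: DAGExecutorInterface,
            stats: StatsExecutorInterface,
            logger: LoggerExecutorInterface,
            jobname="snakejob.{name}.{jobid}.sh",
            max_status_checks_per_second=1,
        ):
            super().__init__(
                workflow,
                dag,
                stats,
                logger,
                None,  # positional argument passed as None, mirroring the calls in this diff
                jobname=jobname,
                max_status_checks_per_second=max_status_checks_per_second,
            )

        async def _wait_for_jobs(self):
            # Poll the remote backend here; the SLURM jobstep executor in this
            # diff implements this method as a no-op.
            pass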
diff --git a/snakemake/io.py b/snakemake/io.py index 227f46eb2..638142efa 100755 --- a/snakemake/io.py +++ b/snakemake/io.py @@ -986,14 +986,6 @@ def format_match(match): return _wildcard_regex.sub(format_match, pattern) -def not_iterable(value): - return ( - isinstance(value, str) - or isinstance(value, dict) - or not isinstance(value, collections.abc.Iterable) - ) - - def is_callable(value): return ( callable(value) @@ -1015,6 +1007,8 @@ def new_from(self, new_value): def flag(value, flag_type, flag_value=True): + from snakemake_interface_executor_plugins.utils import not_iterable + if isinstance(value, AnnotatedString): value.flags[flag_type] = flag_value return value @@ -1109,6 +1103,8 @@ def dynamic(value): A flag for a file that shall be dynamic, i.e. the multiplicity (and wildcard values) will be expanded after a certain rule has been run""" + from snakemake_interface_executor_plugins.utils import not_iterable + annotated = flag(value, "dynamic", True) tocheck = [annotated] if not_iterable(annotated) else annotated for file in tocheck: diff --git a/snakemake/jobs.py b/snakemake/jobs.py index 3c428b2e7..2ff8a1f22 100644 --- a/snakemake/jobs.py +++ b/snakemake/jobs.py @@ -15,7 +15,9 @@ from operator import attrgetter from typing import Optional from abc import ABC, abstractmethod -from snakemake.interfaces import ( + +from snakemake_interface_executor_plugins.utils import lazy_property +from snakemake_interface_executor_plugins.jobs import ( ExecutorJobInterface, GroupJobExecutorInterface, SingleJobExecutorInterface, @@ -38,7 +40,6 @@ from snakemake.common import ( DYNAMIC_FILL, is_local_file, - lazy_property, get_uuid, TBDString, IO_PROP_LIMIT, @@ -141,8 +142,6 @@ def new( class Job(AbstractJob, SingleJobExecutorInterface): - HIGHEST_PRIORITY = sys.maxsize - obj_cache = dict() __slots__ = [ @@ -1082,7 +1081,9 @@ def log_info(self, skip_dynamic=False, indent=False, printshellcmd=True): wildcards=self.wildcards_dict, reason=str(self.dag.reason(self)), resources=self.resources, - priority="highest" if priority == Job.HIGHEST_PRIORITY else priority, + priority="highest" + if priority == ExecutorJobInterface.HIGHEST_PRIORITY + else priority, threads=self.threads, indent=indent, is_checkpoint=self.rule.is_checkpoint, diff --git a/snakemake/logging.py b/snakemake/logging.py index 5af979168..6617a7099 100644 --- a/snakemake/logging.py +++ b/snakemake/logging.py @@ -15,8 +15,9 @@ import inspect import textwrap +from snakemake_interface_executor_plugins.utils import ExecMode + from snakemake.common import DYNAMIC_FILL -from snakemake.common import Mode class ColorizingStreamHandler(_logging.StreamHandler): @@ -34,7 +35,7 @@ class ColorizingStreamHandler(_logging.StreamHandler): } def __init__( - self, nocolor=False, stream=sys.stderr, use_threads=False, mode=Mode.default + self, nocolor=False, stream=sys.stderr, use_threads=False, mode=ExecMode.default ): super().__init__(stream=stream) @@ -45,7 +46,7 @@ def __init__( def can_color_tty(self, mode): if "TERM" in os.environ and os.environ["TERM"] == "dumb": return False - if mode == Mode.subprocess: + if mode == ExecMode.subprocess: return True return self.is_tty and not platform.system() == "Windows" @@ -292,13 +293,13 @@ def __init__(self): self.quiet = set() self.logfile = None self.last_msg_was_job_info = False - self.mode = Mode.default + self.mode = ExecMode.default self.show_failed_logs = False self.logfile_handler = None self.dryrun = False def setup_logfile(self): - if self.mode == Mode.default and not self.dryrun: + if self.mode == 
ExecMode.default and not self.dryrun: os.makedirs(os.path.join(".snakemake", "log"), exist_ok=True) self.logfile = os.path.abspath( os.path.join( @@ -313,7 +314,7 @@ def setup_logfile(self): self.logger.addHandler(self.logfile_handler) def cleanup(self): - if self.mode == Mode.default and self.logfile_handler is not None: + if self.mode == ExecMode.default and self.logfile_handler is not None: self.logger.removeHandler(self.logfile_handler) self.logfile_handler.close() self.log_handler = [self.text_handler] @@ -338,7 +339,7 @@ def set_level(self, level): self.logger.setLevel(level) def logfile_hint(self): - if self.mode == Mode.default and not self.dryrun: + if self.mode == ExecMode.default and not self.dryrun: logfile = self.get_logfile() self.info(f"Complete log: {os.path.relpath(logfile)}") @@ -685,13 +686,13 @@ def setup_logger( handler=[], quiet=False, printshellcmds=False, - printreason=False, + printreason=True, debug_dag=False, nocolor=False, stdout=False, debug=False, use_threads=False, - mode=Mode.default, + mode=ExecMode.default, show_failed_logs=False, dryrun=False, ): diff --git a/snakemake/persistence.py b/snakemake/persistence.py index b13524952..90779c68c 100755 --- a/snakemake/persistence.py +++ b/snakemake/persistence.py @@ -15,8 +15,11 @@ from itertools import count from pathlib import Path +from snakemake_interface_executor_plugins.persistence import ( + PersistenceExecutorInterface, +) + import snakemake.exceptions -from snakemake.interfaces import PersistenceExecutorInterface from snakemake.logging import logger from snakemake.jobs import jobfiles from snakemake.utils import listfiles diff --git a/snakemake/remote/EGA.py b/snakemake/remote/EGA.py index 303d540bc..3952ab817 100644 --- a/snakemake/remote/EGA.py +++ b/snakemake/remote/EGA.py @@ -16,7 +16,7 @@ check_deprecated_retry, ) from snakemake.exceptions import WorkflowError -from snakemake.common import lazy_property +from snakemake_interface_executor_plugins.utils import lazy_property EGAFileInfo = namedtuple("EGAFileInfo", ["size", "status", "id", "checksum"]) diff --git a/snakemake/remote/GS.py b/snakemake/remote/GS.py index 5edc6a183..fcd712a63 100644 --- a/snakemake/remote/GS.py +++ b/snakemake/remote/GS.py @@ -10,8 +10,8 @@ from snakemake.remote import AbstractRemoteObject, AbstractRemoteProvider from snakemake.exceptions import WorkflowError, CheckSumMismatchException -from snakemake.common import lazy_property import snakemake.io +from snakemake_interface_executor_plugins.utils import lazy_property try: import google.cloud diff --git a/snakemake/report/__init__.py b/snakemake/report/__init__.py index 61c9c4afd..ea8424df0 100644 --- a/snakemake/report/__init__.py +++ b/snakemake/report/__init__.py @@ -42,12 +42,13 @@ from snakemake.script import Snakemake from snakemake.common import ( get_input_function_aux_params, - lazy_property, ) from snakemake import logging from snakemake.report import data from snakemake.report.rulegraph_spec import rulegraph_spec +from snakemake_interface_executor_plugins.utils import lazy_property + class EmbeddedMixin(object): """ @@ -572,7 +573,7 @@ def auto_report(dag, path, stylesheet=None): try: with open(stylesheet) as s: custom_stylesheet = s.read() - except (Exception, BaseException) as e: + except BaseException as e: raise WorkflowError("Unable to read custom report stylesheet.", e) logger.info("Creating report...") diff --git a/snakemake/rules.py b/snakemake/rules.py index 765d15230..3f51e5542 100644 --- a/snakemake/rules.py +++ b/snakemake/rules.py @@ -17,6 +17,8 @@ 
except ImportError: # python < 3.11 import sre_constants +from snakemake_interface_executor_plugins.utils import ExecMode + from snakemake.io import ( IOFile, _IOFile, @@ -37,7 +39,6 @@ apply_wildcards, is_flagged, flag, - not_iterable, is_callable, DYNAMIC_FILL, ReportObject, @@ -52,15 +53,14 @@ ) from snakemake.logging import logger from snakemake.common import ( - Mode, ON_WINDOWS, get_function_params, get_input_function_aux_params, - lazy_property, TBDString, mb_to_mib, ) from snakemake.resources import infer_resources +from snakemake_interface_executor_plugins.utils import not_iterable, lazy_property class Rule: @@ -579,7 +579,7 @@ def _set_inoutput_item(self, item, output=False, name=None): else: if ( contains_wildcard_constraints(item) - and self.workflow.mode != Mode.subprocess + and self.workflow.mode != ExecMode.subprocess ): logger.warning( "Wildcard constraints in inputs are ignored. (rule: {})".format( @@ -777,7 +777,7 @@ def apply_input_function( value = TBDString() else: raise e - except (Exception, BaseException) as e: + except BaseException as e: if raw_exceptions: raise e else: @@ -1091,7 +1091,7 @@ def apply(name, res, threads=None): raw_exceptions=True, **aux, ) - except (Exception, BaseException) as e: + except BaseException as e: raise InputFunctionException(e, rule=self, wildcards=wildcards) if isinstance(res, float): diff --git a/snakemake/scheduler.py b/snakemake/scheduler.py index 589bcdc9e..7526a97ac 100644 --- a/snakemake/scheduler.py +++ b/snakemake/scheduler.py @@ -10,6 +10,9 @@ from itertools import chain, accumulate from contextlib import ContextDecorator +from snakemake_interface_executor_plugins.scheduler import JobSchedulerExecutorInterface +from snakemake_interface_executor_plugins.registry import ExecutorPluginRegistry + from snakemake.executors import ( AbstractExecutor, DryrunExecutor, @@ -23,6 +26,7 @@ KubernetesExecutor, TibannaExecutor, ) + from snakemake.executors.slurm.slurm_submit import SlurmExecutor from snakemake.executors.slurm.slurm_jobstep import SlurmJobstepExecutor from snakemake.executors.flux import FluxExecutor @@ -30,10 +34,12 @@ from snakemake.executors.ga4gh_tes import TaskExecutionServiceExecutor from snakemake.exceptions import RuleException, WorkflowError, print_exception from snakemake.common import ON_WINDOWS -from snakemake.interfaces import JobSchedulerExecutorInterface from snakemake.logging import logger from fractions import Fraction +from snakemake.stats import Stats + +registry = ExecutorPluginRegistry() def cumsum(iterable, zero=[0]): @@ -70,7 +76,6 @@ def __init__( slurm_jobstep=None, cluster=None, cluster_status=None, - cluster_config=None, cluster_sync=None, cluster_cancel=None, cluster_cancel_nargs=None, @@ -101,38 +106,33 @@ def __init__( preemptible_rules=None, tibanna_config=False, jobname=None, - quiet=False, - printreason=False, - printshellcmds=False, keepgoing=False, max_jobs_per_second=None, max_status_checks_per_second=100, + # Note this argument doesn't seem to be used (greediness) greediness=1.0, force_use_threads=False, - assume_shared_fs=True, - keepincomplete=False, scheduler_type=None, scheduler_ilp_solver=None, + executor_args=None, ): """Create a new instance of KnapsackJobScheduler.""" cores = workflow.global_resources["_cores"] self.cluster = cluster - self.cluster_config = cluster_config self.cluster_sync = cluster_sync self.dag = dag self.workflow = workflow self.dryrun = dryrun self.touch = touch - self.quiet = quiet + self.quiet = workflow.quiet self.keepgoing = keepgoing self.running = set() 
self.failed = set() self.finished_jobs = 0 self.greediness = 1 self.max_jobs_per_second = max_jobs_per_second - self.keepincomplete = keepincomplete self.scheduler_type = scheduler_type self.scheduler_ilp_solver = scheduler_ilp_solver self._tofinish = [] @@ -164,35 +164,50 @@ def __init__( self._submit_callback = self._noop self._finish_callback = self._proceed + self._stats = Stats() + self._local_executor = None if dryrun: self._executor: AbstractExecutor = DryrunExecutor( workflow, dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, ) elif touch: self._executor = TouchExecutor( workflow, dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, + self.stats, + logger, + ) + + # We have chosen an executor custom plugin + elif executor_args is not None: + plugin = registry.plugins[executor_args._executor.name] + self._local_executor = CPUExecutor( + workflow, + dag, + self.stats, + logger, + local_cores, ) + self._executor = plugin.executor( + workflow, + dag, + self.stats, + logger, + cores, + executor_args=executor_args, + ) + elif slurm: if ON_WINDOWS: raise WorkflowError("SLURM execution is not supported on Windows.") self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, - keepincomplete=keepincomplete, ) # we need to adjust the maximum status checks per second # on a SLURM cluster, to not overstrain the scheduler; @@ -217,11 +232,8 @@ def __init__( self._executor = SlurmExecutor( workflow, dag, - cores=None, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, + self.stats, + logger, max_status_checks_per_second=max_status_checks_per_second, ) @@ -229,10 +241,8 @@ def __init__( self._executor = SlurmJobstepExecutor( workflow, dag, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - env_modules=env_modules, + self.stats, + logger, ) self._local_executor = self._executor @@ -243,12 +253,9 @@ def __init__( self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, - keepincomplete=keepincomplete, ) if cluster or cluster_sync: @@ -267,15 +274,10 @@ def __init__( self._executor = constructor( workflow, dag, - None, + self.stats, + logger, submitcmd=(cluster or cluster_sync), - cluster_config=cluster_config, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - assume_shared_fs=assume_shared_fs, - keepincomplete=keepincomplete, ) if workflow.immediate_submit: self._submit_callback = self._proceed @@ -287,88 +289,68 @@ def __init__( self._executor = DRMAAExecutor( workflow, dag, - None, + self.stats, + logger, drmaa_args=drmaa, drmaa_log_dir=drmaa_log_dir, jobname=jobname, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - assume_shared_fs=assume_shared_fs, max_status_checks_per_second=max_status_checks_per_second, - keepincomplete=keepincomplete, ) elif kubernetes: self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, - keepincomplete=keepincomplete, ) self._executor = KubernetesExecutor( workflow, dag, + self.stats, + logger, kubernetes, container_image=container_image, 
k8s_cpu_scalar=k8s_cpu_scalar, k8s_service_account_name=k8s_service_account_name, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cluster_config=cluster_config, - keepincomplete=keepincomplete, ) elif tibanna: self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, use_threads=use_threads, - cores=local_cores, - keepincomplete=keepincomplete, ) self._executor = TibannaExecutor( workflow, dag, + self.stats, + logger, cores, tibanna_sfn, precommand=precommand, tibanna_config=tibanna_config, container_image=container_image, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - keepincomplete=keepincomplete, ) elif flux: self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, ) self._executor = FluxExecutor( workflow, dag, - cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, + self.stats, + logger, ) elif az_batch: @@ -383,39 +365,34 @@ def __init__( self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, ) self._executor = AzBatchExecutor( workflow, dag, - cores, + self.stats, + logger, container_image=container_image, az_batch_account_url=az_batch_account_url, az_batch_enable_autoscale=az_batch_enable_autoscale, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, ) elif google_lifesciences: self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, ) self._executor = GoogleLifeSciencesExecutor( workflow, dag, - cores, + self.stats, + logger, container_image=container_image, regions=google_lifesciences_regions, location=google_lifesciences_location, @@ -423,9 +400,6 @@ def __init__( service_account_email=google_lifesciences_service_account_email, network=google_lifesciences_network, subnetwork=google_lifesciences_subnetwork, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, preemption_default=preemption_default, preemptible_rules=preemptible_rules, ) @@ -433,21 +407,16 @@ def __init__( self._local_executor = CPUExecutor( workflow, dag, + self.stats, + logger, local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, - cores=local_cores, - keepincomplete=keepincomplete, ) self._executor = TaskExecutionServiceExecutor( workflow, dag, - cores=local_cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, + self.stats, + logger, tes_url=tes, container_image=container_image, ) @@ -456,13 +425,10 @@ def __init__( self._executor = CPUExecutor( workflow, dag, + self.stats, + logger, cores, - printreason=printreason, - quiet=quiet, - printshellcmds=printshellcmds, use_threads=use_threads, - cores=cores, - keepincomplete=keepincomplete, ) from throttler import Throttler @@ -507,10 +473,7 @@ def executor_error_callback(self, exception): @property def stats(self): - try: - return self._executor.stats - except AttributeError: - raise TypeError("Executor does not support stats") + return self._stats @property def open_jobs(self): diff --git a/snakemake/script.py b/snakemake/script.py index db16ea55b..b5783c021 100644 --- a/snakemake/script.py 
+++ b/snakemake/script.py @@ -34,8 +34,8 @@ from snakemake.shell import shell from snakemake.common import ( MIN_PY_VERSION, - SNAKEMAKE_SEARCHPATH, ON_WINDOWS, + get_snakemake_searchpaths, ) from snakemake.deployment import singularity @@ -543,33 +543,34 @@ def generate_preamble( # Obtain search path for current snakemake module. # The module is needed for unpickling in the script. # We append it at the end (as a fallback). - searchpath = SNAKEMAKE_SEARCHPATH + searchpaths = get_snakemake_searchpaths() if container_img is not None: - searchpath = singularity.SNAKEMAKE_MOUNTPOINT - searchpath = repr(searchpath) + searchpaths = singularity.get_snakemake_searchpath_mountpoints() # Add the cache path to the search path so that other cached source files in the same dir # can be imported. if cache_path: + # TODO handle this in case of container_img, analogously to above cache_searchpath = os.path.dirname(cache_path) if cache_searchpath: - searchpath += ", " + repr(cache_searchpath) + searchpaths.append(cache_searchpath) # For local scripts, add their location to the path in case they use path-based imports if is_local: - searchpath += ", " + repr(path.get_basedir().get_path_or_uri()) + searchpaths.append(path.get_basedir().get_path_or_uri()) - return textwrap.dedent( + preamble = textwrap.dedent( """ ######## snakemake preamble start (automatically inserted, do not edit) ######## - import sys; sys.path.extend([{searchpath}]); import pickle; snakemake = pickle.loads({snakemake}); from snakemake.logging import logger; logger.printshellcmds = {printshellcmds}; {preamble_addendum} + import sys; sys.path.extend({searchpaths}); import pickle; snakemake = pickle.loads({snakemake}); from snakemake.logging import logger; logger.printshellcmds = {printshellcmds}; {preamble_addendum} ######## snakemake preamble end ######### """ ).format( - searchpath=searchpath, + searchpaths=repr(searchpaths), snakemake=snakemake, printshellcmds=logger.printshellcmds, preamble_addendum=preamble_addendum, ) + return preamble def get_preamble(self): if isinstance(self.path, LocalSourceFile): @@ -1042,16 +1043,6 @@ def encode_namedlist(values): json_string = json.dumps(dict(snakemake)) - # Obtain search path for current snakemake module. - # We append it at the end (as a fallback). - searchpath = SNAKEMAKE_SEARCHPATH - if container_img is not None: - searchpath = singularity.SNAKEMAKE_MOUNTPOINT - searchpath = repr(searchpath) - # For local scripts, add their location to the path in case they use path-based imports - if is_local: - searchpath += ", " + repr(path.get_basedir().get_path_or_uri()) - return textwrap.dedent( """ json_typegen::json_typegen!("Snakemake", r###"{json_string}"###, {{ @@ -1150,17 +1141,6 @@ def encode_namedlist(values): .open(path)?; Ok(gag::Redirect::stdout(log)?) }} - - fn setup_path(&self) -> anyhow::Result<()> {{ - use std::env; - if let Some(path) = env::var_os("PATH") {{ - let mut paths = env::split_paths(&path).collect::>(); - paths.push(std::path::PathBuf::from("{searchpath}")); - let new_path = env::join_paths(paths)?; - env::set_var("PATH", &new_path); - }} - Ok(()) - }} }} lazy_static::lazy_static! 
{{ @@ -1168,14 +1148,12 @@ def encode_namedlist(values): #[allow(non_upper_case_globals)] static ref snakemake: Snakemake = {{ let s: Snakemake = serde_json::from_str(r###"{json_string}"###).expect("Failed parsing snakemake JSON"); - s.setup_path().expect("Failed setting PATH"); s }}; }} // TODO include addendum, if any {{preamble_addendum}} """ ).format( - searchpath=searchpath, json_string=json_string, preamble_addendum=preamble_addendum, ) diff --git a/snakemake/stats.py b/snakemake/stats.py index 60074dc0f..05d96075b 100644 --- a/snakemake/stats.py +++ b/snakemake/stats.py @@ -1,13 +1,8 @@ -__author__ = "Johannes Köster" -__copyright__ = "Copyright 2022, Johannes Köster" -__email__ = "johannes.koester@uni-due.de" -__license__ = "MIT" - -import time -import json from collections import defaultdict +import json +import time -import snakemake.jobs +from snakemake_interface_executor_plugins.jobs import ExecutorJobInterface fmt_time = time.ctime @@ -68,7 +63,7 @@ def to_json(self, path): "stop-time": stop, "duration": duration, "priority": job.priority - if job.priority != snakemake.jobs.Job.HIGHEST_PRIORITY + if job.priority != ExecutorJobInterface.HIGHEST_PRIORITY else "highest", "resources": dict(job.resources.items()), } diff --git a/snakemake/target_jobs.py b/snakemake/target_jobs.py index 30948319a..d74951750 100644 --- a/snakemake/target_jobs.py +++ b/snakemake/target_jobs.py @@ -1,10 +1,9 @@ from collections import namedtuple import typing -from snakemake.common import parse_key_value_arg - +from snakemake_interface_executor_plugins.utils import TargetSpec -TargetSpec = namedtuple("TargetSpec", ["rulename", "wildcards_dict"]) +from snakemake.common import parse_key_value_arg def parse_target_jobs_cli_args(args): @@ -25,15 +24,3 @@ def parse_wildcard(entry): else: target_jobs.append(TargetSpec(rulename, dict())) return target_jobs - - -def encode_target_jobs_cli_args( - target_jobs: typing.List[TargetSpec], -) -> typing.List[str]: - items = [] - for spec in target_jobs: - wildcards = ",".join( - f"{key}={value}" for key, value in spec.wildcards_dict.items() - ) - items.append(f"{spec.rulename}:{wildcards}") - return items diff --git a/snakemake/utils.py b/snakemake/utils.py index 0d427c3fc..7f7f17610 100644 --- a/snakemake/utils.py +++ b/snakemake/utils.py @@ -7,7 +7,6 @@ import json import re import inspect -from snakemake.sourcecache import LocalSourceFile, infer_source_file import textwrap from itertools import chain import collections @@ -38,6 +37,8 @@ def validate(data, schema, set_default=True): https://python-jsonschema.readthedocs.io/en/latest/faq/ for more information """ + from snakemake.sourcecache import LocalSourceFile, infer_source_file + frame = inspect.currentframe().f_back workflow = frame.f_globals.get("workflow") diff --git a/snakemake/workflow.py b/snakemake/workflow.py index 2819977ca..efbd668b6 100644 --- a/snakemake/workflow.py +++ b/snakemake/workflow.py @@ -11,8 +11,9 @@ from functools import partial import copy from pathlib import Path -from snakemake.interfaces import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.workflow import WorkflowExecutorInterface +from snakemake_interface_executor_plugins.utils import ExecMode from snakemake.logging import logger, format_resources from snakemake.rules import Rule, Ruleorder, RuleProxy @@ -24,7 +25,6 @@ NoRulesException, WorkflowError, ) -from snakemake.shell import shell from snakemake.dag import DAG from snakemake.scheduler import JobScheduler from snakemake.parser import parse @@ -39,7 +39,6 
@@ dynamic, glob_wildcards, flag, - not_iterable, touch, unpack, local, @@ -60,11 +59,10 @@ from snakemake.wrapper import wrapper from snakemake.cwl import cwl from snakemake.template_rendering import render_template - +from snakemake_interface_executor_plugins.utils import not_iterable import snakemake.wrapper from snakemake.common import ( - Mode, ON_WINDOWS, is_local_file, Rules, @@ -119,7 +117,7 @@ def __init__( shadow_prefix=None, scheduler_type="ilp", scheduler_ilp_solver=None, - mode=Mode.default, + mode=ExecMode.default, wrapper_prefix=None, printshellcmds=False, restart_times=None, @@ -147,8 +145,11 @@ def __init__( local_groupid="local", keep_metadata=True, latency_wait=3, + executor_args=None, cleanup_scripts=True, immediate_submit=False, + keep_incomplete=False, + quiet=False, ): """ Create the controller. @@ -215,7 +216,7 @@ def __init__( [] if overwrite_configfiles is None else list(overwrite_configfiles) ) self.run_local = run_local - self.assume_shared_fs = assume_shared_fs + self._assume_shared_fs = assume_shared_fs self.report_text = None self.conda_cleanup_pkgs = conda_cleanup_pkgs self._edit_notebook = edit_notebook @@ -238,14 +239,19 @@ def __init__( self.check_envvars = check_envvars self._max_threads = max_threads self.all_temp = all_temp + self._executor_args = executor_args self._scheduler = None self._local_groupid = local_groupid self._keep_metadata = keep_metadata self._latency_wait = latency_wait + self._keep_incomplete = keep_incomplete + self._quiet = quiet _globals = globals() + from snakemake.shell import shell + + _globals["shell"] = shell _globals["workflow"] = self - _globals["cluster_config"] = copy.deepcopy(self.overwrite_clusterconfig) _globals["checkpoints"] = Checkpoints() _globals["scatter"] = Scatter() _globals["gather"] = Gather() @@ -283,6 +289,22 @@ def __init__( if envvars is not None: self.register_envvars(*envvars) + @property + def quiet(self): + return self._quiet + + @property + def assume_shared_fs(self): + return self._assume_shared_fs + + @property + def keep_incomplete(self): + return self._keep_incomplete + + @property + def executor_args(self): + return self._executor_args + @property def default_remote_prefix(self): return self._default_remote_prefix @@ -653,10 +675,7 @@ def execute( until=[], omit_from=[], prioritytargets=None, - quiet=False, keepgoing=False, - printshellcmds=False, - printreason=False, printdag=False, slurm=None, slurm_jobstep=None, @@ -717,6 +736,7 @@ def execute( subsnakemake=None, updated_files=None, keep_target_files=False, + # Note that keep_shadow doesn't seem to be used? 
keep_shadow=False, keep_remote_local=False, allowed_rules=None, @@ -861,7 +881,7 @@ def files(items): or delete_temp_output, ) - if self.mode in [Mode.subprocess, Mode.cluster]: + if self.mode in [ExecMode.subprocess, ExecMode.remote]: self.persistence.deactivate_cache() if cleanup_metadata: @@ -1115,12 +1135,10 @@ def files(items): cluster_cancel=cluster_cancel, cluster_cancel_nargs=cluster_cancel_nargs, cluster_sidecar=cluster_sidecar, - cluster_config=cluster_config, cluster_sync=cluster_sync, jobname=jobname, max_jobs_per_second=max_jobs_per_second, max_status_checks_per_second=max_status_checks_per_second, - quiet=quiet, keepgoing=keepgoing, drmaa=drmaa, drmaa_log_dir=drmaa_log_dir, @@ -1146,18 +1164,17 @@ def files(items): precommand=precommand, tibanna_config=tibanna_config, container_image=container_image, - printreason=printreason, - printshellcmds=printshellcmds, greediness=greediness, force_use_threads=force_use_threads, - assume_shared_fs=self.assume_shared_fs, - keepincomplete=keepincomplete, scheduler_type=scheduler_type, scheduler_ilp_solver=scheduler_ilp_solver, + executor_args=self.executor_args, ) if not dryrun: if len(dag): + from snakemake.shell import shell + shell_exec = shell.get_executable() if shell_exec is not None: logger.info(f"Using shell: {shell_exec}") @@ -1192,7 +1209,7 @@ def files(items): ): logger.info("Singularity containers: ignored") - if self.mode == Mode.default: + if self.mode == ExecMode.default: logger.run_info("\n".join(dag.stats())) else: logger.info(NOTHING_TO_BE_DONE_MSG) @@ -1203,7 +1220,7 @@ def files(items): else: logger.info(NOTHING_TO_BE_DONE_MSG) return True - if quiet: + if self.quiet: # in case of dryrun and quiet, just print above info and exit return True @@ -1245,7 +1262,7 @@ def log_provenance_info(): log_provenance_info() raise e - if not self.immediate_submit and not dryrun and self.mode == Mode.default: + if not self.immediate_submit and not dryrun and self.mode == ExecMode.default: dag.cleanup_workdir() if success: diff --git a/tests/common.py b/tests/common.py index 383aa2930..7f5f4666f 100644 --- a/tests/common.py +++ b/tests/common.py @@ -18,7 +18,7 @@ import subprocess import tarfile -from snakemake import snakemake +from snakemake.api import snakemake from snakemake.shell import shell from snakemake.common import ON_WINDOWS from snakemake.resources import DefaultResources, GroupResources, ResourceScopes diff --git a/tests/conftest.py b/tests/conftest.py index 01f08aa9e..e43b87ba8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ from snakemake.common import ON_WINDOWS from snakemake.utils import find_bash_on_windows -from snakemake import shell +from snakemake.shell import shell skip_on_windows = pytest.mark.skipif(ON_WINDOWS, reason="Unix stuff") only_on_windows = pytest.mark.skipif(not ON_WINDOWS, reason="Windows stuff") diff --git a/tests/test05/Snakefile b/tests/test05/Snakefile index a7a7fa54e..4c1cdefd2 100644 --- a/tests/test05/Snakefile +++ b/tests/test05/Snakefile @@ -1,41 +1,58 @@ -from snakemake import shell - # Only effects for tests on Win shell.executable("bash") -chromosomes = [1,2,3] +chromosomes = [1, 2, 3] + +# shell('rm test.*.inter 2> /dev/null | true') -#shell('rm test.*.inter 2> /dev/null | true') rule all: - input: 'test.predictions' + input: + "test.predictions", + rule compute1: - input: '{name}.in' - output: inter=expand('{{name}}.{chr}.inter', chr=chromosomes) - resources: gpu=1 - run: - assert len(output.inter) > 0 - print(output.inter) - for out in output: - shell('(cat 
{input[0]} && echo "Part {out}") > {out}') + input: + "{name}.in", + output: + inter=expand("{{name}}.{chr}.inter", chr=chromosomes), + resources: + gpu=1, + run: + assert len(output.inter) > 0 + print(output.inter) + for out in output: + shell('(cat {input[0]} && echo "Part {out}") > {out}') + rule compute2: - input: '{name}.{chromosome}.inter', 'other.txt' - output: '{name}.{chromosome}.inter2' - threads: 2 - resources: io=1 - shell: 'cp {input[0]} {output[0]}' + input: + "{name}.{chromosome}.inter", + "other.txt", + output: + "{name}.{chromosome}.inter2", + threads: 2 + resources: + io=1, + shell: + "cp {input[0]} {output[0]}" + rule gather: - input: ['{name}.%s.inter2'%c for c in chromosomes] - output: '{name}.predictions' - run: - shell('cat {} > {}'.format(' '.join(input), output[0])) + input: + ["{name}.%s.inter2" % c for c in chromosomes], + output: + "{name}.predictions", + run: + shell("cat {} > {}".format(" ".join(input), output[0])) + rule other: - output: 'other.txt' - priority: 50 - resources: gpu=1 - shell: 'touch other.txt' + output: + "other.txt", + priority: 50 + resources: + gpu=1, + shell: + "touch other.txt" diff --git a/tests/test09/Snakefile b/tests/test09/Snakefile index c1cebe358..ac54fa364 100644 --- a/tests/test09/Snakefile +++ b/tests/test09/Snakefile @@ -1,20 +1,26 @@ -from snakemake import shell - - def fail(input, output): - shell("false && cp {input} {output}") + shell("false && cp {input} {output}") + def x(input, output): - fail(input, output) + fail(input, output) + rule rule2: - input: 'test.inter' - output: 'test.out' - shell: 'cp {input} {output}' + input: + "test.inter", + output: + "test.out", + shell: + "cp {input} {output}" + rule rule1: - input: 'test.in' - output: 'test.inter' - log: "logs/test.log" - shell: - "touch {log} && false && cp {input} {output}" + input: + "test.in", + output: + "test.inter", + log: + "logs/test.log", + shell: + "touch {log} && false && cp {input} {output}" diff --git a/tests/test14/Snakefile.nonstandard b/tests/test14/Snakefile.nonstandard index ff91ea330..570e64c03 100644 --- a/tests/test14/Snakefile.nonstandard +++ b/tests/test14/Snakefile.nonstandard @@ -1,4 +1,4 @@ -from snakemake import shell +from snakemake.shell import shell chromosomes = [1, 2, 3, 4, 5] diff --git a/tests/test_cluster_cancelscript/Snakefile.nonstandard b/tests/test_cluster_cancelscript/Snakefile.nonstandard index 0fad46089..9521efc61 100644 --- a/tests/test_cluster_cancelscript/Snakefile.nonstandard +++ b/tests/test_cluster_cancelscript/Snakefile.nonstandard @@ -1,4 +1,4 @@ -from snakemake import shell + rule all: diff --git a/tests/test_cluster_sidecar/Snakefile b/tests/test_cluster_sidecar/Snakefile index fd46f32ee..80c388472 100644 --- a/tests/test_cluster_sidecar/Snakefile +++ b/tests/test_cluster_sidecar/Snakefile @@ -1,13 +1,21 @@ -from snakemake import shell + rule all: - input: 'f.1', 'f.2' + input: + "f.1", + "f.2", + rule one: - output: 'f.1' - shell: "touch {output}" + output: + "f.1", + shell: + "touch {output}" + rule two: - output: 'f.2' - shell: "touch {output}" + output: + "f.2", + shell: + "touch {output}" diff --git a/tests/test_cluster_statusscript/Snakefile.nonstandard b/tests/test_cluster_statusscript/Snakefile.nonstandard index 02a08c26f..ce066f7c7 100644 --- a/tests/test_cluster_statusscript/Snakefile.nonstandard +++ b/tests/test_cluster_statusscript/Snakefile.nonstandard @@ -1,4 +1,4 @@ -from snakemake import shell + chromosomes = [1,2,3,4,5] diff --git a/tests/test_cluster_statusscript_multi/Snakefile.nonstandard 
b/tests/test_cluster_statusscript_multi/Snakefile.nonstandard index 52c456c3e..eefffaefd 100644 --- a/tests/test_cluster_statusscript_multi/Snakefile.nonstandard +++ b/tests/test_cluster_statusscript_multi/Snakefile.nonstandard @@ -1,4 +1,4 @@ -from snakemake import shell + envvars: "TESTVAR" diff --git a/tests/test_schema.py b/tests/test_schema.py index 7219e4176..ab9529598 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -131,7 +131,7 @@ def test_config_ref(config_schema_ref): # Make sure regular validator works config["param"]["bar"] = 1 config["param"]["jsonbar"] = 2 - from snakemake import WorkflowError + from snakemake.exceptions import WorkflowError with pytest.raises(WorkflowError): validate(config, str(config_schema_ref), False) diff --git a/tests/testapi.py b/tests/testapi.py index e3c1aca9d..894ea272d 100644 --- a/tests/testapi.py +++ b/tests/testapi.py @@ -1,7 +1,7 @@ """ Tests for Snakemake’s API """ -from snakemake import snakemake +from snakemake.api import snakemake import asyncio import sys import tempfile diff --git a/tests/tests.py b/tests/tests.py index 3be9bb31f..8de59722e 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -9,7 +9,7 @@ import subprocess as sp from pathlib import Path -from snakemake import parse_cores_jobs +from snakemake.cli import parse_cores_jobs from snakemake.exceptions import CliException from snakemake.utils import available_cpu_count
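Downstream code that imported the moved names from their old locations needs the same adjustments as the tests above. The snippet below is an illustrative summary of the new import locations as they are used throughout this diff; it is not part of the patch itself.

    # New import locations used throughout this diff (illustrative summary).
    from snakemake.api import snakemake             # was: from snakemake import snakemake
    from snakemake.shell import shell               # was: from snakemake import shell
    from snakemake.cli import parse_cores_jobs      # was: from snakemake import parse_cores_jobs
    from snakemake.exceptions import WorkflowError  # was: from snakemake import WorkflowError

    # Helpers moved out of snakemake.common / snakemake.io / snakemake.executors:
    from snakemake_interface_executor_plugins.utils import (
        ExecMode,       # replaces snakemake.common.Mode
        lazy_property,  # previously in snakemake.common
        not_iterable,   # previously in snakemake.io
        sleep,          # previously in snakemake.executors
        TargetSpec,     # previously defined in snakemake.target_jobs
    )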