feat: Azbatch executor (#1953) (#2246)
### Description

Finish the remaining issues with the original implementation of the Azure
Batch executor, first tackled by @andreas-wilm in #533.

- Simplified the command-line options.
- Implemented the batch configuration as a class with sensible defaults and
environment-variable overrides.
- Made use of the existing AzBlob helper where possible.
- Updated the Dockerfile with the necessary dependencies.
- The container jakevc/snakemake:latest contains these changes for testing.

I toyed with the idea of driving more configuration from command-line
options, but that litters the codebase with redundant code (each command-line
option has to be repeated several times across __init__.py, scheduler.py,
workflow.py, and azure_batch.py). Environment variables with sensible
defaults seem more appropriate; the sketch below illustrates the pattern.
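
A minimal sketch of that defaults-plus-overrides pattern (the class, field,
and variable names here are illustrative, not necessarily those used in
azure_batch.py):

```python
import os
from dataclasses import dataclass, field


def env_or(name: str, default: str) -> str:
    # Use the environment variable when set, otherwise the built-in default.
    return os.environ.get(name, default)


@dataclass
class BatchConfig:
    # Illustrative settings; the committed class may expose different fields.
    pool_vm_size: str = field(
        default_factory=lambda: env_or("BATCH_POOL_VM_SIZE", "Standard_D2_v3")
    )
    pool_node_count: int = field(
        default_factory=lambda: int(env_or("BATCH_POOL_NODE_COUNT", "1"))
    )
    tasks_per_node: int = field(
        default_factory=lambda: int(env_or("BATCH_TASKS_PER_NODE", "1"))
    )
```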

### QC

* [x] The PR contains a test case for the changes or the changes are
already covered by an existing test case.
* [x] The documentation (`docs/`) is updated to reflect the changes, or
this is not necessary (e.g., if the change modifies neither the language
nor the behavior or functionality of Snakemake).


### Example

Example invocation:

```bash
export AZ_BLOB_ACCOUNT_URL=*********
export AZ_BATCH_ACCOUNT_KEY=*********

snakemake \
    --jobs 3 \
    -rpf --verbose --default-remote-prefix snaketest \
    --use-conda \
    --default-remote-provider AzBlob \
    --envvars AZ_BLOB_ACCOUNT_URL \
    --az-batch \
    --container-image jakevc/snakemake \
    --az-batch-account-url **********
```
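
In this invocation, `AZ_BLOB_ACCOUNT_URL` is forwarded to the remote jobs via
`--envvars` so that they can reach blob storage, while `AZ_BATCH_ACCOUNT_KEY`
only needs to be set locally: it authenticates the batch client against the
account given by `--az-batch-account-url`. The `--default-remote-prefix`
names the blob container (here `snaketest`).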

---------


Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: Jake VanCampen <jake.vancampen7@gmail.com>
Co-authored-by: Emmanouil "Manolis" Maragkakis <em.maragkakis@gmail.com>
Co-authored-by: Jesse Connell <jesse08@gmail.com>
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: roshnipatel <roshnipatel@berkeley.edu>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Mike Taves <mwtoews@gmail.com>
Co-authored-by: Rich Abdill <rabdill@users.noreply.github.com>
Co-authored-by: Koen van Greevenbroek <74298901+koen-vg@users.noreply.github.com>
Co-authored-by: Chang Y <yech1990@gmail.com>
11 people committed Jun 11, 2023
1 parent c18083a commit 0f9c49f
Showing 19 changed files with 1,709 additions and 135 deletions.
12 changes: 12 additions & 0 deletions .github/workflows/main.yml
@@ -244,6 +244,18 @@ jobs:
 #        shell: bash -el {0}
 #        run: |
 #          pytest -s -v -x tests/test_kubernetes.py
+
+      - name: Test Azure Batch Executor
+        shell: bash -el {0}
+        env:
+          AZ_BLOB_PREFIX: "${{ secrets.AZ_BLOB_PREFIX }}"
+          AZ_BLOB_ACCOUNT_URL: "${{ secrets.AZ_STORAGE_ACCOUNT_URL }}"
+          AZ_BLOB_CREDENTIAL: "${{ secrets.AZ_STORAGE_KEY }}"
+          AZ_BATCH_ACCOUNT_URL: "${{ secrets.AZ_BATCH_ACCOUNT_URL }}"
+          AZ_BATCH_ACCOUNT_KEY: "${{ secrets.AZ_BATCH_KEY }}"
+        run: |
+          pytest -s -v -x tests/test_azure_batch_executor.py
+
       - name: Test GA4GH TES executor
         shell: bash -el {0}
         run: |
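The same test can be run outside CI by exporting the five variables above
with real values (placeholders below) and invoking pytest directly:

```bash
export AZ_BLOB_PREFIX=<container-name>
export AZ_BLOB_ACCOUNT_URL=<storage-account-url>
export AZ_BLOB_CREDENTIAL=<storage-account-key>
export AZ_BATCH_ACCOUNT_URL=<batch-account-url>
export AZ_BATCH_ACCOUNT_KEY=<batch-account-key>

pytest -s -v -x tests/test_azure_batch_executor.py
```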
2 changes: 1 addition & 1 deletion .github/workflows/scripts/sacct-proxy.py
@@ -22,4 +22,4 @@
 elif args.name:
     sp.call(["squeue", "--noheader", "--format", "%F|%T", "--name", args.name])
 else:
-    raise ValueError("Unsupported arguments")
\ No newline at end of file
+    raise ValueError("Unsupported arguments")
2 changes: 1 addition & 1 deletion Dockerfile
@@ -15,6 +15,6 @@ RUN /bin/bash -c "mamba create -q -y -c conda-forge -c bioconda -n snakemake sna
     mamba install -q -y -c conda-forge singularity && \
     conda clean --all -y && \
     which python && \
-    pip install .[reports,messaging,google-cloud]"
+    pip install .[reports,messaging,google-cloud,azure]"
 RUN echo "source activate snakemake" > ~/.bashrc
 ENV PATH /opt/conda/envs/snakemake/bin:${PATH}
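
As mentioned in the description, the prebuilt image with these dependencies
can be pulled for testing:

```bash
docker pull jakevc/snakemake:latest
```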
380 changes: 380 additions & 0 deletions docs/executor_tutorial/azure_batch.rst

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion docs/executor_tutorial/tutorial.rst
@@ -23,8 +23,9 @@ We ensured that no bioinformatics knowledge is needed to understand the tutorial
 .. toctree::
    :maxdepth: 2

-   google_lifesciences
+   azure_batch
    azure_aks
+   google_lifesciences
    flux


8 changes: 8 additions & 0 deletions setup.cfg
@@ -57,13 +57,21 @@ install_requires =
     toposort >=1.10
     wrapt
     yte >=1.0,<2.0
+    msrest

 [options.extras_require]
 google-cloud =
     google-api-python-client
     google-cloud-storage
     google-crc32c
     oauth2client
+azure =
+    azure-storage-blob
+    azure-batch
+    azure-core
+    azure-identity
+    azure-mgmt-batch
+
 messaging = slacker
 pep =
     eido
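With the new extra in place, the Azure dependencies can be installed from a
checkout of this branch in one step (the Dockerfile change above does the
equivalent inside the image):

```bash
pip install ".[azure]"
```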
76 changes: 74 additions & 2 deletions snakemake/__init__.py
@@ -15,7 +15,9 @@
 import importlib
 import shlex
 from importlib.machinery import SourceFileLoader
+from snakemake.executors.common import url_can_parse
 from snakemake.target_jobs import parse_target_jobs_cli_args
+from snakemake.executors.common import url_can_parse

 from snakemake.workflow import Workflow
 from snakemake.dag import Batch
@@ -173,6 +175,9 @@ def snakemake(
     flux=False,
     tibanna=False,
     tibanna_sfn=None,
+    az_batch=False,
+    az_batch_enable_autoscale=False,
+    az_batch_account_url=None,
     google_lifesciences=False,
     google_lifesciences_regions=None,
     google_lifesciences_location=None,
@@ -313,6 +318,9 @@ def snakemake(
         default_remote_prefix (str): prefix for default remote provider (e.g. name of the bucket).
         tibanna (bool): submit jobs to AWS cloud using Tibanna.
         tibanna_sfn (str): Step function (Unicorn) name of Tibanna (e.g. tibanna_unicorn_monty). This must be deployed first using tibanna cli.
+        az_batch (bool): submit jobs to Azure Batch.
+        az_batch_enable_autoscale (bool): enable autoscaling of the Azure Batch pool nodes; this sets the initial dedicated node count to zero and the pool takes about five minutes to resize, so the flag is only recommended for relatively long-running jobs.
+        az_batch_account_url (str): Azure Batch account URL.
         google_lifesciences (bool): submit jobs to Google Cloud Life Sciences (pipelines API).
         google_lifesciences_regions (list): a list of regions (e.g., us-east1)
         google_lifesciences_location (str): Life Sciences API location (e.g., us-central1)
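
The five-minute caveat reflects Azure Batch's autoscale mechanics: an
autoscale-enabled pool re-evaluates its formula on an interval, and that
interval cannot be shorter than five minutes. Roughly, enabling autoscale on
a pool with the azure-batch SDK looks like the sketch below (the pool id and
formula are illustrative, not necessarily what the executor installs):

```python
import datetime

from azure.batch import BatchServiceClient
from azure.batch.batch_auth import SharedKeyCredentials

# Placeholder credentials; in the executor these come from
# AZ_BATCH_ACCOUNT_URL and AZ_BATCH_ACCOUNT_KEY.
credentials = SharedKeyCredentials("myaccount", "<account-key>")
client = BatchServiceClient(
    credentials, batch_url="https://myaccount.westus2.batch.azure.com"
)

client.pool.enable_auto_scale(
    pool_id="snakemake-pool",  # illustrative pool id
    # Start from zero dedicated nodes and grow with pending tasks, capped at 10.
    auto_scale_formula=(
        "$TargetDedicatedNodes = "
        "min(10, avg($PendingTasks.GetSample(TimeInterval_Minute * 5)));"
    ),
    auto_scale_evaluation_interval=datetime.timedelta(minutes=5),  # service minimum
)
```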
@@ -414,6 +422,11 @@ def snakemake(
             tibanna_config_dict.update({k: v})
         tibanna_config = tibanna_config_dict

+    # Azure Batch uses Azure Blob storage and no shared filesystem
+    if az_batch:
+        assume_shared_fs = False
+        default_remote_provider = "AzBlob"
+
     # Google Cloud Life Sciences API uses compute engine and storage
     if google_lifesciences:
         assume_shared_fs = False
@@ -452,6 +465,7 @@ def snakemake(
         or drmaa
         or kubernetes
         or tibanna
+        or az_batch
         or google_lifesciences
         or tes
         or slurm
@@ -569,7 +583,7 @@ def snakemake(
                 raise WorkflowError("Unknown default remote provider.")
             if rmt.RemoteProvider.supports_default:
                 _default_remote_provider = rmt.RemoteProvider(
-                    keep_local=True, is_default=True
+                    keep_local=keep_remote_local, is_default=True
                 )
             else:
                 raise WorkflowError(
@@ -727,6 +741,9 @@ def snakemake(
             default_remote_prefix=default_remote_prefix,
             tibanna=tibanna,
             tibanna_sfn=tibanna_sfn,
+            az_batch=az_batch,
+            az_batch_enable_autoscale=az_batch_enable_autoscale,
+            az_batch_account_url=az_batch_account_url,
             google_lifesciences=google_lifesciences,
             google_lifesciences_regions=google_lifesciences_regions,
             google_lifesciences_location=google_lifesciences_location,
@@ -784,6 +801,9 @@ def snakemake(
             k8s_cpu_scalar=k8s_cpu_scalar,
             tibanna=tibanna,
             tibanna_sfn=tibanna_sfn,
+            az_batch=az_batch,
+            az_batch_enable_autoscale=az_batch_enable_autoscale,
+            az_batch_account_url=az_batch_account_url,
             google_lifesciences=google_lifesciences,
             google_lifesciences_regions=google_lifesciences_regions,
             google_lifesciences_location=google_lifesciences_location,
@@ -2362,9 +2382,10 @@ def get_argument_parser(profile=None):
     group_cloud = parser.add_argument_group("CLOUD")
     group_flux = parser.add_argument_group("FLUX")
     group_kubernetes = parser.add_argument_group("KUBERNETES")
-    group_tibanna = parser.add_argument_group("TIBANNA")
     group_google_life_science = parser.add_argument_group("GOOGLE_LIFE_SCIENCE")
+    group_kubernetes = parser.add_argument_group("KUBERNETES")
     group_tes = parser.add_argument_group("TES")
+    group_tibanna = parser.add_argument_group("TIBANNA")

     group_kubernetes.add_argument(
         "--kubernetes",
@@ -2474,6 +2495,26 @@ def get_argument_parser(profile=None):
         "are deleted at the shutdown step of the workflow.",
     )

+    group_azure_batch = parser.add_argument_group("AZURE_BATCH")
+
+    group_azure_batch.add_argument(
+        "--az-batch",
+        action="store_true",
+        help="Execute workflow on Azure Batch.",
+    )
+
+    group_azure_batch.add_argument(
+        "--az-batch-enable-autoscale",
+        action="store_true",
+        help="Enable autoscaling of the Azure Batch pool nodes; this sets the initial dedicated node count to zero and the pool takes about five minutes to resize, so it is only recommended for longer-running jobs.",
+    )
+
+    group_azure_batch.add_argument(
+        "--az-batch-account-url",
+        nargs="?",
+        help="Azure Batch account URL; requires the AZ_BATCH_ACCOUNT_KEY environment variable to be set.",
+    )
+
     group_flux.add_argument(
         "--flux",
         action="store_true",
@@ -2701,6 +2742,7 @@ def adjust_path(f):
         or args.tibanna
         or args.kubernetes
         or args.tes
+        or args.az_batch
         or args.google_lifesciences
         or args.drmaa
         or args.flux
@@ -2820,6 +2862,33 @@ def adjust_path(f):
         )
         sys.exit(1)

+    if args.az_batch:
+        if not args.default_remote_provider or not args.default_remote_prefix:
+            print(
+                "Error: --az-batch must be combined with "
+                "--default-remote-provider AzBlob and --default-remote-prefix to "
+                "provide a blob container name\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        elif args.az_batch_account_url is None:
+            print(
+                "Error: --az-batch-account-url must be set when --az-batch is used\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+        elif not url_can_parse(args.az_batch_account_url):
+            print(
+                "Error: invalid Azure Batch account URL, please use the format: https://{account_name}.{location}.batch.azure.com"
+            )
+            sys.exit(1)
+        elif os.getenv("AZ_BATCH_ACCOUNT_KEY") is None:
+            print(
+                "Error: environment variable AZ_BATCH_ACCOUNT_KEY must be set when --az-batch is used\n",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
     if args.google_lifesciences:
         if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS"):
             print(
@@ -3018,6 +3087,9 @@ def open_browser():
             flux=args.flux,
             tibanna=args.tibanna,
             tibanna_sfn=args.tibanna_sfn,
+            az_batch=args.az_batch,
+            az_batch_enable_autoscale=args.az_batch_enable_autoscale,
+            az_batch_account_url=args.az_batch_account_url,
             google_lifesciences=args.google_lifesciences,
             google_lifesciences_regions=args.google_lifesciences_regions,
             google_lifesciences_location=args.google_lifesciences_location,
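The URL check above relies on `url_can_parse` from
`snakemake.executors.common`. A hedged sketch of what such a minimal check
can look like (the actual helper may differ):

```python
from urllib.parse import urlparse


def can_parse(url: str) -> bool:
    # Treat a URL as valid only when it carries both a scheme and a host.
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme) and bool(parts.netloc)


assert can_parse("https://myaccount.westus2.batch.azure.com")
assert not can_parse("myaccount.batch")  # missing scheme
```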
3 changes: 2 additions & 1 deletion snakemake/executors/__init__.py
@@ -484,7 +484,8 @@ def __init__(

         # Zero thread jobs do not need a thread, but they occupy additional workers.
         # Hence we need to reserve additional workers for them.
-        self.workers = workers + 5
+        workers = workers + 5 if workers is not None else 5
+        self.workers = workers
         self.pool = concurrent.futures.ThreadPoolExecutor(max_workers=self.workers)

     @property
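The guard matters because `workers` may be `None` here, in which case the old
line raised before the thread pool was even created:

```python
>>> None + 5
Traceback (most recent call last):
  ...
TypeError: unsupported operand type(s) for +: 'NoneType' and 'int'
```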
