From 55352530a9c0e7eb2fcfac81006fd2dd8ef83ef4 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 28 Feb 2023 16:24:24 -0800 Subject: [PATCH 1/3] =?UTF-8?q?runner.aws=5Fbatch:=20Fix=20error=20on=20jo?= =?UTF-8?q?b=20submission=20with=20botocore=20=E2=89=A51.28.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job submissions which triggered the auto-registration of a new job definition (i.e. because a new image was needed) were failing with botocore ≥1.28.0 (released 24 Oct 2022), because a new field that is invalid for registering new job definitions started being returned with the existing job definition used as a base. This resulted in errors from `nextstrain build` like: Submitting job Parameter validation failed: Unknown parameter in input: "containerOrchestrationType", must be one of: jobDefinitionName, type, parameters, schedulingPriority, containerProperties, nodeProperties, retryStrategy, propagateTags, timeout, tags, platformCapabilities, eksProperties Job submission failed! So far this bug only manifests when installing Nextstrain CLI from the Bioconda package (and potentially only with Mamba's dependency solver) because of interactions between s3fs, botocore, and aiobotocore. With the Bioconda package (and Mamba), the dependency solution uses a new botocore and old s3fs, which doesn't use aiobotocore and thus doesn't pin an older botocore.¹ An old s3fs is allowed because the Bioconda package doesn't specify a lower bound. In contrast, our Python package installed by Pip _does_ specify a lower bound for s3fs², which then requires aiobotocore, which then requires an older botocore where we don't encounter this bug. A better approach than this hardcoded list of fields will come in the following commit. ¹ aiobotocore is a giant monkeypatch of botocore, and thus has very narrow version bounds on botocore. ² See for why. 
Related-to: Related-to: --- nextstrain/cli/runner/aws_batch/jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextstrain/cli/runner/aws_batch/jobs.py b/nextstrain/cli/runner/aws_batch/jobs.py index b79d3d84..69cf0e83 100644 --- a/nextstrain/cli/runner/aws_batch/jobs.py +++ b/nextstrain/cli/runner/aws_batch/jobs.py @@ -290,7 +290,7 @@ def override_definition(base_definition_name: str, image: str) -> str: # These are AWS-assigned keys returned by describe_job_definitions() which # aren't supported as keyword arguments by register_job_definition(). - for key in {'jobDefinitionArn', 'revision', 'status'}: + for key in {'jobDefinitionArn', 'revision', 'status', 'containerOrchestrationType'}: del derived_definition[key] batch.register_job_definition(**derived_definition) From c8c30bf072a78e8db9645f63837a891fe07e7853 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Tue, 28 Feb 2023 16:35:46 -0800 Subject: [PATCH 2/3] runner.aws_batch: Stop hardcoding a list of job definition fields that don't roundtrip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use boto3/botocore's models to discover the valid fields for job definition registration and pare down the derived input object to those. This approach avoids breakage when the API changes, such as the breakage fixed by "runner.aws_batch: Fix error on job submission with botocore ≥1.28.0" (5535253). 
--- nextstrain/cli/runner/aws_batch/jobs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/nextstrain/cli/runner/aws_batch/jobs.py b/nextstrain/cli/runner/aws_batch/jobs.py index 69cf0e83..e4729930 100644 --- a/nextstrain/cli/runner/aws_batch/jobs.py +++ b/nextstrain/cli/runner/aws_batch/jobs.py @@ -288,9 +288,11 @@ def override_definition(base_definition_name: str, image: str) -> str: derived_definition["jobDefinitionName"] = derived_definition_name derived_definition["containerProperties"]["image"] = image - # These are AWS-assigned keys returned by describe_job_definitions() which + # Remove AWS-assigned keys returned by describe_job_definitions() which # aren't supported as keyword arguments by register_job_definition(). - for key in {'jobDefinitionArn', 'revision', 'status', 'containerOrchestrationType'}: + register_inputs = batch.meta.service_model.operation_model("RegisterJobDefinition").input_shape.members + + for key in set(derived_definition) - set(register_inputs): del derived_definition[key] batch.register_job_definition(**derived_definition) From 0b23ebfae0fd4a3ff5128dbab06e3ac0217b298c Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 1 Mar 2023 11:00:50 -0800 Subject: [PATCH 3/3] =?UTF-8?q?CHANGES:=20Document=20compatibility=20bug?= =?UTF-8?q?=20fix=20with=20botocore=20=E2=89=A51.28.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGES.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 415b0c51..0804d441 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -13,6 +13,13 @@ development source code and as such may not be routinely kept up to date. # __NEXT__ +## Bug fixes + +* We've fixed and future-proofed a compatibility bug with a third-party library + that can occur under very specific conditions when `nextstrain build` submits + AWS Batch jobs. + ([#261](https://github.com/nextstrain/cli/pull/261)) + # 6.2.0 (28 February 2023)