From f17c5a1e1a6444cfbe99c32bf3abf4d4770f398c Mon Sep 17 00:00:00 2001 From: tdayris Date: Fri, 14 Jun 2024 11:52:55 +0200 Subject: [PATCH 1/9] GATK utils for common IO files --- snakemake_wrapper_utils/gatk.py | 100 ++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 snakemake_wrapper_utils/gatk.py diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py new file mode 100644 index 0000000..b74f054 --- /dev/null +++ b/snakemake_wrapper_utils/gatk.py @@ -0,0 +1,100 @@ +import sys +from snakemake_wrapper_utils.snakemake import is_arg + +def get_gatk_opts( + snakemake, + parse_arg_file=True, + parse_bam_index=True, + parse_bam_md5=True, + parse_vcf_index=True, + parse_vcf_md5=True, + parse_ref=True, + parse_ref_dict=True, + param_name="extra", +): + """Obtain gatk_opts from input, output, params""" + + gatk_opts = "" + extra = snakemake.params.get(param_name, "") + + ########################## + ### Configuration file ### + ########################## + + if parse_arg_file: + if is_arg("--arguments_file", extra): + sys.exit( + "You have specified an argument file (`--argument_file`) in `params.extra; this is automatically infered from `input.arg_file`." + ) + + # Multiple argument files can be provided. Order matters. + arg_file = snakemake.input.get("arg_file", "") + if arg_file: + if isinstance(arg_file, list): + arg_file = " --argument_file ".join(arg_file) + + gatk_opts += f"--argument_file {arg_file}" + + + ###################### + ### Reference file ### + ###################### + + if parse_ref: + if is_arg("-R", extra) or is_arg("--reference", extra): + sys.exit( + "You have specified reference file (`-R,--reference`) in `params.extra`; this is automatically infered from `input.ref`." + ) + ref = snakemake.input.get("ref") + if ref: + gatk_opts += f" --reference {ref} " + + if parse_ref_dict: + if is_arg("--sequence-dictionary", extra): + sys.exit( + "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically infered from `input.ref_dict`." + ) + ref_dict = snakemake.input.get("ref_dict", ""): + gatk_opts += f" --sequence-dictionary {ref_dict} " + + + ########################### + ### Optional BAM output ### + ########################### + if parse_bam_index: + if is_arg("--create-output-bam-index", extra) or is_arg("-OBI", extra): + sys.exit( + "You have specified bam index creation (`-OBI,--create-output-bam-index`) in `params.extra`; this is automatically infered from `output.bam_bai`." + ) + if snakemake.output.get("bam_bai"): + gatk_opts += " --create-output-bam-index " + + if parse_bam_md5: + if is_arg("--create-output-bam-md5", extra) or is_arg("-OBM", extra): + sys.exit( + "You have specified bam MD5-sum creation (`-OBM,--create-output-bam-md5`) in `params.extra`; this is automatically infered from `output.bam_md5`." + ) + if snakemake.output.get("bam_md5"): + gatk_opts += " --create-output-bam-md5 " + + ########################### + ### Optional VCF output ### + ########################### + if parse_vcf_index: + if is_arg("--create-output-variant-index", extra) or is_arg("-OVI", extra): + sys.exit( + "You have specified VCF index creation (`--OVI,--create-output-variant-index`) in `params.extra; this is automatically infered from `output.vcf_idx`." + ) + if snakemake.output.get("vcf_idx"): + gatk_opts += " --create-output-variant-index " + + if parse_vcf_md5: + if is_arg("--create-output-variant-md5", extra) or is_arg("-OVM", extra): + sys.exit( + "You have specified VCF MD5-sum creation (`--OVI,--create-output-variant-index`) in `params.extra; this is automatically infered from `output.vcf_md5`." + ) + if snakemake.output.get("vcf_md5"): + gatk_opts += " --create-output-variant-md5 " + + return gatk_opts + From 83f5277e6de263f7a061484a34800e5da4e33540 Mon Sep 17 00:00:00 2001 From: tdayris Date: Fri, 14 Jun 2024 11:58:38 +0200 Subject: [PATCH 2/9] Black formatting --- snakemake_wrapper_utils/gatk.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index b74f054..b25dac4 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -1,6 +1,7 @@ import sys from snakemake_wrapper_utils.snakemake import is_arg + def get_gatk_opts( snakemake, parse_arg_file=True, @@ -34,7 +35,6 @@ def get_gatk_opts( arg_file = " --argument_file ".join(arg_file) gatk_opts += f"--argument_file {arg_file}" - ###################### ### Reference file ### @@ -54,10 +54,10 @@ def get_gatk_opts( sys.exit( "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically infered from `input.ref_dict`." ) - ref_dict = snakemake.input.get("ref_dict", ""): + ref_dict = snakemake.input.get("ref_dict", "") + if ref_dict: gatk_opts += f" --sequence-dictionary {ref_dict} " - ########################### ### Optional BAM output ### ########################### @@ -97,4 +97,3 @@ def get_gatk_opts( gatk_opts += " --create-output-variant-md5 " return gatk_opts - From e8e738bb825bcec49b435d651c472cdd05c72d19 Mon Sep 17 00:00:00 2001 From: tdayris Date: Thu, 5 Dec 2024 16:00:01 +0100 Subject: [PATCH 3/9] Default set to False --- snakemake_wrapper_utils/gatk.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index b25dac4..abbd6bc 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -4,13 +4,13 @@ def get_gatk_opts( snakemake, - parse_arg_file=True, - parse_bam_index=True, - parse_bam_md5=True, - parse_vcf_index=True, - parse_vcf_md5=True, - parse_ref=True, - parse_ref_dict=True, + parse_arg_file=False, + parse_bam_index=False, + parse_bam_md5=False, + parse_vcf_index=False, + parse_vcf_md5=False, + parse_ref=False, + parse_ref_dict=False, param_name="extra", ): """Obtain gatk_opts from input, output, params""" From 39417ce8a76c4acdc59ac7dcec763ce2cebc49b6 Mon Sep 17 00:00:00 2001 From: tdayris Date: Thu, 5 Dec 2024 16:05:17 +0100 Subject: [PATCH 4/9] Typo --- snakemake_wrapper_utils/gatk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index abbd6bc..343d546 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -25,7 +25,7 @@ def get_gatk_opts( if parse_arg_file: if is_arg("--arguments_file", extra): sys.exit( - "You have specified an argument file (`--argument_file`) in `params.extra; this is automatically infered from `input.arg_file`." + "You have specified an argument file (`--argument_file`) in `params.extra`; this is automatically infered from `input.arg_file`." ) # Multiple argument files can be provided. Order matters. From 9e298284ee6e687e5540b7671f3c8477eaf4215d Mon Sep 17 00:00:00 2001 From: tdayris Date: Thu, 5 Dec 2024 16:06:58 +0100 Subject: [PATCH 5/9] inferred with two r --- snakemake_wrapper_utils/gatk.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index 343d546..a3d8972 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -25,7 +25,7 @@ def get_gatk_opts( if parse_arg_file: if is_arg("--arguments_file", extra): sys.exit( - "You have specified an argument file (`--argument_file`) in `params.extra`; this is automatically infered from `input.arg_file`." + "You have specified an argument file (`--argument_file`) in `params.extra`; this is automatically inferred from `input.arg_file`." ) # Multiple argument files can be provided. Order matters. @@ -43,7 +43,7 @@ def get_gatk_opts( if parse_ref: if is_arg("-R", extra) or is_arg("--reference", extra): sys.exit( - "You have specified reference file (`-R,--reference`) in `params.extra`; this is automatically infered from `input.ref`." + "You have specified reference file (`-R,--reference`) in `params.extra`; this is automatically inferred from `input.ref`." ) ref = snakemake.input.get("ref") if ref: @@ -52,7 +52,7 @@ def get_gatk_opts( if parse_ref_dict: if is_arg("--sequence-dictionary", extra): sys.exit( - "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically infered from `input.ref_dict`." + "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically inferred from `input.ref_dict`." ) ref_dict = snakemake.input.get("ref_dict", "") if ref_dict: @@ -64,7 +64,7 @@ def get_gatk_opts( if parse_bam_index: if is_arg("--create-output-bam-index", extra) or is_arg("-OBI", extra): sys.exit( - "You have specified bam index creation (`-OBI,--create-output-bam-index`) in `params.extra`; this is automatically infered from `output.bam_bai`." + "You have specified bam index creation (`-OBI,--create-output-bam-index`) in `params.extra`; this is automatically inferred from `output.bam_bai`." ) if snakemake.output.get("bam_bai"): gatk_opts += " --create-output-bam-index " @@ -72,7 +72,7 @@ def get_gatk_opts( if parse_bam_md5: if is_arg("--create-output-bam-md5", extra) or is_arg("-OBM", extra): sys.exit( - "You have specified bam MD5-sum creation (`-OBM,--create-output-bam-md5`) in `params.extra`; this is automatically infered from `output.bam_md5`." + "You have specified bam MD5-sum creation (`-OBM,--create-output-bam-md5`) in `params.extra`; this is automatically inferred from `output.bam_md5`." ) if snakemake.output.get("bam_md5"): gatk_opts += " --create-output-bam-md5 " @@ -83,7 +83,7 @@ def get_gatk_opts( if parse_vcf_index: if is_arg("--create-output-variant-index", extra) or is_arg("-OVI", extra): sys.exit( - "You have specified VCF index creation (`--OVI,--create-output-variant-index`) in `params.extra; this is automatically infered from `output.vcf_idx`." + "You have specified VCF index creation (`--OVI,--create-output-variant-index`) in `params.extra`; this is automatically inferred from `output.vcf_idx`." ) if snakemake.output.get("vcf_idx"): gatk_opts += " --create-output-variant-index " @@ -91,7 +91,7 @@ def get_gatk_opts( if parse_vcf_md5: if is_arg("--create-output-variant-md5", extra) or is_arg("-OVM", extra): sys.exit( - "You have specified VCF MD5-sum creation (`--OVI,--create-output-variant-index`) in `params.extra; this is automatically infered from `output.vcf_md5`." + "You have specified VCF MD5-sum creation (`--OVI,--create-output-variant-index`) in `params.extra`; this is automatically inferred from `output.vcf_md5`." ) if snakemake.output.get("vcf_md5"): gatk_opts += " --create-output-variant-md5 " From 09a1062fa3de384879391f25a6298a9f561240de Mon Sep 17 00:00:00 2001 From: "Filipe G. Vieira" <1151762+fgvieira@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:17:17 +0100 Subject: [PATCH 6/9] Code format --- snakemake_wrapper_utils/gatk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index a3d8972..fdd614b 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -47,7 +47,7 @@ def get_gatk_opts( ) ref = snakemake.input.get("ref") if ref: - gatk_opts += f" --reference {ref} " + gatk_opts += f" --reference {ref}" if parse_ref_dict: if is_arg("--sequence-dictionary", extra): From 12c957e45ac1949abf60584240662fd9aa2a5da3 Mon Sep 17 00:00:00 2001 From: "Filipe G. Vieira" <1151762+fgvieira@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:20:59 +0100 Subject: [PATCH 7/9] Code reformat --- snakemake_wrapper_utils/gatk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index fdd614b..b2edf67 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -67,7 +67,7 @@ def get_gatk_opts( "You have specified bam index creation (`-OBI,--create-output-bam-index`) in `params.extra`; this is automatically inferred from `output.bam_bai`." ) if snakemake.output.get("bam_bai"): - gatk_opts += " --create-output-bam-index " + gatk_opts += " --create-output-bam-index" if parse_bam_md5: if is_arg("--create-output-bam-md5", extra) or is_arg("-OBM", extra): From a12835642e630794e71f10cec9a45cb44cb6677c Mon Sep 17 00:00:00 2001 From: "Filipe G. Vieira" <1151762+fgvieira@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:21:23 +0100 Subject: [PATCH 8/9] Code reformat --- snakemake_wrapper_utils/gatk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index b2edf67..97b9268 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -75,7 +75,7 @@ def get_gatk_opts( "You have specified bam MD5-sum creation (`-OBM,--create-output-bam-md5`) in `params.extra`; this is automatically inferred from `output.bam_md5`." ) if snakemake.output.get("bam_md5"): - gatk_opts += " --create-output-bam-md5 " + gatk_opts += " --create-output-bam-md5" ########################### ### Optional VCF output ### From 0b9a41d190aef0d50da914312fd23859faa95d5b Mon Sep 17 00:00:00 2001 From: "Filipe G. Vieira" <1151762+fgvieira@users.noreply.github.com> Date: Thu, 5 Dec 2024 18:22:02 +0100 Subject: [PATCH 9/9] Rename ref_dict --- snakemake_wrapper_utils/gatk.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snakemake_wrapper_utils/gatk.py b/snakemake_wrapper_utils/gatk.py index 97b9268..1e06366 100644 --- a/snakemake_wrapper_utils/gatk.py +++ b/snakemake_wrapper_utils/gatk.py @@ -52,11 +52,11 @@ def get_gatk_opts( if parse_ref_dict: if is_arg("--sequence-dictionary", extra): sys.exit( - "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically inferred from `input.ref_dict`." + "You have specified reference sequence dictionary (`--sequence-dictionary`) in `params.extra`; this is automatically inferred from `input.dict`." ) - ref_dict = snakemake.input.get("ref_dict", "") - if ref_dict: - gatk_opts += f" --sequence-dictionary {ref_dict} " + dict = snakemake.input.get("dict", "") + if dict: + gatk_opts += f" --sequence-dictionary {dict}" ########################### ### Optional BAM output ###