Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: added tmpdir to all GATK wrappers, plus some doc changes #449

Merged
merged 11 commits into from
Feb 21, 2022
Merged
4 changes: 2 additions & 2 deletions bio/gatk/applybqsr/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.1.4.1
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils ==0.1.3
- snakemake-wrapper-utils =0.3
7 changes: 4 additions & 3 deletions bio/gatk/applybqsr/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
- BAM file
- FASTA reference
- recalibration table for the bam
output:
- recalibrated bam file
notes: |
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments for ApplyBQSR.
* The `extra` param allows for additional program arguments.
* For more information, see https://gatk.broadinstitute.org/hc/en-us/articles/360037055712-ApplyBQSR
10 changes: 5 additions & 5 deletions bio/gatk/applybqsr/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@ rule gatk_applybqsr:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.bam"
bam="recal/{sample}.bam",
log:
"logs/gatk/gatk_applybqsr/{sample}.log"
"logs/gatk/gatk_applybqsr/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsr"
19 changes: 13 additions & 6 deletions bio/gatk/applybqsr/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,23 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)

log = snakemake.log_fmt_shell(stdout=True, stderr=True, append=True)
shell(
"gatk --java-options '{java_opts}' ApplyBQSR {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"--bqsr-recal-file {snakemake.input.recal_table} "
"-O {snakemake.output.bam} {log}"
)

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' ApplyBQSR"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.bam}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/gatk/applybqsrspark/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ channels:
dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.1.3
- snakemake-wrapper-utils =0.3
10 changes: 5 additions & 5 deletions bio/gatk/applybqsrspark/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ rule gatk_applybqsr_spark:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
output:
bam="recal/{sample}.bam"
bam="recal/{sample}.bam",
log:
"logs/gatk/gatk_applybqsr_spark/{sample}.log"
"logs/gatk/gatk_applybqsr_spark/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/applybqsrspark"
17 changes: 10 additions & 7 deletions bio/gatk/applybqsrspark/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
tmpdir_shards = Path(tmpdir) / "shards_{:06d}".format(random.randrange(10 ** 6))

shell(
"gatk --java-options '{java_opts}' ApplyBQSRSpark {extra} "
"--reference {snakemake.input.ref} --input {snakemake.input.bam} "
"--bqsr-recal-file {snakemake.input.recal_table} "
"--tmp-dir {tmpdir} --output-shard-tmp-dir {tmpdir_shards} "
"--output {snakemake.output.bam} "
"-- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra} "
"{log}"
"gatk --java-options '{java_opts}' ApplyBQSRSpark"
" --input {snakemake.input.bam}"
" --bqsr-recal-file {snakemake.input.recal_table}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output-shard-tmp-dir {tmpdir_shards}"
" --output {snakemake.output.bam}"
" -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/applyvqsr/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.2.0.0
- snakemake-wrapper-utils ==0.1.3
- gatk4 =4.2
- snakemake-wrapper-utils =0.3
3 changes: 2 additions & 1 deletion bio/gatk/applyvqsr/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ description: |
Run gatk ApplyVQSR.
authors:
- Brett Copeland
- Filipe G. Vieira
input:
- VCF file
- Recalibration file
- Tranches file
output:
- Variant QualityScore-Recalibrated VCF
notes: |
* The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-Xmx4G" for one, and "-Xmx4G -XX:ParallelGCThreads=10" for two options.
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* For more information, see https://gatk.broadinstitute.org/hc/en-us/articles/360037226332-ApplyVQSR
16 changes: 8 additions & 8 deletions bio/gatk/applyvqsr/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
rule apply_vqsr:
input:
vcf="test.vcf",
recal="snps.recal",
tranches="snps.tranches",
ref="ref.fasta"
recal="snps.recal",
tranches="snps.tranches",
ref="ref.fasta",
output:
vcf="test.snp_recal.vcf"
vcf="test.snp_recal.vcf",
log:
"logs/gatk/applyvqsr.log"
"logs/gatk/applyvqsr.log",
params:
mode="SNP", # set mode, must be either SNP, INDEL or BOTH
extra="" # optional
extra="", # optional
resources:
mem_mb=50
mem_mb=50,
wrapper:
"master/bio/gatk/applyvqsr"
"master/bio/gatk/applyvqsr"
25 changes: 15 additions & 10 deletions bio/gatk/applyvqsr/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,25 @@


import os

import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts


extra = snakemake.params.get("extra", "")
java_opts = get_java_opts(snakemake)
log = snakemake.log_fmt_shell(stdout=True, stderr=True)
shell(
"gatk --java-options '{java_opts}' ApplyVQSR {extra} "
"-R {snakemake.input.ref} -V {snakemake.input.vcf} "
"--recal-file {snakemake.input.recal} "
"--tranches-file {snakemake.input.tranches} "
"-mode {snakemake.params.mode} "
"--output {snakemake.output.vcf} "
"{log}"
)

with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' ApplyVQSR"
" --variant {snakemake.input.vcf}"
" --recal-file {snakemake.input.recal}"
" --reference {snakemake.input.ref}"
" --tranches-file {snakemake.input.tranches}"
" --mode {snakemake.params.mode}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.vcf}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/baserecalibrator/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.1.4.1
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils ==0.1.3
- snakemake-wrapper-utils =0.3
3 changes: 2 additions & 1 deletion bio/gatk/baserecalibrator/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
- vcf.gz of known variants
output:
- recalibration table for the bam
notes: |
* The `java_opts` param allows for additional arguments to be passed to the java compiler, e.g. "-Xmx4G" for one, and "-Xmx4G -XX:ParallelGCThreads=10" for two options.
* The `java_opts` param allows for additional arguments to be passed to the JVM, e.g. "-XX:ParallelGCThreads=10" (not for `-Xmx` or `-Djava.io.tmpdir`, since they are handled automatically).
* The `extra` param allows for additional program arguments.
* For more information, see https://software.broadinstitute.org/gatk/documentation/article?id=11050

14 changes: 5 additions & 9 deletions bio/gatk/baserecalibrator/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,15 @@ rule gatk_baserecalibrator:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
known="dbsnp.vcf.gz" # optional known sites - single or a list
known="dbsnp.vcf.gz", # optional known sites - single or a list
output:
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
log:
"logs/gatk/baserecalibrator/{sample}.log"
"logs/gatk/baserecalibrator/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
# optional specification of memory usage of the JVM that snakemake will respect with global
# resource restrictions (https://snakemake.readthedocs.io/en/latest/snakefiles/rules.html#resources)
# and which can be used to request RAM during cluster job submission as `{resources.mem_mb}`:
# https://snakemake.readthedocs.io/en/latest/executing/cluster.html#job-properties
java_opts="", # optional
resources:
mem_mb=1024
mem_mb=1024,
wrapper:
"master/bio/gatk/baserecalibrator"
17 changes: 12 additions & 5 deletions bio/gatk/baserecalibrator/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
__license__ = "MIT"


import tempfile
from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

Expand All @@ -17,8 +18,14 @@
known = [known]
known = list(map("--known-sites {}".format, known))

shell(
"gatk --java-options '{java_opts}' BaseRecalibrator {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"-O {snakemake.output.recal_table} {known} {log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' BaseRecalibrator"
" --input {snakemake.input.bam}"
" --reference {snakemake.input.ref}"
" {known}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.recal_table}"
" {log}"
)
2 changes: 1 addition & 1 deletion bio/gatk/baserecalibratorspark/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ channels:
dependencies:
- gatk4 =4.2
- openjdk =8
- snakemake-wrapper-utils =0.1.3
- snakemake-wrapper-utils =0.3
1 change: 1 addition & 0 deletions bio/gatk/baserecalibratorspark/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ authors:
- Christopher Schröder
- Johannes Köster
- Jake VanCampen
- Filipe G. Vieira
input:
- bam file
- fasta reference
Expand Down
10 changes: 5 additions & 5 deletions bio/gatk/baserecalibratorspark/test/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,19 @@ rule gatk_baserecalibratorspark:
bam="mapped/{sample}.bam",
ref="genome.fasta",
dict="genome.dict",
known="dbsnp.vcf.gz" # optional known sites
known="dbsnp.vcf.gz", # optional known sites
output:
recal_table="recal/{sample}.grp"
recal_table="recal/{sample}.grp",
log:
"logs/gatk/baserecalibrator/{sample}.log"
"logs/gatk/baserecalibrator/{sample}.log",
params:
extra="", # optional
java_opts="", # optional
java_opts="", # optional
#spark_runner="", # optional, local by default
#spark_master="", # optional
#spark_extra="", # optional
resources:
mem_mb=1024
mem_mb=1024,
threads: 8
wrapper:
"master/bio/gatk/baserecalibratorspark"
22 changes: 11 additions & 11 deletions bio/gatk/baserecalibratorspark/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
__license__ = "MIT"

import tempfile

from snakemake.shell import shell
from snakemake_wrapper_utils.java import get_java_opts

Expand All @@ -16,18 +15,19 @@
spark_extra = snakemake.params.get("spark_extra", "")
java_opts = get_java_opts(snakemake)

tmpdir = tempfile.gettempdir()

log = snakemake.log_fmt_shell(stdout=True, stderr=True)
known = snakemake.input.get("known", "")
if known:
known = "--known-sites {}".format(known)

shell(
"gatk --java-options '{java_opts}' BaseRecalibratorSpark {extra} "
"-R {snakemake.input.ref} -I {snakemake.input.bam} "
"--output {snakemake.output.recal_table} {known} "
"--tmp-dir {tmpdir} "
"-- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra} "
"{log}"
)
with tempfile.TemporaryDirectory() as tmpdir:
shell(
"gatk --java-options '{java_opts}' BaseRecalibratorSpark"
" --input {snakemake.input.bam}"
" --reference {snakemake.input.ref}"
" {extra}"
" --tmp-dir {tmpdir}"
" --output {snakemake.output.recal_table} {known}"
" -- --spark-runner {spark_runner} --spark-master {spark_master} {spark_extra}"
" {log}"
)
4 changes: 2 additions & 2 deletions bio/gatk/cleansam/environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ channels:
- conda-forge
- defaults
dependencies:
- gatk4 ==4.2.0.0
- snakemake-wrapper-utils ==0.1.3
- gatk4 =4.2
- snakemake-wrapper-utils =0.3
8 changes: 4 additions & 4 deletions bio/gatk/cleansam/test/Snakefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
rule gatk_clean_sam:
input:
bam="{sample}.bam"
bam="{sample}.bam",
output:
clean="{sample}.clean.bam"
clean="{sample}.clean.bam",
log:
"logs/{sample}.log"
"logs/{sample}.log",
params:
extra="",
java_opts="", # optional
java_opts="", # optional
resources:
mem_mb=1024,
wrapper:
Expand Down