feat: upgrade to WDLv1.1 (#101)

* feat: upgrade to WDLv1.1
* fix(ValidateSam): brackets were causing expr to always eval to 'false'
* fix: our method for allowing gzipped or uncompressed wasn't working
* chore: simplify symlink command
* docs: 'FastQ' -> 'FASTQ' & 'Bam' -> 'BAM'
* docs: specify GTF can be gzipped or uncompressed
* fix: name collision on global_phred_scores
* Update workflows/scrnaseq/10x-bam-to-fastqs.wdl

  Co-authored-by: Andrew Thrasher <adthrasher@gmail.com>
* fix(ngsderive): use correct sep. ',' -> ' '

---------

Co-authored-by: Andrew Thrasher <adthrasher@gmail.com>
stjudecloud · Jul 11, 2023 · b0070be · b0070be
1 parent 48492b3
commit b0070be
Show file tree

Hide file tree

Showing 36 changed files with 294 additions and 294 deletions.
diff --git a/README.md b/README.md
@@ -54,15 +54,15 @@ The repository is laid out as follows:
 
 The current workflows exist in this repo with the following statuses:
 
-| Name                          | Version         | Description                                                                                                                                           | Specification                                                                                         | Workflow                                                                                                                       | Status                                                                                                              |
-| ----------------------------- | --------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
-| RNA-Seq Standard              | v2.0.0          | Standard RNA-Seq harmonization pipeline.                                                                                                              | [Specification](https://stjudecloud.github.io/rfcs/0001-rnaseq-workflow-v2.0.html)                    | [Realign BAM Workflow](./workflows/rnaseq/rnaseq-standard.wdl), [FastQ Workflow](./workflows/rnaseq/rnaseq-standard-fastq.wdl) | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
-| Build STAR References         | N/A             | Build [STAR aligner](https://github.com/alexdobin/STAR) reference files used in RNA-Seq Standard harmonization pipelines.                             | None                                                                                                  | [Workflow](./workflows/rnaseq/rnaseq-star-db-build.wdl)                                                                        | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
-| Quality Check Standard        | v1.0.0          | Perform ~10 different QC analyses on a BAM file and compile the results using [MultiQC](https://multiqc.info/).                                       | [Specification](https://rfcs.stjude.cloud/branches/rfcs/qc-workflow/0002-quality-check-workflow.html) | [Workflow](./workflows/qc/quality-check-standard.wdl)                                                                          | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
-| ESTIMATE                      | v1.0.0 (*beta*) | Runs the [ESTIMATE software package](https://bioinformatics.mdanderson.org/estimate/) on a feature counts file.                                       | None                                                                                                  | [Workflow](./workflows/rnaseq/ESTIMATE.wdl)                                                                                    | ![In Development](https://img.shields.io/static/v1?label=Status&message=Development&color=orange&style=flat-square) |
-| Calculate Gene Lengths        | N/A             | Produces a gene length file from a GTF.                                                                                                               | None                                                                                                  | [Workflow](./workflows/rnaseq/calc-gene-lengths.wdl)                                                                           | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
-| Build BWA References          | N/A             | Builds reference files used by the [BWA aligner](https://github.com/lh3/bwa).                                                                         | None                                                                                                  | [Workflow](./workflows/general/bwa-db-build.wdl)                                                                               | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
-| BAM to FastQs                 | v1.0.0          | Split a BAM file into read groups, then read 1 FastQs and  read 2 FastQs.                                                                             | None                                                                                                  | [Workflow](./workflows/general/bam-to-fastqs.wdl)                                                                              | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| Name                   | Version         | Description                                                                                                               | Specification                                                                                         | Workflow                                                                                                                       | Status                                                                                                              |
+| ---------------------- | --------------- | ------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------ | ------------------------------------------------------------------------------------------------------------------- |
+| RNA-Seq Standard       | v2.0.0          | Standard RNA-Seq harmonization pipeline.                                                                                  | [Specification](https://stjudecloud.github.io/rfcs/0001-rnaseq-workflow-v2.0.html)                    | [Realign BAM Workflow](./workflows/rnaseq/rnaseq-standard.wdl), [FASTQ Workflow](./workflows/rnaseq/rnaseq-standard-fastq.wdl) | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| Build STAR References  | N/A             | Build [STAR aligner](https://github.com/alexdobin/STAR) reference files used in RNA-Seq Standard harmonization pipelines. | None                                                                                                  | [Workflow](./workflows/rnaseq/rnaseq-star-db-build.wdl)                                                                        | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| Quality Check Standard | v1.0.0          | Perform ~10 different QC analyses on a BAM file and compile the results using [MultiQC](https://multiqc.info/).           | [Specification](https://rfcs.stjude.cloud/branches/rfcs/qc-workflow/0002-quality-check-workflow.html) | [Workflow](./workflows/qc/quality-check-standard.wdl)                                                                          | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| ESTIMATE               | v1.0.0 (*beta*) | Runs the [ESTIMATE software package](https://bioinformatics.mdanderson.org/estimate/) on a feature counts file.           | None                                                                                                  | [Workflow](./workflows/rnaseq/ESTIMATE.wdl)                                                                                    | ![In Development](https://img.shields.io/static/v1?label=Status&message=Development&color=orange&style=flat-square) |
+| Calculate Gene Lengths | N/A             | Produces a gene length file from a GTF.                                                                                   | None                                                                                                  | [Workflow](./workflows/rnaseq/calc-gene-lengths.wdl)                                                                           | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| Build BWA References   | N/A             | Builds reference files used by the [BWA aligner](https://github.com/lh3/bwa).                                             | None                                                                                                  | [Workflow](./workflows/general/bwa-db-build.wdl)                                                                               | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
+| BAM to FASTQs          | v1.0.0          | Split a BAM file into read groups, then read 1 FASTQs and read 2 FASTQs.                                                  | None                                                                                                  | [Workflow](./workflows/general/bam-to-fastqs.wdl)                                                                              | ![In Production](https://img.shields.io/static/v1?label=Status&message=Production&color=green&style=flat-square)    |
 
 ## Author
 

diff --git a/docker/STAR/2.7.10a-0/sort_star_input.py b/docker/STAR/2.7.10a-0/sort_star_input.py
@@ -1,6 +1,6 @@
-"""Sort FastQ pairs and read group information prior to being passed to STAR.
+"""Sort FASTQ pairs and read group information prior to being passed to STAR.
 
-We discovered that when providing STAR with read group split FastQs and also
+We discovered that when providing STAR with read group split FASTQs and also
 supplying STAR with header information via the `--outSAMattrRGline` parameter,
 that the read group information for individual reads may become jumbled.
 
@@ -59,8 +59,8 @@ def validate(
     """Ensure that the final strings are ready to be passed to STAR.
 
     The first check is that there is the same number of read groups as
-    FastQ pairs. Next, it's checked that each read group ID is
-    present in a synced FastQ pair.
+    FASTQ pairs. Next, it's checked that each read group ID is
+    present in a synced FASTQ pair.
 
     Args:
         read_one_fastqs (list): list of file paths
@@ -69,14 +69,14 @@ def validate(
     """
     if len(read_one_fastqs) != len(rgids):
         raise argparse.ArgumentError(
-            "Must have same number of read groups as FastQ pairs"
+            "Must have same number of read groups as FASTQ pairs"
         )
 
     for i, id in enumerate(rgids):
         if (id not in read_one_fastqs[i]) or (id not in read_two_fastqs[i]):
             raise SystemExit(
                 "Error: There's a mismatch between "
-                "read group IDs and FastQ file names"
+                "read group IDs and FASTQ file names"
             )
 
 
@@ -102,12 +102,12 @@ def write_outfiles(
     parser.add_argument(
         "--read_one_fastqs",
         required=True,
-        help="Comma delimited (without spaces) list of read one FastQ file paths",
+        help="Comma delimited (without spaces) list of read one FASTQ file paths",
     )
     parser.add_argument(
         "--read_two_fastqs",
         required=False,
-        help="Comma delimited (without spaces) list of read two FastQ file paths",
+        help="Comma delimited (without spaces) list of read two FASTQ file paths",
     )
     parser.add_argument(
         "--read_groups",
@@ -126,7 +126,7 @@ def write_outfiles(
     if args.read_two_fastqs:
         if len(read_one_fastqs) != len(read_two_fastqs):
             raise argparse.ArgumentError(
-                "Must have the same number of read one FastQs as read two FastQs"
+                "Must have the same number of read one FASTQs as read two FASTQs"
             )
 
     sorted_read_one_fastqs = sort_fastqs(read_one_fastqs)

diff --git a/template/common-parameter-meta.txt b/template/common-parameter-meta.txt
@@ -9,7 +9,7 @@ PERMITTED META DESCRIPTIONS:
         max_retries: "<for a workflow> Number of times to retry failed steps. Overrides task level defaults."
         outfile_name: "Name for the <type of file> file"
         prefix: "Prefix for the <type of file> file. The extension `<extension>` will be added."
-        gtf: "GTF features file. Either gzipped or uncompressed."
+        gtf: "GTF features file. Gzipped or uncompressed."
         gtf_gz: "Gzipped GTF features file"
         bam: "Input BAM format file to <brief description of task>"
         bam_index: "BAM index file corresponding to the input BAM"
@@ -23,10 +23,10 @@ PERMITTED META DESCRIPTIONS:
                 'Unstranded'
             ]
         },
-        read_one_fastq_gz: "Gzipped FastQ file with 1st reads in pair"
-        read_two_fastq_gz: "Gzipped FastQ file with 2nd reads in pair"
-        read_one_fastqs_gz: "Array of gzipped FastQ files with 1st reads in pair"
-        read_two_fastqs_gz: "Array of gzipped FastQ files with 2nd reads in pair"
+        read_one_fastq_gz: "Gzipped FASTQ file with 1st reads in pair"
+        read_two_fastq_gz: "Gzipped FASTQ file with 2nd reads in pair"
+        read_one_fastqs_gz: "Array of gzipped FASTQ files with 1st reads in pair"
+        read_two_fastqs_gz: "Array of gzipped FASTQ files with 2nd reads in pair"
         paired_end: "Is the data paired-end?"
         subsample_n_reads: "Only process a random sampling of `n` reads. Any `n`<=`0` for processing entire input."
 

diff --git a/template/task-templates.wdl b/template/task-templates.wdl
@@ -1,6 +1,6 @@
 ## # WDL tool template
 
-version 1.0
+version 1.1
 
 task static_disk_and_ram_task {
     meta {
@@ -28,8 +28,8 @@ task static_disk_and_ram_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -66,8 +66,8 @@ task dynamic_disk_and_ram_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -109,8 +109,8 @@ task use_all_cores_task {
 
     runtime {
         cpu: ncpu
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -159,8 +159,8 @@ task localize_files_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -194,8 +194,8 @@ task outfile_name_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -229,8 +229,8 @@ task prefix_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }
@@ -271,8 +271,8 @@ task string_choices_task {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: ""
         maxRetries: max_retries
     }

diff --git a/tools/bwa.wdl b/tools/bwa.wdl
@@ -3,15 +3,15 @@
 ## This WDL file wraps [BWA](https://github.com/lh3/bwa).
 ## BWA aligns sequencing reads to a reference genome.
 
-version 1.0
+version 1.1
 
 task bwa_aln {
     meta {
-        description: "This WDL task maps single-end FastQ files to BAM format using bwa aln."
+        description: "This WDL task maps single-end FASTQ files to BAM format using bwa aln."
     }
 
     parameter_meta {
-        fastq: "Input FastQ file to align with bwa"
+        fastq: "Input FASTQ file to align with bwa"
         bwa_db_tar_gz: "Gzipped tar archive of the bwa reference files. Files should be at the root of the archive."
     }
 
@@ -70,21 +70,21 @@ task bwa_aln {
 
     runtime {
         cpu: ncpu
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: 'ghcr.io/stjudecloud/bwa:0.7.17-0'
         maxRetries: max_retries
     }
 }
 
 task bwa_aln_pe {
     meta {
-        description: "This WDL task maps paired-end FastQ files to BAM format using bwa aln."
+        description: "This WDL task maps paired-end FASTQ files to BAM format using bwa aln."
     }
 
     parameter_meta {
-        read_one_fastq_gz: "Input FastQ read 1 file to align with bwa"
-        read_two_fastq_gz: "Input FastQ read 2 file to align with bwa"
+        read_one_fastq_gz: "Input FASTQ read 1 file to align with bwa"
+        read_two_fastq_gz: "Input FASTQ read 2 file to align with bwa"
         bwa_db_tar_gz: "Gzipped tar archive of the bwa reference files. Files should be at the root of the archive."
     }
 
@@ -147,20 +147,20 @@ task bwa_aln_pe {
 
     runtime {
         cpu: ncpu
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: 'ghcr.io/stjudecloud/bwa:0.7.17-0'
         maxRetries: max_retries
     }
 }
 
 task bwa_mem {
     meta {
-        description: "This WDL task maps FastQ files to BAM format using bwa mem."
+        description: "This WDL task maps FASTQ files to BAM format using bwa mem."
     }
 
     parameter_meta {
-        fastq: "Input FastQ file to align with bwa"
+        fastq: "Input FASTQ file to align with bwa"
         bwa_db_tar_gz: "Gzipped tar archive of the bwa reference files. Files should be at the root of the archive."
     }
 
@@ -217,8 +217,8 @@ task bwa_mem {
 
     runtime {
         cpu: ncpu
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: 'ghcr.io/stjudecloud/bwa:0.7.17-0'
         maxRetries: max_retries
     }
@@ -251,7 +251,7 @@ task build_bwa_db {
 
         ref_fasta=~{basename(reference_fasta, ".gz")}
         gunzip -c ~{reference_fasta} > "$ref_fasta" \
-            || ln -s ~{reference_fasta} "$ref_fasta"
+            || ln -sf ~{reference_fasta} "$ref_fasta"
 
         bwa index "$ref_fasta"
 
@@ -263,8 +263,8 @@ task build_bwa_db {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: 'ghcr.io/stjudecloud/bwa:0.7.17-0'
         maxRetries: max_retries
     }
@@ -295,8 +295,8 @@ task format_rg_for_bwa {
     }
 
     runtime {
-        memory: memory_gb + " GB"
-        disk: disk_size_gb + " GB"
+        memory: "~{memory_gb} GB"
+        disk: "~{disk_size_gb} GB"
         docker: 'ghcr.io/stjudecloud/util:1.3.0'
         maxRetries: max_retries
     }