From 3f5f65981c5ce1c0a337dccc69304ba798b2f0ac Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 12 Mar 2021 12:13:40 +0100 Subject: [PATCH 1/3] Add `--qualitymax` to AdapterRemoval --- CHANGELOG.md | 3 ++- main.nf | 18 +++++++++--------- nextflow_schema.json | 25 +++++++------------------ 3 files changed, 18 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bc5a6a2a..fafddaa16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` -- [#676](https://github.com/nf-core/eager/issues/676) - Added Lib Checks and automatic help message / summary message formatting +- [#676](https://github.com/nf-core/eager/issues/676) - Refactor help message / summary message formatting to automatic versions using nf-core library +- [#682](https://github.com/nf-core/eager/issues/682) - Add AdapterRemoval `--qualitymax` flag to allow FASTQ Phred score range max more than 41 ### `Fixed` diff --git a/main.nf b/main.nf index 1f127c4dd..fc5f2d553 100644 --- a/main.nf +++ b/main.nf @@ -856,7 +856,7 @@ process adapter_removal { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz *.collapsed.truncated.gz 
*.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -872,7 +872,7 @@ process adapter_removal { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -887,7 +887,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && !params.preserve5p ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat 
*.collapsed.gz *.collapsed.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -902,7 +902,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && !params.skip_trim && params.mergedonly && params.preserve5p ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz @@ -918,7 +918,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && params.skip_trim && !params.mergedonly ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --adapter1 "" --adapter2 "" + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --adapter1 "" --adapter2 "" cat *.collapsed.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz @@ -934,7 +934,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && !params.skip_collapse && params.skip_trim && params.mergedonly ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --collapse ${preserve5p} --adapter1 "" --adapter2 "" + 
AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --adapter1 "" --adapter2 "" cat *.collapsed.gz > output/${base}.pe.combined.tmp.fq.gz @@ -949,7 +949,7 @@ process adapter_removal { } else if ( seqtype == 'PE' && params.skip_collapse && !params.skip_trim ) { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv ${base}.pe.pair*.truncated.gz *.settings output/ """ @@ -957,14 +957,14 @@ process adapter_removal { //SE, collapse not possible, trim reads only """ mkdir -p output - AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} mv 
*.settings *.se.truncated.gz output/ """ } else if ( seqtype != 'PE' && params.skip_trim ) { //SE, collapse not possible, trim reads only """ mkdir -p output - AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} ${preserve5p} --adapter1 "" --adapter2 "" + AdapterRemoval --file1 ${r1} --basename ${base}.se --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${preserve5p} --adapter1 "" --adapter2 "" mv *.settings *.se.truncated.gz output/ """ } diff --git a/nextflow_schema.json b/nextflow_schema.json index 10683154d..53065b024 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -181,17 +181,6 @@ "copyNoFollow", "move" ] - }, - "saveTrimmed": { - "type": "boolean", - "default": true, - "description": "Turn this on if you want to keep trimmed reads.", - "hidden": true - }, - "saveAlignedIntermediates": { - "type": "boolean", - "description": "Turn this on if you want to keep intermediate alignment files (SAM, BAM, non-dedupped BAM)", - "hidden": true } }, "fa_icon": "fas fa-cloud-download-alt" @@ -511,6 +500,13 @@ "description": "Only use merged reads downstream (un-merged reads and singletons are discarded).", "fa_icon": "fas fa-handshake", "help_text": "Specify that only merged reads are sent downstream for analysis.\n\nSingletons (i.e. reads missing a pair), or un-merged reads (where there wasn't sufficient overlap) are discarded.\n\nYou may want to use this if you want ensure only the best quality reads for your analysis, but with the penalty of potentially losing still valid data (even if some reads have slightly lower quality). It is highly recommended when using `--dedupper 'dedup'` (see below)." + }, + "qualitymax": { + "type": "integer", + "description": "Specify the maximum Phred score used in input FASTQ files", + "help_text": "Specify maximum Phred score of the quality field of FASTQ files. The quality-score range can vary depending on the machine and version (e.g. 
see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of `41`.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", + "default": "41", + "fa_icon": "fas fa-arrow-up" } }, "fa_icon": "fas fa-cut", @@ -605,7 +601,6 @@ "bt2n": { "type": "integer", "description": "Specify the -N parameter for bowtie2 (mismatches in seed). This will override defaults from alignmode/sensitivity.", - "default": 0, "fa_icon": "fas fa-sort-numeric-down", "help_text": "The number of mismatches allowed in the seed during seed-and-extend procedure of Bowtie2. This will override any values set with `--bt2_sensitivity`. Can either be 0 or 1. Default: 0 (i.e. use`--bt2_sensitivity` defaults).\n\n> Modifies Bowtie2 parameters: `-N`", "enum": [ @@ -615,21 +610,18 @@ }, "bt2l": { "type": "integer", - "default": 0, "description": "Specify the -L parameter for bowtie2 (length of seed substrings). This will override defaults from alignmode/sensitivity.", "fa_icon": "fas fa-ruler-horizontal", "help_text": "The length of the seed sub-string to use during seeding. This will override any values set with `--bt2_sensitivity`. Default: 0 (i.e. use`--bt2_sensitivity` defaults: [20 for local and 22 for end-to-end](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#command-line).\n\n> Modifies Bowtie2 parameters: `-L`" }, "bt2_trim5": { "type": "integer", - "default": 0, "description": "Specify number of bases to trim off from 5' (left) end of read before alignment.", "fa_icon": "fas fa-cut", "help_text": "Number of bases to trim at the 5' (left) end of read prior alignment. 
Maybe useful when left-over sequencing artefacts of in-line barcodes present Default: 0\n\n> Modifies Bowtie2 parameters: `-bt2_trim5`" }, "bt2_trim3": { "type": "integer", - "default": 0, "description": "Specify number of bases to trim off from 3' (right) end of read before alignment.", "fa_icon": "fas fa-cut", "help_text": "Number of bases to trim at the 3' (right) end of read prior alignment. Maybe useful when left-over sequencing artefacts of in-line barcodes present Default: 0.\n\n> Modifies Bowtie2 parameters: `-bt2_trim3`" @@ -680,14 +672,12 @@ }, "bam_mapping_quality_threshold": { "type": "integer", - "default": 0, "description": "Minimum mapping quality for reads filter.", "fa_icon": "fas fa-greater-than-equal", "help_text": "Specify a mapping quality threshold for mapped reads to be kept for downstream analysis. By default keeps all reads and is therefore set to `0` (basically doesn't filter anything).\n\n> Modifies samtools view parameter: `-q`" }, "bam_filter_minreadlength": { "type": "integer", - "default": 0, "fa_icon": "fas fa-ruler-horizontal", "description": "Specify minimum read length to be kept after mapping.", "help_text": "Specify minimum length of mapped reads. This filtering will apply at the same time as mapping quality filtering.\n\nIf used _instead_ of minimum length read filtering at AdapterRemoval, this can be useful to get more realistic endogenous DNA percentages, when most of your reads are very short (e.g. in single-stranded libraries) and would otherwise be discarded by AdapterRemoval (thus making an artificially small denominator for a typical endogenous DNA calculation). 
Note in this context you should not perform mapping quality filtering nor discarding of unmapped reads to ensure a correct denominator of all reads, for the endogenous DNA calculation.\n\n> Modifies filter_bam_fragment_length.py parameter: `-l`" @@ -1046,7 +1036,6 @@ }, "freebayes_g": { "type": "integer", - "default": 0, "description": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified in --freebayes_C.", "fa_icon": "fab fa-think-peaks", "help_text": "Specify to skip over regions of high depth by discarding alignments overlapping positions where total read depth is greater than specified C. Not set by default.\n\n> Modifies freebayes parameter: `-g`" From 89d86844e2283cb1f403bf4540498f4955886346 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 12 Mar 2021 12:18:38 +0100 Subject: [PATCH 2/3] Remove accidentally duplicated qualitymax param in AR command --- main.nf | 2 +- nextflow.config | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index fc5f2d553..081bbd33a 100644 --- a/main.nf +++ b/main.nf @@ -856,7 +856,7 @@ process adapter_removal { """ mkdir -p output - AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap ${params.min_adap_overlap} + AdapterRemoval --file1 ${r1} --file2 ${r2} --basename ${base}.pe --gzip --threads ${task.cpus} --qualitymax ${params.qualitymax} --collapse ${preserve5p} --trimns --trimqualities --adapter1 ${params.clip_forward_adaptor} --adapter2 ${params.clip_reverse_adaptor} --minlength ${params.clip_readlength} --minquality ${params.clip_min_read_quality} --minadapteroverlap 
${params.min_adap_overlap} cat *.collapsed.gz *.collapsed.truncated.gz *.singleton.truncated.gz *.pair1.truncated.gz *.pair2.truncated.gz > output/${base}.pe.combined.tmp.fq.gz diff --git a/nextflow.config b/nextflow.config index d9f7a32b8..77391e562 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,6 +67,7 @@ params { skip_trim = false preserve5p = false mergedonly = false + qualitymax = 41 //Mapping algorithm mapper = 'bwaaln' From c0b47963f6e560299909333f80a6f0d337f90647 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Fri, 12 Mar 2021 12:26:26 +0100 Subject: [PATCH 3/3] Fix new param JSON schema type --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 53065b024..328b11dcb 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -505,7 +505,7 @@ "type": "integer", "description": "Specify the maximum Phred score used in input FASTQ files", "help_text": "Specify maximum Phred score of the quality field of FASTQ files. The quality-score range can vary depending on the machine and version (e.g. see diagram [here](https://en.wikipedia.org/wiki/FASTQ_format#Encoding), and this allows you to increase from the default AdapterRemoval value of `41`.\n\n> Modifies AdapterRemoval parameters: `--qualitymax`", - "default": "41", + "default": 41, "fa_icon": "fas fa-arrow-up" } },