From e9e86d7d4f4550b72fa5a4260ea6a5f1d1fad5e0 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 14:54:03 +0100 Subject: [PATCH 1/8] Add changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d953aa88c..fca3bf8b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished / Dev Branch] +### `Added` +* [#129](https://github.com/nf-core/eager/pull/129) - Support BAM files as [input format](https://github.com/nf-core/eager/issues/41) + ## [2.0.4] - 2019-01-09 ### `Added` From 6db3057304248fc4570a3ee6dff349352eef8e7a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 14:54:46 +0100 Subject: [PATCH 2/8] Add defaults for convertBam --- conf/base.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 2f206c128..c65d74400 100644 --- a/conf/base.config +++ b/conf/base.config @@ -28,7 +28,10 @@ process { //errorStrategy = 'ignore' } - + withName:convertBam { + cpus = { check_max(8 * task.attempt, 'cpus') } + } + withName:bwa { memory = { check_max( 16.GB * task.attempt, 'memory' ) } cpus = { check_max(8 * task.attempt, 'cpus') } From 04581145781d4a6161f0ae6d1be61d0c9e0864ac Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 15:34:49 +0100 Subject: [PATCH 3/8] Started working on BAM input possibilities --- conf/testbam.config | 22 ++++++++++ main.nf | 100 ++++++++++++++++++++++++++++++++------------ nextflow.config | 4 +- 3 files changed, 98 insertions(+), 28 deletions(-) create mode 100644 conf/testbam.config diff --git a/conf/testbam.config b/conf/testbam.config new file mode 100644 index 000000000..7b7012ed6 --- /dev/null +++ b/conf/testbam.config @@ -0,0 +1,22 @@ +/* + * ------------------------------------------------- + * Nextflow config file for running tests + * 
------------------------------------------------- + * Defines bundled input files and everything required + * to run a fast and simple test. Use as follows: + * nextflow run nf-core/eager -profile testbam,docker --bam (or singularity, or conda) + */ + +params { + max_cpus = 2 + max_memory = 6.GB + max_time = 48.h + genome = "Custom" + //Input data + readPaths = [ + 'https://github.com/nf-core/test-datasets/raw/eager2/testdata/Mammoth/bam/test1.bam', + 'https://github.com/nf-core/test-datasets/raw/eager2/testdata/Mammoth/bam/test2.bam' + ] + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Mammoth_MT_Krause.fasta' +} diff --git a/main.nf b/main.nf index 901488a4b..1fa336f74 100644 --- a/main.nf +++ b/main.nf @@ -29,6 +29,7 @@ def helpMessage() { -profile Hardware config to use (e.g. standard, docker, singularity, conda, aws). Ask your system admin if unsure, or check documentatoin. --singleEnd Specifies that the input is single end reads (required if not pairedEnd) --pairedEnd Specifies that the input is paired end reads (required if not singleend) + --bam Specifies that the input is in BAM format --fasta Path to Fasta reference (required if not iGenome reference) --genome Name of iGenomes reference (required if not fasta reference) @@ -263,9 +264,9 @@ if( params.bwa_index && (params.aligner == 'bwa' | params.bwamem)){ } //Validate that either pairedEnd or singleEnd has been specified by the user! -if( params.singleEnd || params.pairedEnd ){ +if( params.singleEnd || params.pairedEnd || params.bam){ } else { - exit 1, "Please specify either --singleEnd or --pairedEnd to execute the pipeline!" + exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously mapped BAM files!" } @@ -289,34 +290,51 @@ if( !(workflow.runName ==~ /[a-z]+_[a-z]+/) ){ * Dump can be used for debugging purposes, e.g. 
using the -dump-channels operator on run */ -if(params.readPaths){ - if(params.singleEnd){ +if( params.readPaths ){ + if( params.singleEnd && !params.bam) { Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .dump(tag:'input') + .from( params.readPaths ) + .map { row -> [ row[0], [ file( row[1][0] ) ] ] } + .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" } .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } - - } else { + } else if (!params.bam){ Channel - .from(params.readPaths) - .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } - .ifEmpty { exit 1, "params.readPaths was empty - no input files supplied" } - .dump(tag:'input') + .from( params.readPaths ) + .map { row -> [ row[0], [ file( row[1][0] ), file( row[1][1] ) ] ] } + .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" } .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } - + } else { + Channel + .from( params.readPaths ) + .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" } + .dump() + .into { ch_bam_to_fastq_convert } + + //Set up clean channels + ch_read_files_fastqc = Channel.empty() + ch_read_files_complexity_filtering = Channel.empty() + ch_read_files_clip = Channel.empty() } -} else { - Channel +} else if (!params.bam){ + Channel .fromFilePairs( params.reads, size: params.singleEnd ? 1 : 2 ) - .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." 
} - .dump(tag:'input') + .ifEmpty { exit 1, "Cannot find any reads matching: ${params.reads}\nNB: Path needs " + + "to be enclosed in quotes!\nNB: Path requires at least one * wildcard!\nIf this is single-end data, please specify --singleEnd on the command line." } .into { ch_read_files_clip; ch_read_files_fastqc; ch_read_files_complexity_filtering } - -} - +} else { + Channel + .fromPath( params.reads ) + .ifEmpty { exit 1, "Cannot find any bam file matching: ${params.reads}\nNB: Path needs" + + "to be enclosed in quotes!\n" } + .dump() //For debugging purposes + .into { ch_bam_to_fastq_convert } + + //Set up clean channels + ch_read_files_fastqc = Channel.empty() + ch_read_files_complexity_filtering = Channel.empty() + ch_read_files_clip = Channel.empty() +} // Header log info log.info "=========================================" @@ -457,6 +475,30 @@ process makeSeqDict { """ } +/* +* Convert BAM to FastQ if BAM input is specified instead of FastQ file(s) +* +*/ + +process convertBam { + tag "$bam" + + when: params.bam + + input: + file bam from ch_bam_to_fastq_convert + + output: + set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) + file(reads) into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + + script: + base = bam - '.bam' + """ + samtools fastq -tn ${bam} | pigz -p ${task.cpus} > ${base}.fastq.gz + """ +} + /* @@ -468,7 +510,7 @@ process fastqc { saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} input: - set val(name), file(reads) from ch_read_files_fastqc + set val(name), file(reads) from ch_read_files_fastqc.mix(ch_read_files_converted_fastqc) output: file "*_fastqc.{zip,html}" into ch_fastqc_results @@ -492,7 +534,7 @@ process fastp { when: params.complexity_filter input: - set val(name), file(reads) from ch_read_files_complexity_filtering + set val(name), file(reads) from ch_read_files_complexity_filtering.mix(ch_read_files_converted_fastp) output: set val(name), file("*pG.fq.gz") into ch_clipped_reads_complexity_filtered @@ -520,6 +562,8 @@ process adapter_removal { tag "$name" publishDir "${params.outdir}/read_merging", mode: 'copy' + when: !params.bam + input: set val(name), file(reads) from ( params.complexity_filter ? ch_clipped_reads_complexity_filtered : ch_read_files_clip ) @@ -557,6 +601,8 @@ process fastqc_after_clipping { publishDir "${params.outdir}/FastQC/after_clipping", mode: 'copy', saveAs: {filename -> filename.indexOf(".zip") > 0 ? 
"zips/$filename" : "$filename"} + when: !params.bam + input: file(reads) from ch_clipped_reads_for_fastqc @@ -580,7 +626,7 @@ process bwa { when: !params.circularmapper && !params.bwamem input: - file(reads) from ch_clipped_reads + file(reads) from ch_clipped_reads.mix(ch_read_files_converted_mapping_bwa) file "*" from ch_bwa_index.mix(ch_bwa_index_existing).collect() file fasta from ch_fasta_for_bwa_mapping @@ -630,7 +676,7 @@ process circularmapper{ when: params.circularmapper input: - file reads from ch_clipped_reads_circularmapper + file reads from ch_clipped_reads_circularmapper.mix(ch_read_files_converted_mapping_cm) file fasta from ch_fasta_for_circularmapper file "*" from ch_circularmapper_indices @@ -657,7 +703,7 @@ process bwamem { when: params.bwamem && !params.circularmapper input: - file(reads) from ch_clipped_reads_bwamem + file(reads) from ch_clipped_reads_bwamem.mix(ch_read_files_converted_mapping_bwamem) file "*" from ch_bwa_index_bwamem.mix(ch_bwa_index_bwamem_existing).collect() file fasta from ch_fasta_for_bwamem_mapping diff --git a/nextflow.config b/nextflow.config index a0d265081..d9b5d0d36 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,7 @@ // Global default params, used in configs params { - container = 'nfcore/eager:2.0.4' + container = 'nfcore/eager:latest' //Pipeline options aligner = 'bwa' @@ -22,6 +22,7 @@ params { outdir = './results' tracedir = "${params.outdir}/pipeline_info" readPaths = false + bam = false //More defaults complexity_filter = false @@ -54,6 +55,7 @@ profiles { singularity.enabled = true } test { includeConfig 'conf/test.config' } + testbam { includeConfig 'conf/testbam.config' } } // Load igenomes.config if required From 6f1e62efbc55387a66796e5f2e4bdb7b4e1e0fa7 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 15:38:31 +0100 Subject: [PATCH 4/8] Fixing BAM output --- main.nf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 
1fa336f74..604c9fe70 100644 --- a/main.nf +++ b/main.nf @@ -306,6 +306,7 @@ if( params.readPaths ){ } else { Channel .from( params.readPaths ) + .map { row -> [ file( row ) ] } .ifEmpty { exit 1, "params.readPaths or params.bams was empty - no input files supplied!" } .dump() .into { ch_bam_to_fastq_convert } @@ -324,6 +325,7 @@ if( params.readPaths ){ } else { Channel .fromPath( params.reads ) + .map { row -> [ file( row ) ] } .ifEmpty { exit 1, "Cannot find any bam file matching: ${params.reads}\nNB: Path needs" + "to be enclosed in quotes!\n" } .dump() //For debugging purposes @@ -490,7 +492,7 @@ process convertBam { output: set val("${base}"), file("*.fastq.gz") into (ch_read_files_converted_fastqc, ch_read_files_converted_fastp) - file(reads) into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) + file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: base = bam - '.bam' From c2d153398d2d6aec3d3e5340dd0927e6317f6b8a Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 15:40:56 +0100 Subject: [PATCH 5/8] Add missing statement in conversion --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 604c9fe70..062431600 100644 --- a/main.nf +++ b/main.nf @@ -495,7 +495,7 @@ process convertBam { file("*.fastq.gz") into (ch_read_files_converted_mapping_bwa, ch_read_files_converted_mapping_cm, ch_read_files_converted_mapping_bwamem) script: - base = bam - '.bam' + base = "${bam.baseName}" """ samtools fastq -tn ${bam} | pigz -p ${task.cpus} > ${base}.fastq.gz """ From 61d954493ddf186ed709bbd6280bb452144690fb Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 15:42:15 +0100 Subject: [PATCH 6/8] Add Testcase for Travis CI testing BAM input --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/.travis.yml b/.travis.yml index fadae641a..ba2579ddc 100644 --- a/.travis.yml +++ b/.travis.yml @@ -50,5 +50,5 @@ script: - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --bwamem --bwa_index results/reference_genome/bwa_index/ # Test with zipped reference input - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker --pairedEnd --fasta 'https://raw.githubusercontent.com/nf-core/test-datasets/eager2/reference/Test.fasta.gz' - # Test basic pipeline with Conda too - - travis_wait 25 nextflow run ${TRAVIS_BUILD_DIR} -profile test,conda --pairedEnd --bwa_index results/reference_genome/bwa_index/ \ No newline at end of file + # Run the basic pipeline with the bam input profile + - nextflow run ${TRAVIS_BUILD_DIR} -profile testbam,docker --bam \ No newline at end of file From 9190e20e11b2164bdf756e913f6c7badc56cf3d6 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Mon, 21 Jan 2019 15:51:28 +0100 Subject: [PATCH 7/8] Bump versions to keep tests fine --- .travis.yml | 2 +- Dockerfile | 2 +- Singularity | 4 ++-- environment.yml | 2 +- nextflow.config | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index ba2579ddc..57e20c79d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/eager:dev # Fake the tag locally so that the pipeline runs properly - - docker tag nfcore/eager:dev nfcore/eager:2.0.4 + - docker tag nfcore/eager:dev nfcore/eager:latest install: # Install Nextflow diff --git a/Dockerfile b/Dockerfile index 895b865b8..aac095bf1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,4 +3,4 @@ FROM nfcore/base LABEL description="Docker image containing all requirements for nf-core/eager pipeline" COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-eager-2.0.4/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-eager-2.0.5dev/bin:$PATH diff 
--git a/Singularity b/Singularity index c9525f0ef..c87369c4f 100644 --- a/Singularity +++ b/Singularity @@ -4,10 +4,10 @@ Bootstrap:docker %labels MAINTAINER Alexander Peltzer DESCRIPTION Container image containing all requirements for the nf-core/eager pipeline - VERSION 2.0.4 + VERSION 2.0.5dev %environment - PATH=/opt/conda/envs/nf-core-eager-2.0.4/bin:$PATH + PATH=/opt/conda/envs/nf-core-eager-2.0.5dev/bin:$PATH export PATH %files diff --git a/environment.yml b/environment.yml index ad61587e4..cbf5fb4f7 100644 --- a/environment.yml +++ b/environment.yml @@ -1,4 +1,4 @@ -name: nf-core-eager-2.0.4 +name: nf-core-eager-2.0.5dev channels: - defaults - bioconda diff --git a/nextflow.config b/nextflow.config index d9b5d0d36..8dd5a0b5b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -87,7 +87,7 @@ manifest { name = 'nf-core/eager' author = 'Alexander Peltzer, Stephen Clayton, James A Fellows-Yates' homePage = 'https://github.com/nf-core/eager' - version = '2.0.4' + version = '2.0.5dev' description = 'A fully reproducible and modern ancient DNA pipeline in Nextflow and with cloud support.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' From 8c238a02d9d437d5632378cc572e6a2e848c0313 Mon Sep 17 00:00:00 2001 From: Alexander Peltzer Date: Fri, 25 Jan 2019 22:45:18 +0100 Subject: [PATCH 8/8] Add minor change to help text [skip ci] --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index b9ed06664..f985d8706 100644 --- a/main.nf +++ b/main.nf @@ -262,7 +262,7 @@ if( params.bwa_index && (params.aligner == 'bwa' | params.bwamem)){ //Validate that either pairedEnd or singleEnd has been specified by the user! if( params.singleEnd || params.pairedEnd || params.bam){ } else { - exit 1, "Please specify either --singleEnd, --pairedEnd to execute the pipeline on FastQ files and --bam for previously mapped BAM files!" 
+ exit 1, "Please specify either --singleEnd or --pairedEnd to execute the pipeline on FastQ files, or --bam for previously processed BAM files!" }