diff --git a/modules.json b/modules.json index 9f8734c97..677ab814f 100644 --- a/modules.json +++ b/modules.json @@ -97,7 +97,7 @@ }, "picard/markduplicates": { "branch": "master", - "git_sha": "ec833ac4c29db6005d18baccf3306f557c46b006", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", "installed_by": ["bam_markduplicates_picard"] }, "preseq/lcextrap": { @@ -280,7 +280,7 @@ }, "bam_markduplicates_picard": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", "installed_by": ["subworkflows"] }, "bam_rseqc": { diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf index 80930cc41..ad0b29636 100644 --- a/modules/nf-core/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -8,13 +8,14 @@ process PICARD_MARKDUPLICATES { 'biocontainers/picard:3.1.1--hdfd78af_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(reads) tuple val(meta2), path(fasta) tuple val(meta3), path(fai) output: - tuple val(meta), path("*.bam") , emit: bam - tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true tuple val(meta), path("*.metrics.txt"), emit: metrics path "versions.yml" , emit: versions @@ -24,6 +25,8 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3072 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -31,16 +34,16 @@ process PICARD_MARKDUPLICATES { avail_mem = (task.memory.mega*0.8).intValue() } - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ picard \\ -Xmx${avail_mem}M \\ MarkDuplicates \\ $args \\ - --INPUT $bam \\ - --OUTPUT ${prefix}.bam \\ - --REFERENCE_SEQUENCE $fasta \\ + --INPUT $reads \\ + --OUTPUT ${prefix}.${suffix} \\ + $reference \\ --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml @@ -51,10 +54,10 @@ process PICARD_MARKDUPLICATES { stub: def prefix = task.ext.prefix ?: "${meta.id}" - if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ - touch ${prefix}.bam - touch ${prefix}.bam.bai + touch ${prefix}.${suffix} touch ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml index 1ab90c075..1f0ffe16c 100644 --- a/modules/nf-core/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -21,9 +21,9 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - reads: type: file - description: BAM file + description: Sequence reads file, can be SAM/BAM/CRAM format pattern: "*.{bam,cram,sam}" - meta2: type: map @@ -32,7 +32,7 @@ input: e.g. [ id:'genome' ] - fasta: type: file - description: Reference genome fasta file + description: Reference genome fasta file, required for CRAM input pattern: "*.{fasta,fa}" - meta3: type: map @@ -57,6 +57,10 @@ output: type: file description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag pattern: "*.{bai}" + - cram: + type: file + description: Output CRAM file + pattern: "*.{cram}" - metrics: type: file description: Duplicate metrics file generated by picard diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test index f0caf0fa3..e3e97f6cc 100644 --- a/modules/nf-core/picard/markduplicates/tests/main.nf.test +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -4,6 +4,10 @@ nextflow_process { script "../main.nf" process "PICARD_MARKDUPLICATES" config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" test("sarscov2 [unsorted bam]") { @@ -14,14 +18,8 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) ]) - input[1] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) - ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] """ } } @@ -45,14 +43,8 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) ]) - input[1] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) - ]) - input[2] = Channel.of([ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) - ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] """ } } @@ -91,7 +83,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.bam[0][1]).name).match("cram_name") }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_name") }, { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, { assert snapshot(process.out.versions).match("cram_versions") } ) diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap index 31c9130dc..eb17111e4 100644 --- a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -5,39 +5,59 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], - "timestamp": "2024-01-19T10:26:45.092349" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:50.928021" }, "unsorted_bam_name": { "content": [ "test.marked.bam" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:26:28.100755" }, "cram_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.cram --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:27:03.253071" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.518152" }, "sorted_bam_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:26:45.086503" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:39:10.318331" }, "cram_name": { "content": [ - "test.marked.bam" + "test.marked.cram" ], - "timestamp": "2024-01-19T10:27:03.241617" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.459663" }, "cram_versions": { "content": [ @@ -45,6 +65,10 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:27:03.26989" }, "unsorted_bam_versions": { @@ -53,22 +77,34 @@ "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" ] ], - "timestamp": "2024-01-19T10:26:28.159071" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:24.040403" }, "unsorted_bam_metrics": { "content": [ [ "## htsjdk.samtools.metrics.StringHeader", - "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", "## htsjdk.samtools.metrics.StringHeader" ] ], - "timestamp": "2024-01-19T10:26:28.143979" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T10:51:12.831787" }, "sorted_bam_name": { "content": [ "test.marked.bam" ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, "timestamp": "2024-01-19T10:26:45.080116" } } \ No newline at end of file diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf index de8130fb5..2de059b84 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/main.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -9,7 +9,7 @@ include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' workflow BAM_MARKDUPLICATES_PICARD { take: - ch_bam // channel: [ val(meta), path(bam) ] + ch_reads // channel: [ val(meta), path(reads) ] ch_fasta // channel: [ path(fasta) ] ch_fai // channel: [ path(fai) ] @@ -17,27 +17,33 @@ workflow BAM_MARKDUPLICATES_PICARD { ch_versions = Channel.empty() - PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) + PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) + ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + + SAMTOOLS_INDEX ( ch_markdup ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - ch_bam_bai = PICARD_MARKDUPLICATES.out.bam - .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) - .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) - .map{meta, bam, bai, csi -> - if (bai) [ meta, bam, bai ] - else [ meta, bam, csi ] + ch_reads_index = ch_markdup + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, reads, bai, crai, csi -> + if (bai) [ meta, reads, bai ] + else if (crai) [ meta, reads, crai ] + else [ meta, reads, csi ] } - BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(bam) ] + cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml index fe63068e6..433d35b2b 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -14,13 +14,13 @@ components: - samtools/flagstat - bam_stats_samtools input: - - ch_bam: + - ch_reads: description: | - BAM/CRAM/SAM file - Structure: [ val(meta), path(bam) ] + Sequence reads in BAM/CRAM/SAM format + Structure: [ val(meta), path(reads) ] - ch_fasta: description: | - Reference genome fasta file + Reference genome fasta file required for CRAM input Structure: [ path(fasta) ] - ch_fasta: description: | @@ -29,12 +29,20 @@ input: output: - bam: description: | - processed BAM/CRAM/SAM file + processed BAM/SAM file Structure: [ val(meta), path(bam) ] - bai: description: | - BAM/CRAM/SAM samtools index + BAM/SAM samtools index Structure: [ val(meta), path(bai) ] + - cram: + description: | + processed CRAM file + Structure: [ val(meta), path(cram) ] + - crai: + description: | + CRAM samtools index + Structure: [ val(meta), path(crai) ] - csi: description: | CSI samtools index diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test index bb3ef8f4d..5ef337dc5 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -4,9 +4,19 @@ nextflow_workflow { script "../main.nf" workflow "BAM_MARKDUPLICATES_PICARD" - tag "PICARD_MARKDUPLICATES" - tag "SAMTOOLS_INDEX" - tag "BAM_STATS_SAMTOOLS" + tag "picard" + tag "picard/markduplicates" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_markduplicates_picard" + tag "subworkflows/bam_markduplicates_picard" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/index" + tag "samtools/stats" test("sarscov2 - bam") { @@ -69,8 +79,8 @@ nextflow_workflow { assertAll( { assert workflow.success}, { assert snapshot( - path(workflow.out.bam[0][1]), - path(workflow.out.bai[0][1]), + file(workflow.out.cram[0][1]).name, + path(workflow.out.crai[0][1]), path(workflow.out.flagstat[0][1]), path(workflow.out.idxstats[0][1]), path(workflow.out.stats[0][1]), diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap index 2f02f747a..caf4ac8ad 100644 --- a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -1,17 +1,17 @@ { "homo_sapiens - cram": { "content": [ - "test.bam:md5,6641dc05efa8384a061f378d86d922cd", - "test.bam.bai:md5,c41c60d8a94adebe53b6df80b6e90d38", + "test.cram", + "test.cram.crai:md5,78d47ba01ac4e05f3ae1e353902a989e", "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", - "test.stats:md5,9ac28e327a7797d7bb6a5922fde59ed1" + "test.stats:md5,c2f74a4d9b2377bcf4f4f184da3801af" ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.10.1" }, - "timestamp": "2024-02-13T16:46:16.302755774" + "timestamp": "2024-03-20T20:45:38.364189" }, "sarscov2 - bam": { "content": [ @@ -23,8 +23,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.10.1" }, - "timestamp": "2024-02-13T16:46:02.942115679" + "timestamp": "2024-03-21T11:38:08.434529" } } \ No newline at end of file