From 1fd779d9e4db77507e9862a2206d86c03688a532 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 11 Oct 2022 16:39:01 +0200 Subject: [PATCH 01/31] WIP. Just concatenating germline-vcfs from strelka and hyplotypecaller --- conf/modules.config | 9 ++++ modules.json | 4 ++ modules/nf-core/bcftools/concat/main.nf | 35 ++++++++++++++ modules/nf-core/bcftools/concat/meta.yml | 48 +++++++++++++++++++ .../local/germline_variant_calling.nf | 10 +++- .../main.nf | 13 +++++ .../variantcalling/haplotypecaller/main.nf | 31 +++++++++++- .../variantcalling/strelka/single/main.nf | 23 ++++++++- workflows/sarek.nf | 30 ++++++++++++ 9 files changed, 200 insertions(+), 3 deletions(-) create mode 100644 modules/nf-core/bcftools/concat/main.nf create mode 100644 modules/nf-core/bcftools/concat/meta.yml diff --git a/conf/modules.config b/conf/modules.config index 35d8cd4ba5..d57c8d3729 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1254,6 +1254,15 @@ process{ } } + // CONCAT UNANNOTATED VCFS + withName: 'BCFTOOLS_CONCAT_GERMLINE_VCFS'{ + ext.prefix = { "${meta.id}.germline" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } + ] + } + // VCF QC withName: 'BCFTOOLS_STATS'{ ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/modules.json b/modules.json index a8b793321c..f2a35b9a97 100644 --- a/modules.json +++ b/modules.json @@ -9,6 +9,10 @@ "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" }, + "bcftools/concat": { + "branch": "master", + "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" + }, "bcftools/sort": { "branch": "master", "git_sha": "5e34754d42cd2d5d248ca8673c0a53cdf5624905" diff --git a/modules/nf-core/bcftools/concat/main.nf b/modules/nf-core/bcftools/concat/main.nf new file mode 100644 index 0000000000..d2a58a557a --- /dev/null +++ b/modules/nf-core/bcftools/concat/main.nf @@ -0,0 +1,35 @@ +process 
BCFTOOLS_CONCAT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + + input: + tuple val(meta), path(vcfs), path(tbi) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools concat \\ + --output ${prefix}.vcf.gz \\ + $args \\ + --threads $task.cpus \\ + ${vcfs} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/concat/meta.yml b/modules/nf-core/bcftools/concat/meta.yml new file mode 100644 index 0000000000..167dbe5a05 --- /dev/null +++ b/modules/nf-core/bcftools/concat/meta.yml @@ -0,0 +1,48 @@ +name: bcftools_concat +description: Concatenate VCF files +keywords: + - variant calling + - concat + - bcftools + - VCF + +tools: + - concat: + description: | + Concatenate VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: files + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbi: + type: files + description: | + List containing 2 or more index files (optional) + e.g. [ 'file1.tbi', 'file2.tbi' ] +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: VCF concatenated output file + pattern: "*.{vcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 101c45448d..b445920f7f 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -43,6 +43,9 @@ workflow GERMLINE_VARIANT_CALLING { strelka_vcf = Channel.empty() tiddit_vcf = Channel.empty() + haplotypecaller_vcf_tbi = Channel.empty() + strelka_vcf_tbi = Channel.empty() + // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> @@ -182,7 +185,8 @@ workflow GERMLINE_VARIANT_CALLING { intervals_bed_combined_haplotypec) haplotypecaller_vcf = RUN_HAPLOTYPECALLER.out.filtered_vcf - ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) + haplotypecaller_vcf_tbi = RUN_HAPLOTYPECALLER.out.filtered_vcf_tbi + ch_versions = ch_versions.mix(RUN_HAPLOTYPECALLER.out.versions) } // MANTA @@ -208,6 +212,7 @@ workflow GERMLINE_VARIANT_CALLING { ) strelka_vcf = RUN_STRELKA_SINGLE.out.strelka_vcf + strelka_vcf_tbi = RUN_STRELKA_SINGLE.out.strelka_vcf_tbi ch_versions = ch_versions.mix(RUN_STRELKA_SINGLE.out.versions) } @@ -232,5 +237,8 @@ workflow GERMLINE_VARIANT_CALLING { strelka_vcf tiddit_vcf + haplotypecaller_vcf_tbi + strelka_vcf_tbi + versions = ch_versions } diff --git a/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf b/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf index 86e1223782..b7a9905991 100644 --- a/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf +++ b/subworkflows/nf-core/gatk4/single_sample_germline_variant_calling/main.nf @@ -59,10 +59,23 @@ workflow 
GATK_SINGLE_SAMPLE_GERMLINE_VARIANT_CALLING{ ], vcf] } + filtered_vcf_tbi = FILTERVARIANTTRANCHES.out.tbi.map{ meta, tbi -> + [[ + id: meta.sample, + num_intervals: meta.num_intervals, + patient: meta.patient, + sample: meta.sample, + sex: meta.sex, + status: meta.status, + variantcaller: "haplotypecaller" + ], tbi] + } + ch_versions = ch_versions.mix(CNNSCOREVARIANTS.out.versions) ch_versions = ch_versions.mix(FILTERVARIANTTRANCHES.out.versions) emit: versions = ch_versions filtered_vcf + filtered_vcf_tbi } diff --git a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf index e1540cd085..88cab78de3 100644 --- a/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf +++ b/subworkflows/nf-core/variantcalling/haplotypecaller/main.nf @@ -22,6 +22,7 @@ workflow RUN_HAPLOTYPECALLER { ch_versions = Channel.empty() filtered_vcf = Channel.empty() + filtered_vcf_tbi = Channel.empty() HAPLOTYPECALLER( cram, @@ -108,7 +109,34 @@ workflow RUN_HAPLOTYPECALLER { known_sites_indels.concat(known_sites_snps).flatten().unique().collect(), known_sites_indels_tbi.concat(known_sites_snps_tbi).flatten().unique().collect()) - filtered_vcf = SINGLE_SAMPLE.out.filtered_vcf.map{ meta, vcf-> [[patient:meta.patient, sample:meta.sample, status:meta.status, sex:meta.sex, id:meta.sample, num_intervals:meta.num_intervals, variantcaller:"haplotypecaller"], vcf]} + filtered_vcf = SINGLE_SAMPLE.out.filtered_vcf.map{ meta, vcf-> [ + [ + patient:meta.patient, + sample:meta.sample, + status:meta.status, + sex:meta.sex, + id:meta.sample, + num_intervals:meta.num_intervals, + variantcaller:"haplotypecaller" + ], + vcf + ] + } + + filtered_vcf_tbi = SINGLE_SAMPLE.out.filtered_vcf_tbi.map{ meta, tbi-> [ + [ + patient:meta.patient, + sample:meta.sample, + status:meta.status, + sex:meta.sex, + id:meta.sample, + num_intervals:meta.num_intervals, + variantcaller:"haplotypecaller" + ], + tbi + ] + } + ch_versions = ch_versions.mix( 
SINGLE_SAMPLE.out.versions, HAPLOTYPECALLER.out.versions, MERGE_HAPLOTYPECALLER.out.versions) @@ -117,4 +145,5 @@ workflow RUN_HAPLOTYPECALLER { emit: versions = ch_versions filtered_vcf + filtered_vcf_tbi } diff --git a/subworkflows/nf-core/variantcalling/strelka/single/main.nf b/subworkflows/nf-core/variantcalling/strelka/single/main.nf index da721fb5cd..59fa4127ee 100644 --- a/subworkflows/nf-core/variantcalling/strelka/single/main.nf +++ b/subworkflows/nf-core/variantcalling/strelka/single/main.nf @@ -21,6 +21,11 @@ workflow RUN_STRELKA_SINGLE { no_intervals: it[0].num_intervals <= 1 }.set{strelka_vcf} + STRELKA_SINGLE.out.vcf_tbi.branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + }.set{strelka_vcf_tbi} + STRELKA_SINGLE.out.genome_vcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -76,14 +81,30 @@ workflow RUN_STRELKA_SINGLE { sex: meta.sex, status: meta.status, variantcaller: "strelka" - ],vcf] + ], vcf] } + strelka_vcf_tbi = Channel.empty().mix( + MERGE_STRELKA.out.tbi, + strelka_vcf_tbi.no_intervals) + .map{ meta, tbi -> + [[ + id: meta.sample, + num_intervals: meta.num_intervals, + patient: meta.patient, + sample: meta.sample, + sex: meta.sex, + status: meta.status, + variantcaller: "strelka" + ], tbi] + } + ch_versions = ch_versions.mix(MERGE_STRELKA.out.versions) ch_versions = ch_versions.mix(MERGE_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) emit: strelka_vcf + strelka_vcf_tbi versions = ch_versions } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index edfc919221..fd0f0cda7e 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -273,6 +273,8 @@ include { TUMOR_ONLY_VARIANT_CALLING } from '../subwor // Variant calling on tumor/normal pair include { PAIR_VARIANT_CALLING } from '../subworkflows/local/pair_variant_calling' +include { BCFTOOLS_CONCAT as BCFTOOLS_CONCAT_GERMLINE_VCFS } from '../modules/nf-core/bcftools/concat/main' 
+ include { VCF_QC } from '../subworkflows/nf-core/vcf_qc' // Annotation @@ -977,6 +979,34 @@ workflow SAREK { rt_file ) + // Gather vcfs and vcf-tbis for concatenating germline-vcfs + germline_vcfs_with_tbis = Channel.empty() + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + GERMLINE_VARIANT_CALLING.out.strelka_vcf.join(GERMLINE_VARIANT_CALLING.out.strelka_vcf_tbi) + ) + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf.join(GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf_tbi) + ) + + // TO-DO: also mix in vcf+tbi from + // deepvariant + // freebayes + // manta + // tiddit + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ + meta, vcf, tbi -> + def new_meta = meta.clone() + new_meta.remove('variantcaller') + [new_meta, vcf, tbi] + }.groupTuple() + + BCFTOOLS_CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) + // TO-DO: The concatenation should be optional, and be default it shouldn't be done. + // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) + // TO-DO: Should all different kinds of variant be concatenated? Probably not. 
+ // Gather vcf files for annotation and QC vcf_to_annotate = Channel.empty() vcf_to_annotate = vcf_to_annotate.mix(GERMLINE_VARIANT_CALLING.out.deepvariant_vcf) From 5ee59a8555b3866e36e5e26af3bc88ae1cbb8d59 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Wed, 12 Oct 2022 13:02:28 +0200 Subject: [PATCH 02/31] Adding the germline vcf-file from manta to the list of germline vcf-files being concatenated --- .../local/germline_variant_calling.nf | 5 ++++- .../variantcalling/manta/germline/main.nf | 22 +++++++++++++++++++ workflows/sarek.nf | 10 ++++++--- 3 files changed, 33 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index b445920f7f..609d040e86 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -198,7 +198,9 @@ workflow GERMLINE_VARIANT_CALLING { fasta_fai ) - manta_vcf = RUN_MANTA_GERMLINE.out.manta_vcf + manta_vcf = RUN_MANTA_GERMLINE.out.manta_vcf + manta_vcf_tbi = RUN_MANTA_GERMLINE.out.manta_vcf_tbi + ch_versions = ch_versions.mix(RUN_MANTA_GERMLINE.out.versions) } @@ -238,6 +240,7 @@ workflow GERMLINE_VARIANT_CALLING { tiddit_vcf haplotypecaller_vcf_tbi + manta_vcf_tbi strelka_vcf_tbi versions = ch_versions diff --git a/subworkflows/nf-core/variantcalling/manta/germline/main.nf b/subworkflows/nf-core/variantcalling/manta/germline/main.nf index 3a3ff8f174..ac30c6d3fc 100644 --- a/subworkflows/nf-core/variantcalling/manta/germline/main.nf +++ b/subworkflows/nf-core/variantcalling/manta/germline/main.nf @@ -33,6 +33,12 @@ workflow RUN_MANTA_GERMLINE { no_intervals: it[0].num_intervals <= 1 }.set{manta_diploid_sv_vcf} + MANTA_GERMLINE.out.diploid_sv_vcf_tbi.branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + }.set{manta_diploid_sv_vcf_tbi} + + // Only when using intervals MERGE_MANTA_SMALL_INDELS( manta_small_indels_vcf.intervals @@ -104,6 +110,21 @@ workflow 
RUN_MANTA_GERMLINE { vcf] } + manta_vcf_tbi = Channel.empty().mix( + MERGE_MANTA_DIPLOID.out.tbi, + manta_diploid_sv_vcf_tbi.no_intervals) + .map{ meta, tbi -> + [[ + id: meta.sample, + num_intervals: meta.num_intervals, + patient: meta.patient, + sample: meta.sample, + status: meta.status, + sex: meta.sex, + variantcaller: "manta"], + tbi] + } + ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) @@ -111,5 +132,6 @@ workflow RUN_MANTA_GERMLINE { emit: manta_vcf + manta_vcf_tbi versions = ch_versions } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index fd0f0cda7e..e9c4b45f5c 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -981,18 +981,22 @@ workflow SAREK { // Gather vcfs and vcf-tbis for concatenating germline-vcfs germline_vcfs_with_tbis = Channel.empty() + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - GERMLINE_VARIANT_CALLING.out.strelka_vcf.join(GERMLINE_VARIANT_CALLING.out.strelka_vcf_tbi) + GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf.join(GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf_tbi) ) germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf.join(GERMLINE_VARIANT_CALLING.out.haplotypecaller_vcf_tbi) + GERMLINE_VARIANT_CALLING.out.manta_vcf.join(GERMLINE_VARIANT_CALLING.out.manta_vcf_tbi) + ) + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + GERMLINE_VARIANT_CALLING.out.strelka_vcf.join(GERMLINE_VARIANT_CALLING.out.strelka_vcf_tbi) ) // TO-DO: also mix in vcf+tbi from // deepvariant // freebayes - // manta // tiddit germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ From 624b6ebeae25b4edcfabd73c2d57036e2d0e9afb Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Wed, 12 Oct 2022 13:51:51 +0200 Subject: [PATCH 03/31] Making sure the channel manta_vcf_tbi is defined even if manta isnt run --- 
subworkflows/local/germline_variant_calling.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/subworkflows/local/germline_variant_calling.nf b/subworkflows/local/germline_variant_calling.nf index 609d040e86..b15d5162f4 100644 --- a/subworkflows/local/germline_variant_calling.nf +++ b/subworkflows/local/germline_variant_calling.nf @@ -44,6 +44,7 @@ workflow GERMLINE_VARIANT_CALLING { tiddit_vcf = Channel.empty() haplotypecaller_vcf_tbi = Channel.empty() + manta_vcf_tbi = Channel.empty() strelka_vcf_tbi = Channel.empty() // Remap channel with intervals From b89f0880aa9e41dc64a4fc24a10302df54ebca68 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Sun, 13 Nov 2022 12:30:56 +0100 Subject: [PATCH 04/31] Adding support for concatenation of germline vcf-files. Now also for vcf-files from deepvariant. --- modules/nf-core/deepvariant/main.nf | 2 + .../bam_variant_calling_deepvariant/main.nf | 22 +++++++ .../bam_variant_calling_germline_all/main.nf | 3 + workflows/sarek.nf | 62 +++++++++++-------- 4 files changed, 62 insertions(+), 27 deletions(-) diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index 344fdb030a..ca4cf23971 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -16,7 +16,9 @@ process DEEPVARIANT { output: tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi path "versions.yml" , emit: versions when: diff --git a/subworkflows/local/bam_variant_calling_deepvariant/main.nf b/subworkflows/local/bam_variant_calling_deepvariant/main.nf index 537d1d0d3f..39d0279176 100644 --- a/subworkflows/local/bam_variant_calling_deepvariant/main.nf +++ b/subworkflows/local/bam_variant_calling_deepvariant/main.nf @@ -23,6 +23,11 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { no_intervals: it[0].num_intervals 
<= 1 }.set{deepvariant_vcf_out} + DEEPVARIANT.out.vcf_tbi.branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + }.set{deepvariant_tbi_out} + DEEPVARIANT.out.gvcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -98,6 +103,22 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { ], vcf] } + deepvariant_vcf_tbi = Channel.empty().mix( + MERGE_DEEPVARIANT_VCF.out.tbi, + deepvariant_tbi_out.no_intervals) + .map{ meta, tbi -> + [[ + id: meta.sample, + num_intervals: meta.num_intervals, + patient: meta.patient, + sample: meta.sample, + sex: meta.sex, + status: meta.status, + variantcaller: "deepvariant" + ], tbi] + } + + ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) @@ -106,6 +127,7 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { emit: deepvariant_vcf + deepvariant_vcf_tbi deepvariant_gvcf versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index e80a1b8518..01a5847401 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -43,6 +43,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { strelka_vcf = Channel.empty() tiddit_vcf = Channel.empty() + deepvariant_vcf_tbi = Channel.empty() haplotypecaller_vcf_tbi = Channel.empty() manta_vcf_tbi = Channel.empty() strelka_vcf_tbi = Channel.empty() @@ -130,6 +131,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) deepvariant_vcf = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf) + deepvariant_vcf_tbi = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf_tbi) ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) } @@ -241,6 +243,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { 
strelka_vcf tiddit_vcf + deepvariant_vcf_tbi haplotypecaller_vcf_tbi manta_vcf_tbi strelka_vcf_tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 1c19e2d664..fd0db1eec7 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -999,37 +999,45 @@ workflow SAREK { rt_file ) - // Gather vcfs and vcf-tbis for concatenating germline-vcfs - germline_vcfs_with_tbis = Channel.empty() - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi) - ) + if (params.concatenate_vcf) { + // Concatenate vcf-files - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) - ) + // Gather vcfs and vcf-tbis for concatenating germline-vcfs + germline_vcfs_with_tbis = Channel.empty() + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi) + ) + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) + ) + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) + ) + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi) + ) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) - ) - // TO-DO: also mix in vcf+tbi from - // deepvariant - // freebayes - // tiddit - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ - meta, vcf, tbi -> - def new_meta = meta.clone() - 
new_meta.remove('variantcaller') - [new_meta, vcf, tbi] - }.groupTuple() - - CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) - // TO-DO: The concatenation should be optional, and be default it shouldn't be done. - // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) - // TO-DO: Should all different kinds of variant be concatenated? Probably not. + // TO-DO: also mix in vcf+tbi from + // freebayes + // tiddit + + germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ + meta, vcf, tbi -> + def new_meta = meta.clone() + new_meta.remove('variantcaller') + [new_meta, vcf, tbi] + }.groupTuple() + + CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) + // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) + // TO-DO: Should all different kinds of variant be concatenated? Probably not. + } // Gather vcf files for annotation and QC vcf_to_annotate = Channel.empty() From a936722dc5725674bc37b3f3fca579190b204f6f Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Sun, 13 Nov 2022 20:35:00 +0100 Subject: [PATCH 05/31] Adding CLI-open concatenate_vcf to the schema-json --- nextflow_schema.json | 37 +++++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index bd50522344..deffb7eb65 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,10 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["step", "outdir"], + "required": [ + "step", + "outdir" + ], "properties": { "step": { "type": "string", @@ -194,7 +197,11 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": ["bwa-mem", "bwa-mem2", "dragmap"], + "enum": [ + "bwa-mem", + "bwa-mem2", + "dragmap" + ], "description": "Specify aligner to be used to map reads to reference genome.", "help_text": "`Sarek` 
will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf `DragMap` is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. See [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode) for more info.\n", "hidden": true @@ -278,7 +285,7 @@ }, "cf_chrom_len": { "type": "string", - "default": null, + "default": "None", "fa_icon": "fas fa-ruler-horizontal", "description": "Specify a custom chromosome length file.", "help_text": "Control-FREEC requires a file containing all chromosome lenghts. By default the fasta.fai is used. If the fasta.fai file contains chromosomes not present in the intervals, it fails (see: https://github.com/BoevaLab/FREEC/issues/106).\n\nIn this case, a custom chromosome length can be specified. It must be of the same format as the fai, but only contain the relevant chromosomes.\n\n\n\n", @@ -478,7 +485,11 @@ "type": "string", "default": "vcf", "description": "VEP output-file format.", - "enum": ["json", "tab", "vcf"], + "enum": [ + "json", + "tab", + "vcf" + ], "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", "fa_icon": "fas fa-table", "hidden": true @@ -845,7 +856,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], "hidden": true }, "email": { @@ -973,5 +991,12 @@ { "$ref": "#/definitions/generic_options" } - ] + ], + "properties": { + "concatenate_vcf": { + "type": "boolean", + "description": "Option for concatenating the vcf-files produced by the variant-callers.", + "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat. Similarly, for any somatic vcf-files or tumor-only vcf-files." + } + } } From f91d40b65429b7e5648e62e3af9ec0fc904b4f20 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 14 Nov 2022 09:05:54 +0100 Subject: [PATCH 06/31] WIP: Adding support for concatenation of germline vcf-files. Now also for vcf-files from freebayes. 
--- .../bam_variant_calling_freebayes/main.nf | 17 +++++++++++++ .../bam_variant_calling_germline_all/main.nf | 7 ++++-- workflows/sarek.nf | 25 ++++++++++++++----- 3 files changed, 41 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_freebayes/main.nf b/subworkflows/local/bam_variant_calling_freebayes/main.nf index 148b20795e..4778db2224 100644 --- a/subworkflows/local/bam_variant_calling_freebayes/main.nf +++ b/subworkflows/local/bam_variant_calling_freebayes/main.nf @@ -72,6 +72,22 @@ workflow BAM_VARIANT_CALLING_FREEBAYES { vcf] } + freebayes_vcf_tbi = Channel.empty().mix( + MERGE_FREEBAYES.out.tbi, + TABIX_VC_FREEBAYES.out.tbi) + .map{ meta, tbi -> + [ [ + id: meta.id, + normal_id: meta.normal_id, + num_intervals: meta.num_intervals, + patient: meta.patient, + sex: meta.sex, + tumor_id: meta.tumor_id, + variantcaller: "freebayes" + ], + tbi] + } + ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) @@ -79,5 +95,6 @@ workflow BAM_VARIANT_CALLING_FREEBAYES { emit: freebayes_vcf + freebayes_vcf_tbi versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 01a5847401..21853394ee 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -44,6 +44,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { tiddit_vcf = Channel.empty() deepvariant_vcf_tbi = Channel.empty() + freebayes_vcf_tbi = Channel.empty() haplotypecaller_vcf_tbi = Channel.empty() manta_vcf_tbi = Channel.empty() strelka_vcf_tbi = Channel.empty() @@ -150,8 +151,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { fasta_fai ) - freebayes_vcf = BAM_VARIANT_CALLING_FREEBAYES.out.freebayes_vcf - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) + 
freebayes_vcf = BAM_VARIANT_CALLING_FREEBAYES.out.freebayes_vcf + freebayes_vcf_tbi = BAM_VARIANT_CALLING_FREEBAYES.out.freebayes_vcf_tbi + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } // HAPLOTYPECALLER @@ -244,6 +246,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { tiddit_vcf deepvariant_vcf_tbi + freebayes_vcf_tbi haplotypecaller_vcf_tbi manta_vcf_tbi strelka_vcf_tbi diff --git a/workflows/sarek.nf b/workflows/sarek.nf index fd0db1eec7..1ecd1ab9e7 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1007,33 +1007,46 @@ workflow SAREK { germline_vcfs_with_tbis = Channel.empty() germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi) + BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi) ) germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) + BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf_tbi) ) germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) + BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi) ) germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi) + BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) ) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) + ) // TO-DO: also mix in vcf+tbi from - // freebayes 
// tiddit germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ meta, vcf, tbi -> def new_meta = meta.clone() new_meta.remove('variantcaller') - [new_meta, vcf, tbi] + def new_meta2 = new_meta.clone() + new_meta2.remove('tumor_id') + def new_meta3 = new_meta2.clone() + new_meta3.remove('normal_id') + def new_meta4 = new_meta3.clone() + new_meta4.remove('sample') + def new_meta5 = new_meta4.clone() + new_meta5.remove('status') // TO-DO: Better way of removing the unwanted entries in the dict "meta"? + // The remaining entries should just be id, num_intervals, patient and sex + [new_meta5, vcf, tbi] }.groupTuple() + germline_vcfs_with_tbis.view() + CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) // TO-DO: Should all different kinds of variant be concatenated? Probably not. From d859e045f499ce0d61d315b765be8c2701768039 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 14 Nov 2022 10:58:31 +0100 Subject: [PATCH 07/31] WIP: Adding support for concatenation of germline vcf-files. Now also for vcf-files from tiddit. 
--- .../bam_variant_calling_germline_all/main.nf | 25 +++++++++++-------- .../bam_variant_calling_single_tiddit/main.nf | 23 +++++++++++++++++ workflows/sarek.nf | 15 +++++------ 3 files changed, 45 insertions(+), 18 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 21853394ee..feaaffe9c4 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -48,6 +48,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { haplotypecaller_vcf_tbi = Channel.empty() manta_vcf_tbi = Channel.empty() strelka_vcf_tbi = Channel.empty() + tiddit_vcf_tbi = Channel.empty() // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) @@ -100,7 +101,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) mpileup_germline = BAM_VARIANT_CALLING_MPILEUP.out.mpileup - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) } // CNVKIT @@ -119,7 +120,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { [] ) - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_CNVKIT.out.versions) } // DEEPVARIANT @@ -131,9 +132,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { fasta_fai ) - deepvariant_vcf = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf) + deepvariant_vcf = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf) deepvariant_vcf_tbi = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf_tbi) - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) } // FREEBAYES @@ -189,9 +190,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { known_sites_snps_tbi, 
intervals_bed_combined_haplotypec) - haplotypecaller_vcf = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf + haplotypecaller_vcf = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf haplotypecaller_vcf_tbi = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf_tbi - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions) } @@ -207,7 +208,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { manta_vcf = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.manta_vcf manta_vcf_tbi = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.manta_vcf_tbi - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) } // STRELKA @@ -219,9 +220,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { fasta_fai ) - strelka_vcf = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.strelka_vcf + strelka_vcf = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.strelka_vcf strelka_vcf_tbi = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.strelka_vcf_tbi - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) } //TIDDIT @@ -232,8 +233,9 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { bwa ) - tiddit_vcf = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tiddit_vcf - ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) + tiddit_vcf = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tiddit_vcf + tiddit_vcf_tbi = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tiddit_vcf_tbi + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) } emit: @@ -250,6 +252,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { haplotypecaller_vcf_tbi manta_vcf_tbi strelka_vcf_tbi + tiddit_vcf_tbi versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf 
index 5cd4e18bd8..4301be22a7 100644 --- a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf @@ -40,6 +40,28 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { ] [new_meta, gz]} + tiddit_vcf_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> + + new_meta = meta.tumor_id ? [ + id: meta.tumor_id + "_vs_" + meta.normal_id, + normal_id: meta.normal_id, + num_intervals: meta.num_intervals, + patient: meta.patient, + sex: meta.sex, + tumor_id: meta.tumor_id, + variantcaller: 'tiddit' + ] + : [ + id: meta.sample, + num_intervals: meta.num_intervals, + patient: meta.patient, + sample: meta.sample, + sex: meta.sex, + status: meta.status, + variantcaller: 'tiddit' + ] + [new_meta, tbi]} + ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) @@ -47,5 +69,6 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { versions = ch_versions tiddit_vcf = tiddit_vcf_gz + tiddit_vcf_tbi tiddit_ploidy } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 1ecd1ab9e7..af6d7b5f29 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1026,8 +1026,9 @@ workflow SAREK { BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) ) - // TO-DO: also mix in vcf+tbi from - // tiddit + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf_tbi) + ) germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ meta, vcf, tbi -> @@ -1040,13 +1041,13 @@ workflow SAREK { def new_meta4 = new_meta3.clone() new_meta4.remove('sample') def new_meta5 = new_meta4.clone() - new_meta5.remove('status') // TO-DO: Better way of removing the unwanted entries in the dict "meta"? 
- // The remaining entries should just be id, num_intervals, patient and sex - [new_meta5, vcf, tbi] + new_meta5.remove('status') + def new_meta6 = new_meta5.clone() + new_meta6.remove('num_intervals') // TO-DO: Better way of removing the unwanted entries in the dict "meta"? + // The remaining entries should just be id, patient and sex + [new_meta6, vcf, tbi] }.groupTuple() - germline_vcfs_with_tbis.view() - CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) // TO-DO: Should all different kinds of variant be concatenated? Probably not. From 38ac53df5508dae3e5f222dfb15ee331100b1858 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 13:39:01 +0100 Subject: [PATCH 08/31] Adding support for concatenation of vcf from mpileup --- .../bam_variant_calling_germline_all/main.nf | 5 +++-- .../local/bam_variant_calling_mpileup/main.nf | 7 ++++++- workflows/sarek.nf | 17 +++++++++++------ 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index 4d7e15af12..f7e4f2b547 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -37,7 +37,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { //TODO: Temporary until the if's can be removed and printing to terminal is prevented with "when" in the modules.config deepvariant_vcf = Channel.empty() freebayes_vcf = Channel.empty() - genotype_gvcf = Channel.empty() haplotypecaller_vcf = Channel.empty() manta_vcf = Channel.empty() mpileup_vcf = Channel.empty() @@ -48,6 +47,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { freebayes_vcf_tbi = Channel.empty() haplotypecaller_vcf_tbi = Channel.empty() manta_vcf_tbi = Channel.empty() + mpileup_vcf_tbi = Channel.empty() strelka_vcf_tbi = Channel.empty() tiddit_vcf_tbi = 
Channel.empty() @@ -103,6 +103,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) mpileup_vcf = BAM_VARIANT_CALLING_MPILEUP.out.vcf + mpileup_vcf_tbi = BAM_VARIANT_CALLING_MPILEUP.out.tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) } @@ -243,7 +244,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { emit: deepvariant_vcf freebayes_vcf - genotype_gvcf haplotypecaller_vcf manta_vcf mpileup_vcf @@ -254,6 +254,7 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { freebayes_vcf_tbi haplotypecaller_vcf_tbi manta_vcf_tbi + mpileup_vcf_tbi strelka_vcf_tbi tiddit_vcf_tbi diff --git a/subworkflows/local/bam_variant_calling_mpileup/main.nf b/subworkflows/local/bam_variant_calling_mpileup/main.nf index 211b206388..ef01065677 100644 --- a/subworkflows/local/bam_variant_calling_mpileup/main.nf +++ b/subworkflows/local/bam_variant_calling_mpileup/main.nf @@ -21,6 +21,10 @@ workflow BAM_VARIANT_CALLING_MPILEUP { intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 } + tbis = BCFTOOLS_MPILEUP.out.tbi.branch{ + intervals: it[0].num_intervals > 1 + no_intervals: it[0].num_intervals <= 1 + } mpileup = SAMTOOLS_MPILEUP.out.mpileup.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -72,5 +76,6 @@ workflow BAM_VARIANT_CALLING_MPILEUP { emit: versions = ch_versions mpileup = Channel.empty().mix(CAT_MPILEUP.out.file_out, mpileup.no_intervals) - vcf = Channel.empty().mix(GATK4_MERGEVCFS.out.vcf,vcfs.no_intervals) + vcf = Channel.empty().mix(GATK4_MERGEVCFS.out.vcf, vcfs.no_intervals) + tbi = Channel.empty().mix(GATK4_MERGEVCFS.out.tbi, tbis.no_intervals) } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 48330dd839..673a9be74f 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1026,6 +1026,10 @@ workflow SAREK { BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) ) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( + 
BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf_tbi) + ) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) ) @@ -1046,15 +1050,16 @@ workflow SAREK { new_meta4.remove('sample') def new_meta5 = new_meta4.clone() new_meta5.remove('status') - def new_meta6 = new_meta5.clone() - new_meta6.remove('num_intervals') // TO-DO: Better way of removing the unwanted entries in the dict "meta"? - // The remaining entries should just be id, patient and sex - [new_meta6, vcf, tbi] + def new_meta6 = new_meta5.clone() // TO-DO: Better way of removing the unwanted entries in the dict "meta"? + new_meta6.remove('num_intervals') // The remaining entries should just be id, patient and sex + def new_meta7 = new_meta6.clone() + new_meta7.remove('data_type') + [new_meta7, vcf, tbi] }.groupTuple() + germline_vcfs_with_tbis.view() + CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) - // TO-DO: Similar concatenation should also be done for tumor-vcfs, somatic-vcfs and something (?) - // TO-DO: Should all different kinds of variant be concatenated? Probably not. } // Gather vcf files for annotation and QC From dba9993f83d546d569590dc17a11ef9f05b31c0b Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 13:52:20 +0100 Subject: [PATCH 09/31] Changing CLI-option concatenate_vcf to concatenate_vcfs. 
--- nextflow_schema.json | 6 +++--- workflows/sarek.nf | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 7d6d70f5b4..007c4c0f3f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -998,10 +998,10 @@ } ], "properties": { - "concatenate_vcf": { + "concatenate_vcfs": { "type": "boolean", - "description": "Option for concatenating the vcf-files produced by the variant-callers.", - "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat. Similarly, for any somatic vcf-files or tumor-only vcf-files." + "description": "Option for concatenating germline vcf-files.", + "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat." } } } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 673a9be74f..125cc8ff69 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1003,8 +1003,7 @@ workflow SAREK { rt_file ) - - if (params.concatenate_vcf) { + if (params.concatenate_vcfs) { // Concatenate vcf-files // Gather vcfs and vcf-tbis for concatenating germline-vcfs From 34baf9aea48e3aae107f36d261c5c863673e8ae8 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 15:37:16 +0100 Subject: [PATCH 10/31] Initializing CLI-option concatenate_vcfs to false. 
--- nextflow.config | 1 + workflows/sarek.nf | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index c057d0c43d..571248858b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -63,6 +63,7 @@ params { cf_minqual = 0 // ControlFreec default values cf_window = null // by default we are not using this in Control-FREEC cnvkit_reference = null // by default the reference is build from the fasta file + concatenate_vcfs = false // by default we don't concatenate the germline-vcf-files ignore_soft_clipped_bases = false // no --dont-use-soft-clipped-bases for GATK Mutect2 wes = false // Set to true, if data is exome/targeted sequencing data. Used to use correct models in various variant callers joint_germline = false // g.vcf & joint germline calling are not run by default if HaplotypeCaller is selected diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 125cc8ff69..227d41fa8f 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1056,8 +1056,6 @@ workflow SAREK { [new_meta7, vcf, tbi] }.groupTuple() - germline_vcfs_with_tbis.view() - CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) } From d3a4578d77768f07795f58ee6a2012160ff8b31f Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 16:54:53 +0100 Subject: [PATCH 11/31] Sorting concatenated germline-vcf-file and adding tbi. 
--- conf/modules/modules.config | 13 +++++++++++-- workflows/sarek.nf | 11 +++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index d19171f317..5ea013c532 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -87,8 +87,8 @@ process { } } - // CONCAT UNANNOTATED VCFS - withName: 'CONCAT_GERMLINE_VCFS'{ + // CONCATENATED, SORT, UNANNOTATED VCFS + withName: 'GERMLINE_VCFS_CONCAT_SORT'{ ext.prefix = { "${meta.id}.germline" } publishDir = [ mode: params.publish_dir_mode, @@ -96,6 +96,15 @@ process { ] } + withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{ + ext.prefix = { "${meta.id}.germline" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } + ] + } + + // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 227d41fa8f..4bb3557f1e 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -267,7 +267,12 @@ include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows // Variant calling on tumor/normal pair include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main' -include { BCFTOOLS_CONCAT as CONCAT_GERMLINE_VCFS } from '../modules/nf-core/bcftools/concat/main' +// Concatenation of germline vcf-files +include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' + +include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main' + +include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main' // QC on VCF files include { VCF_QC_BCFTOOLS_VCFTOOLS } from '../subworkflows/local/vcf_qc_bcftools_vcftools/main' @@ -1056,7 +1061,9 @@ workflow SAREK { [new_meta7, vcf, tbi] }.groupTuple() - CONCAT_GERMLINE_VCFS(germline_vcfs_with_tbis) + 
BCFTOOLS_CONCAT(germline_vcfs_with_tbis) + GERMLINE_VCFS_CONCAT_SORT(BCFTOOLS_CONCAT.out.vcf) + TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) } // Gather vcf files for annotation and QC From 9e216312aff282b3128682a560f6b083b7abe2f1 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 17:47:19 +0100 Subject: [PATCH 12/31] Updating schema. Grouping the CLI-option concatenate_vcfs together with the other variant-calling-parameters. --- nextflow_schema.json | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 007c4c0f3f..050628298b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -233,6 +233,12 @@ "default": "", "fa_icon": "fas fa-toolbox", "properties": { + "concatenate_vcfs": { + "type": "boolean", + "fa_icon": "fas fa-merge", + "description": "Option for concatenating germline vcf-files.", + "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat." + }, "only_paired_variant_calling": { "type": "boolean", "fa_icon": "fas fa-forward", @@ -996,12 +1002,5 @@ { "$ref": "#/definitions/generic_options" } - ], - "properties": { - "concatenate_vcfs": { - "type": "boolean", - "description": "Option for concatenating germline vcf-files.", - "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat." 
- } - } + ] } From f8edc0034b9f01e3644ae75d7eaf57449581659c Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 28 Nov 2022 17:49:54 +0100 Subject: [PATCH 13/31] prettier --- nextflow_schema.json | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index 050628298b..f84f1bed2f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "step", - "outdir" - ], + "required": ["step", "outdir"], "properties": { "step": { "type": "string", @@ -197,11 +194,7 @@ "type": "string", "default": "bwa-mem", "fa_icon": "fas fa-puzzle-piece", - "enum": [ - "bwa-mem", - "bwa-mem2", - "dragmap" - ], + "enum": ["bwa-mem", "bwa-mem2", "dragmap"], "description": "Specify aligner to be used to map reads to reference genome.", "help_text": "`Sarek` will build missing indices automatically if not provided. Set `--bwa false` if indices should be (re-)built.\nIf `DragMap` is selected as aligner, it is recommended to skip baserecalibration with `--skip_tools baserecalibrator`. See [here](https://gatk.broadinstitute.org/hc/en-us/articles/4407897446939--How-to-Run-germline-single-sample-short-variant-discovery-in-DRAGEN-mode) for more info.\n", "hidden": true @@ -496,11 +489,7 @@ "type": "string", "default": "vcf", "description": "VEP output-file format.", - "enum": [ - "json", - "tab", - "vcf" - ], + "enum": ["json", "tab", "vcf"], "help_text": "Sets the format of the output-file from VEP. Available formats: json, tab and vcf.", "fa_icon": "fas fa-table", "hidden": true @@ -867,14 +856,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. 
This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email": { From e4476026f42e632d3f0d16e02dd7114c6d24188f Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Thu, 1 Dec 2022 11:47:11 +0100 Subject: [PATCH 14/31] Moving some config to new config-file for post-processing of vcfs --- conf/modules/modules.config | 18 -------- conf/modules/postprocessing_vcfs.config | 34 +++++++++++++++ nextflow.config | 2 + workflows/sarek.nf | 57 ++++++------------------- 4 files changed, 50 insertions(+), 61 deletions(-) create mode 100644 conf/modules/postprocessing_vcfs.config diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 5ea013c532..0c36169370 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -87,24 +87,6 @@ process { } } - // CONCATENATED, SORT, UNANNOTATED VCFS - withName: 'GERMLINE_VCFS_CONCAT_SORT'{ - ext.prefix = { "${meta.id}.germline" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } - ] - } - - withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{ - ext.prefix = { "${meta.id}.germline" } - publishDir = [ - mode: params.publish_dir_mode, - path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } - ] - } - - // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/conf/modules/postprocessing_vcfs.config b/conf/modules/postprocessing_vcfs.config new file mode 100644 index 0000000000..77a2260ecf --- /dev/null +++ b/conf/modules/postprocessing_vcfs.config @@ -0,0 +1,34 @@ +/* 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. + ext.when = When to run the module. +---------------------------------------------------------------------------------------- +*/ + +// POSTPROCESSING VCFS +// Like, for instance, concatenating the unannotated, germline vcf-files + +process { + + withName: 'GERMLINE_VCFS_CONCAT_SORT'{ + ext.prefix = { "${meta.id}.germline" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } + ] + } + + withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{ + ext.prefix = { "${meta.id}.germline" } + publishDir = [ + mode: params.publish_dir_mode, + path: { "${params.outdir}/variant_calling/concat/${meta.id}/" } + ] + } +} diff --git a/nextflow.config b/nextflow.config index 571248858b..4e9726f946 100644 --- a/nextflow.config +++ b/nextflow.config @@ -317,6 +317,8 @@ includeConfig 'conf/modules/mutect2.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' +includeConfig 'conf/modules/postprocessing_vcfs.config' + //annotate includeConfig 'conf/modules/annotate.config' diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 4bb3557f1e..bb3c16bcec 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -269,9 +269,7 @@ include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows // Concatenation of germline vcf-files include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' - 
include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main' - include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main' // QC on VCF files @@ -1013,52 +1011,25 @@ workflow SAREK { // Gather vcfs and vcf-tbis for concatenating germline-vcfs germline_vcfs_with_tbis = Channel.empty() - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi) - ) - - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix( - BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf_tbi) - ) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf_tbi)) + germline_vcfs_with_tbis = 
germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf_tbi)) germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ meta, vcf, tbi -> def new_meta = meta.clone() new_meta.remove('variantcaller') - def new_meta2 = new_meta.clone() - new_meta2.remove('tumor_id') - def new_meta3 = new_meta2.clone() - new_meta3.remove('normal_id') - def new_meta4 = new_meta3.clone() - new_meta4.remove('sample') - def new_meta5 = new_meta4.clone() - new_meta5.remove('status') - def new_meta6 = new_meta5.clone() // TO-DO: Better way of removing the unwanted entries in the dict "meta"? 
- new_meta6.remove('num_intervals') // The remaining entries should just be id, patient and sex - def new_meta7 = new_meta6.clone() - new_meta7.remove('data_type') - [new_meta7, vcf, tbi] + new_meta.remove('tumor_id') + new_meta.remove('normal_id') + new_meta.remove('sample') + new_meta.remove('status') + new_meta.remove('num_intervals') + new_meta.remove('data_type') + [new_meta, vcf, tbi] }.groupTuple() BCFTOOLS_CONCAT(germline_vcfs_with_tbis) From 00c5a9db76ff319b41309b4e5f937511b660e149 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Thu, 1 Dec 2022 12:05:51 +0100 Subject: [PATCH 15/31] renaming postprocessing_vcfs.config to post_variant_calling.config --- .../{postprocessing_vcfs.config => post_variant_calling.config} | 0 nextflow.config | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename conf/modules/{postprocessing_vcfs.config => post_variant_calling.config} (100%) diff --git a/conf/modules/postprocessing_vcfs.config b/conf/modules/post_variant_calling.config similarity index 100% rename from conf/modules/postprocessing_vcfs.config rename to conf/modules/post_variant_calling.config diff --git a/nextflow.config b/nextflow.config index 4e9726f946..cc59e939e7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -317,7 +317,7 @@ includeConfig 'conf/modules/mutect2.config' includeConfig 'conf/modules/strelka.config' includeConfig 'conf/modules/tiddit.config' -includeConfig 'conf/modules/postprocessing_vcfs.config' +includeConfig 'conf/modules/post_variant_calling.config' //annotate includeConfig 'conf/modules/annotate.config' From 70b10270dbca49b0422ba6ecca3156cc386694da Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Thu, 1 Dec 2022 22:37:36 +0100 Subject: [PATCH 16/31] Adding INFO-field SOURCE= to germline-vcf-files before concatenating them. 
--- conf/modules/modules.config | 6 +++ modules/local/add_info_to_vcf/main.nf | 40 ++++++++++++++++++++ workflows/sarek.nf | 53 +++++++++++++++++++++++---- 3 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 modules/local/add_info_to_vcf/main.nf diff --git a/conf/modules/modules.config b/conf/modules/modules.config index 0c36169370..4abfb4b56c 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -87,6 +87,12 @@ process { } } + withName: 'TABIX_EXT_VCF_.*' { + ext.prefix = { "${input.baseName}" } + } + + + // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf new file mode 100644 index 0000000000..b7c635ee05 --- /dev/null +++ b/modules/local/add_info_to_vcf/main.nf @@ -0,0 +1,40 @@ +process ADD_INFO_TO_VCF { + tag "$meta.id" + + conda (params.enable_conda ? "anaconda::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/gawk:5.1.0' : + 'quay.io/biocontainers/gawk:5.1.0' }" + + input: + tuple val(meta), path(vcf_gz) + + output: + tuple val(meta), path("*.added_info.vcf"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + input="input.vcf" + output="${vcf_gz.baseName.minus(".vcf")}.added_info.vcf" + zcat $vcf_gz > \$input + ## Add info header lines + grep -E "^##" \$input > \$output + ## Add description of new INFO value + echo '##INFO=' >> \$output + ## Add column header + grep -E "^#CHROM" \$input >> \$output + ## Add SET value to INFO column of variant calls + if grep -Ev "^#" \$input; then + grep -Ev "^#" \$input | awk 'BEGIN{FS=OFS="\t"} { \$8=="." ? 
\$8="SOURCE=$vcf_gz" : \$8=\$8";SOURCE==$vcf_gz"; print }' >> \$output + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//') + END_VERSIONS + """ +} diff --git a/workflows/sarek.nf b/workflows/sarek.nf index bb3c16bcec..b0c7899446 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -268,6 +268,20 @@ include { BAM_VARIANT_CALLING_TUMOR_ONLY_ALL } from '../subworkflows include { BAM_VARIANT_CALLING_SOMATIC_ALL } from '../subworkflows/local/bam_variant_calling_somatic_all/main' // Concatenation of germline vcf-files +include { ADD_INFO_TO_VCF as ADD_INFO_TO_DV_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_FB_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_HTC_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_MANTA_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_MPILEUP_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_STRELKA_VCF } from '../modules/local/add_info_to_vcf/main' +include { ADD_INFO_TO_VCF as ADD_INFO_TO_TIDDIT_VCF } from '../modules/local/add_info_to_vcf/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_DV } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_FB } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_HTC } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_MANTA } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_MPILEUP } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_STRELKA } from '../modules/nf-core/tabix/bgziptabix/main' +include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_TIDDIT } from '../modules/nf-core/tabix/bgziptabix/main' include 
{ BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main' include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main' @@ -1009,15 +1023,38 @@ workflow SAREK { if (params.concatenate_vcfs) { // Concatenate vcf-files + ADD_INFO_TO_DV_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf) + TABIX_EXT_VCF_DV(ADD_INFO_TO_DV_VCF.out.vcf) + + ADD_INFO_TO_FB_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf) + TABIX_EXT_VCF_FB(ADD_INFO_TO_FB_VCF.out.vcf) + + ADD_INFO_TO_HTC_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf) + TABIX_EXT_VCF_HTC(ADD_INFO_TO_HTC_VCF.out.vcf) + + ADD_INFO_TO_MANTA_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf) + TABIX_EXT_VCF_MANTA(ADD_INFO_TO_MANTA_VCF.out.vcf) + + ADD_INFO_TO_MPILEUP_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf) + TABIX_EXT_VCF_MPILEUP(ADD_INFO_TO_MPILEUP_VCF.out.vcf) + + ADD_INFO_TO_STRELKA_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf) + TABIX_EXT_VCF_STRELKA(ADD_INFO_TO_STRELKA_VCF.out.vcf) + + ADD_INFO_TO_TIDDIT_VCF(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf) + TABIX_EXT_VCF_TIDDIT(ADD_INFO_TO_TIDDIT_VCF.out.vcf) + // Gather vcfs and vcf-tbis for concatenating germline-vcfs germline_vcfs_with_tbis = Channel.empty() - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.deepvariant_vcf_tbi)) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf_tbi)) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf_tbi)) - germline_vcfs_with_tbis = 
germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf_tbi)) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.mpileup_vcf_tbi)) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf_tbi)) - germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf.join(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf_tbi)) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_DV.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_FB.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_HTC.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_MANTA.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_MPILEUP.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_STRELKA.out.gz_tbi) + germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_TIDDIT.out.gz_tbi) + + germline_vcfs_with_tbis.view() germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ meta, vcf, tbi -> @@ -1032,6 +1069,8 @@ workflow SAREK { [new_meta, vcf, tbi] }.groupTuple() + germline_vcfs_with_tbis.view() + BCFTOOLS_CONCAT(germline_vcfs_with_tbis) GERMLINE_VCFS_CONCAT_SORT(BCFTOOLS_CONCAT.out.vcf) TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) From c999a8ffcec94c6bb344c3827f73e172a3a3b3ea Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Thu, 1 Dec 2022 22:56:53 +0100 Subject: [PATCH 17/31] cleaner --- conf/modules/modules.config | 2 -- modules/local/add_info_to_vcf/main.nf | 4 ++-- workflows/sarek.nf | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/conf/modules/modules.config 
b/conf/modules/modules.config index 4abfb4b56c..f2ac98a43e 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -91,8 +91,6 @@ process { ext.prefix = { "${input.baseName}" } } - - // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf index b7c635ee05..bc20df161a 100644 --- a/modules/local/add_info_to_vcf/main.nf +++ b/modules/local/add_info_to_vcf/main.nf @@ -11,7 +11,7 @@ process ADD_INFO_TO_VCF { output: tuple val(meta), path("*.added_info.vcf"), emit: vcf - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -24,7 +24,7 @@ process ADD_INFO_TO_VCF { ## Add info header lines grep -E "^##" \$input > \$output ## Add description of new INFO value - echo '##INFO=' >> \$output + echo '##INFO=' >> \$output ## Add column header grep -E "^#CHROM" \$input >> \$output ## Add SET value to INFO column of variant calls diff --git a/workflows/sarek.nf b/workflows/sarek.nf index b0c7899446..0215cc3da8 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -1054,8 +1054,6 @@ workflow SAREK { germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_STRELKA.out.gz_tbi) germline_vcfs_with_tbis = germline_vcfs_with_tbis.mix(TABIX_EXT_VCF_TIDDIT.out.gz_tbi) - germline_vcfs_with_tbis.view() - germline_vcfs_with_tbis = germline_vcfs_with_tbis.map{ meta, vcf, tbi -> def new_meta = meta.clone() @@ -1069,8 +1067,6 @@ workflow SAREK { [new_meta, vcf, tbi] }.groupTuple() - germline_vcfs_with_tbis.view() - BCFTOOLS_CONCAT(germline_vcfs_with_tbis) GERMLINE_VCFS_CONCAT_SORT(BCFTOOLS_CONCAT.out.vcf) TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) From 24ad87a9f568cb129fba9aa9c72d17f42a3d4ae3 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Thu, 1 Dec 2022 23:24:29 +0100 Subject: [PATCH 18/31] Fixed 
typo in INFO-field SOURCE in concatenated germline-vcf --- modules/local/add_info_to_vcf/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf index bc20df161a..385f8d1f9b 100644 --- a/modules/local/add_info_to_vcf/main.nf +++ b/modules/local/add_info_to_vcf/main.nf @@ -29,7 +29,7 @@ process ADD_INFO_TO_VCF { grep -E "^#CHROM" \$input >> \$output ## Add SET value to INFO column of variant calls if grep -Ev "^#" \$input; then - grep -Ev "^#" \$input | awk 'BEGIN{FS=OFS="\t"} { \$8=="." ? \$8="SOURCE=$vcf_gz" : \$8=\$8";SOURCE==$vcf_gz"; print }' >> \$output + grep -Ev "^#" \$input | awk 'BEGIN{FS=OFS="\t"} { \$8=="." ? \$8="SOURCE=$vcf_gz" : \$8=\$8";SOURCE=$vcf_gz"; print }' >> \$output fi cat <<-END_VERSIONS > versions.yml From 04da3defb912f21f3f47b5d5cfcf254d562f7a69 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 5 Dec 2022 20:12:48 +0100 Subject: [PATCH 19/31] Temporary and fixed copy of mapped_joint_bam.csv in which sample-id and patient-id correspond to id in bam-files. 
See #872 --- tests/csv/3.0/mapped_joint_bam.fixed.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 tests/csv/3.0/mapped_joint_bam.fixed.csv diff --git a/tests/csv/3.0/mapped_joint_bam.fixed.csv b/tests/csv/3.0/mapped_joint_bam.fixed.csv new file mode 100644 index 0000000000..1dc3920b1e --- /dev/null +++ b/tests/csv/3.0/mapped_joint_bam.fixed.csv @@ -0,0 +1,3 @@ +patient,status,sample,bam,bai +testN,0,testN,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai +testT,0,testT,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai From 8257243c059b83c7eb46315b0e4c1bc4374283e2 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 5 Dec 2022 20:13:44 +0100 Subject: [PATCH 20/31] WIP: Adding test of the concatenation of germline-vcfs --- tests/test_concat_germline_vcfs.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/test_concat_germline_vcfs.yml diff --git a/tests/test_concat_germline_vcfs.yml b/tests/test_concat_germline_vcfs.yml new file mode 100644 index 0000000000..f3e75144c5 --- /dev/null +++ b/tests/test_concat_germline_vcfs.yml @@ -0,0 +1,20 @@ +- name: Run all germline variant callers and check for existence of concatenated vcf-files + command: nextflow run main.nf -profile test --input ./tests/csv/3.0/mapped_single_bam.fixed.csv --concatenate_vcfs --tools deepvariant,freebayes,haplotypecaller,manta,mpileup,strelka,tiddit --step variant_calling + tags: + - concatenate_vcfs + files: + - path: results/variant_calling/concat/testN/testN.germline.vcf.gz + # binary changes md5sums on 
reruns. + contains: + [ + "SOURCE=testN.deepvariant.vcf.gz", + "AB=0.167832;ABP=277.102;AC=1;AF=0.5;AN=2;AO=48;CIGAR=1X;DP=286;DPB=286;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=105.855;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2017;QR=9863;RO=238;RPL=0;RPP=107.241;RPPR=519.821;RPR=48;RUN=1;SAF=24;SAP=3.0103;SAR=24;SRF=119;SRP=3.0103;SRR=119;TYPE=snp;technology.illumina=1;SOURCE=testN.freebayes.vcf.gz", + "SNVHPOL=7;MQ=60;SOURCE=testN.strelka.variants.vcf.gz", + "DP=2;SGB=-0.379885;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60;SOURCE=testN.bcftools.vcf.gz" + ] + - path: results/variant_calling/concat/testN/testT.germline.vcf.gz + # binary changes md5sums on reruns. + - path: results/variant_calling/concat/testN/testN.germline.vcf.gz.tbi + # binary changes md5sums on reruns. + - path: results/variant_calling/concat/testN/testT.germline.vcf.gz.tbi + # binary changes md5sums on reruns. From 498db83734177bbe6f481fd7e0b35f5ac07fcf66 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 5 Dec 2022 20:59:32 +0100 Subject: [PATCH 21/31] Trying to add new tests --- tests/config/pytest_tags.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index ed429079d8..f780cb72fc 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -263,3 +263,20 @@ vep: - modules/nf-core/ensemblvep/main.nf - modules/nf-core/tabix/bgziptabix/main.nf - subworkflows/nf-core/vcf_annotate_ensemblvep/main.nf + +## concatenate germline vcfs +concatenate_vcfs: + - conf/modules/post_variant_calling.config + - modules/nf-core/bcftools/sort/main.nf + - modules/nf-core/tabix/bgziptabix/main.nf + - modules/nf-core/bcftools/concat/main.nf + - modules/nf-core/deepvariant/main.nf # DEEPVARIANT + - modules/nf-core/tabix/tabix/main.nf + - modules/nf-core/freebayes/main.nf # Freebayes + - modules/nf-core/gatk4/haplotypecaller/main.nf # Haplotypecaller + - 
modules/nf-core/manta/germline/main.nf # Manta + - modules/nf-core/bcftools/mpileup/main.nf # MPILEUP/BCFTOOLS + - modules/nf-core/samtools/mpileup/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf + - modules/nf-core/strelka/germline/main.nf + - modules/nf-core/tiddit/sv/main.nf From 27826a8f5a205f309a6fcab430e7a9638f368a39 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 5 Dec 2022 21:26:46 +0100 Subject: [PATCH 22/31] Trying to get new test running --- tests/test_concat_germline_vcfs.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_concat_germline_vcfs.yml b/tests/test_concat_germline_vcfs.yml index f3e75144c5..3879faefcb 100644 --- a/tests/test_concat_germline_vcfs.yml +++ b/tests/test_concat_germline_vcfs.yml @@ -1,5 +1,5 @@ - name: Run all germline variant callers and check for existence of concatenated vcf-files - command: nextflow run main.nf -profile test --input ./tests/csv/3.0/mapped_single_bam.fixed.csv --concatenate_vcfs --tools deepvariant,freebayes,haplotypecaller,manta,mpileup,strelka,tiddit --step variant_calling + command: nextflow run main.nf -profile test --input ./tests/csv/3.0/mapped_joint_bam.fixed.csv --concatenate_vcfs --tools deepvariant,freebayes,haplotypecaller,manta,mpileup,strelka,tiddit --step variant_calling tags: - concatenate_vcfs files: @@ -12,9 +12,9 @@ "SNVHPOL=7;MQ=60;SOURCE=testN.strelka.variants.vcf.gz", "DP=2;SGB=-0.379885;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60;SOURCE=testN.bcftools.vcf.gz" ] - - path: results/variant_calling/concat/testN/testT.germline.vcf.gz + - path: results/variant_calling/concat/testT/testT.germline.vcf.gz # binary changes md5sums on reruns. - path: results/variant_calling/concat/testN/testN.germline.vcf.gz.tbi # binary changes md5sums on reruns. - - path: results/variant_calling/concat/testN/testT.germline.vcf.gz.tbi + - path: results/variant_calling/concat/testT/testT.germline.vcf.gz.tbi # binary changes md5sums on reruns. 
From bd8f2bef4efb1895f85301bf0228f0702aa13de2 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Mon, 5 Dec 2022 21:37:49 +0100 Subject: [PATCH 23/31] Avoiding publishing files from GERMLINE_VCFS_CONCAT --- conf/modules/post_variant_calling.config | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config index 77a2260ecf..b534424003 100644 --- a/conf/modules/post_variant_calling.config +++ b/conf/modules/post_variant_calling.config @@ -15,6 +15,12 @@ // Like, for instance, concatenating the unannotated, germline vcf-files process { + withName: 'GERMLINE_VCFS_CONCAT'{ + publishDir = [ + //specify to avoid publishing, overwritten otherwise + enabled: false + ] + } withName: 'GERMLINE_VCFS_CONCAT_SORT'{ ext.prefix = { "${meta.id}.germline" } From f910c82b9995931cfb273aa358325efd8c00b91b Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 10:25:35 +0100 Subject: [PATCH 24/31] Skip CI-test concatenate_vcfs in conda test-env --- .github/workflows/pytest-workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/pytest-workflow.yml b/.github/workflows/pytest-workflow.yml index a5d9c2872a..3ffff196ef 100644 --- a/.github/workflows/pytest-workflow.yml +++ b/.github/workflows/pytest-workflow.yml @@ -51,6 +51,8 @@ jobs: tags: snpeff - profile: "conda" tags: vep + - profile: "conda" + tags: concatenate_vcfs - profile: "singularity" tags: merge env: From ea9d925db78bb65b9df76c62004d6c8735488c0c Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 11:33:05 +0100 Subject: [PATCH 25/31] prettier --- tests/test_concat_germline_vcfs.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_concat_germline_vcfs.yml b/tests/test_concat_germline_vcfs.yml index 3879faefcb..8b23948a70 100644 --- a/tests/test_concat_germline_vcfs.yml +++ b/tests/test_concat_germline_vcfs.yml @@ -4,13 +4,13 @@ - 
concatenate_vcfs files: - path: results/variant_calling/concat/testN/testN.germline.vcf.gz - # binary changes md5sums on reruns. + # binary changes md5sums on reruns. contains: [ "SOURCE=testN.deepvariant.vcf.gz", "AB=0.167832;ABP=277.102;AC=1;AF=0.5;AN=2;AO=48;CIGAR=1X;DP=286;DPB=286;DPRA=0;EPP=3.0103;EPPR=3.0103;GTI=0;LEN=1;MEANALT=1;MQM=60;MQMR=60;NS=1;NUMALT=1;ODDS=105.855;PAIRED=1;PAIREDR=1;PAO=0;PQA=0;PQR=0;PRO=0;QA=2017;QR=9863;RO=238;RPL=0;RPP=107.241;RPPR=519.821;RPR=48;RUN=1;SAF=24;SAP=3.0103;SAR=24;SRF=119;SRP=3.0103;SRR=119;TYPE=snp;technology.illumina=1;SOURCE=testN.freebayes.vcf.gz", "SNVHPOL=7;MQ=60;SOURCE=testN.strelka.variants.vcf.gz", - "DP=2;SGB=-0.379885;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60;SOURCE=testN.bcftools.vcf.gz" + "DP=2;SGB=-0.379885;FS=0;MQ0F=0;AC=2;AN=2;DP4=0,0,0,1;MQ=60;SOURCE=testN.bcftools.vcf.gz", ] - path: results/variant_calling/concat/testT/testT.germline.vcf.gz # binary changes md5sums on reruns. From 812f6d05047d2f1f57720ea2cb078e65209b7571 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 11:35:56 +0100 Subject: [PATCH 26/31] Adding synonym for module BCFTOOLS_CONCAT in order to disable publishing from that module --- workflows/sarek.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/workflows/sarek.nf b/workflows/sarek.nf index 0215cc3da8..e37346f9bc 100644 --- a/workflows/sarek.nf +++ b/workflows/sarek.nf @@ -282,7 +282,7 @@ include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_MANTA } from '../modules/nf-c include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_MPILEUP } from '../modules/nf-core/tabix/bgziptabix/main' include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_STRELKA } from '../modules/nf-core/tabix/bgziptabix/main' include { TABIX_BGZIPTABIX as TABIX_EXT_VCF_TIDDIT } from '../modules/nf-core/tabix/bgziptabix/main' -include { BCFTOOLS_CONCAT } from '../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_CONCAT as GERMLINE_VCFS_CONCAT } from '../modules/nf-core/bcftools/concat/main' 
include { BCFTOOLS_SORT as GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/bcftools/sort/main' include { TABIX_TABIX as TABIX_GERMLINE_VCFS_CONCAT_SORT } from '../modules/nf-core/tabix/tabix/main' @@ -1067,8 +1067,8 @@ workflow SAREK { [new_meta, vcf, tbi] }.groupTuple() - BCFTOOLS_CONCAT(germline_vcfs_with_tbis) - GERMLINE_VCFS_CONCAT_SORT(BCFTOOLS_CONCAT.out.vcf) + GERMLINE_VCFS_CONCAT(germline_vcfs_with_tbis) + GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT.out.vcf) TABIX_GERMLINE_VCFS_CONCAT_SORT(GERMLINE_VCFS_CONCAT_SORT.out.vcf) } @@ -1078,8 +1078,8 @@ workflow SAREK { vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.freebayes_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.haplotypecaller_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.manta_vcf) - vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.strelka_vcf) + vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_GERMLINE_ALL.out.tiddit_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.freebayes_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.mutect2_vcf) vcf_to_annotate = vcf_to_annotate.mix(BAM_VARIANT_CALLING_TUMOR_ONLY_ALL.out.manta_vcf) From c7335930e17dc94b5da3f95e79d37ca6bc594f33 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 11:48:58 +0100 Subject: [PATCH 27/31] Updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 99b4f6d855..a3866ede27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - [#864](https://github.com/nf-core/sarek/pull/864) - Added possibilities to export assembled haplotypes and locally realigned reads +- 
[#792](https://github.com/nf-core/sarek/pull/792) - Added the option `--concatenate_vcfs` for concatenating the germline vcf-files. By default, the resulting vcf-files will be placed under `<outdir>/variant_calling/concat`. ### Changed From 439246d7f56783420954931e74751669e7e14a5d Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 19:44:55 +0100 Subject: [PATCH 28/31] Moving config from modules.config to post_variant_calling.config --- conf/modules/modules.config | 4 ---- conf/modules/post_variant_calling.config | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/conf/modules/modules.config b/conf/modules/modules.config index f2ac98a43e..0c36169370 100644 --- a/conf/modules/modules.config +++ b/conf/modules/modules.config @@ -87,10 +87,6 @@ process { } } - withName: 'TABIX_EXT_VCF_.*' { - ext.prefix = { "${input.baseName}" } - } - // VCF withName: 'BCFTOOLS_STATS' { ext.when = { !(params.skip_tools && params.skip_tools.split(',').contains('bcftools')) } diff --git a/conf/modules/post_variant_calling.config b/conf/modules/post_variant_calling.config index b534424003..4f30f9b69b 100644 --- a/conf/modules/post_variant_calling.config +++ b/conf/modules/post_variant_calling.config @@ -30,6 +30,10 @@ process { ] } + withName: 'TABIX_EXT_VCF_.*' { + ext.prefix = { "${input.baseName}" } + } + withName: 'TABIX_GERMLINE_VCFS_CONCAT_SORT'{ ext.prefix = { "${meta.id}.germline" } publishDir = [ From 4d15e405e22da85387235dcf38aa4bbe9bc3c05d Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 20:00:44 +0100 Subject: [PATCH 29/31] fixing comment --- modules/local/add_info_to_vcf/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/add_info_to_vcf/main.nf b/modules/local/add_info_to_vcf/main.nf index 385f8d1f9b..91c4e6b3b4 100644 --- a/modules/local/add_info_to_vcf/main.nf +++ b/modules/local/add_info_to_vcf/main.nf @@ -27,7 +27,7 @@ process ADD_INFO_TO_VCF { echo '##INFO=' >> \$output ## 
Add column header grep -E "^#CHROM" \$input >> \$output - ## Add SET value to INFO column of variant calls + ## Add SOURCE value to INFO column of variant calls if grep -Ev "^#" \$input; then grep -Ev "^#" \$input | awk 'BEGIN{FS=OFS="\t"} { \$8=="." ? \$8="SOURCE=$vcf_gz" : \$8=\$8";SOURCE=$vcf_gz"; print }' >> \$output fi From 07fb548a2f9dfdea42bcde5b09375ccd148606da Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 20:04:09 +0100 Subject: [PATCH 30/31] Remove code to pass back tbi-files to sarek.nf --- modules/nf-core/deepvariant/main.nf | 2 -- .../bam_variant_calling_deepvariant/main.nf | 22 ------------------ .../bam_variant_calling_freebayes/main.nf | 17 -------------- .../bam_variant_calling_germline_all/main.nf | 23 ------------------- .../main.nf | 22 ------------------ .../main.nf | 16 ------------- .../local/bam_variant_calling_mpileup/main.nf | 5 ---- .../main.nf | 21 ----------------- .../bam_variant_calling_single_tiddit/main.nf | 23 ------------------- .../local/vcf_variant_filtering_gatk/main.nf | 14 ----------- 10 files changed, 165 deletions(-) diff --git a/modules/nf-core/deepvariant/main.nf b/modules/nf-core/deepvariant/main.nf index d975116d3f..de48d9c1e4 100644 --- a/modules/nf-core/deepvariant/main.nf +++ b/modules/nf-core/deepvariant/main.nf @@ -16,9 +16,7 @@ process DEEPVARIANT { output: tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi tuple val(meta), path("${prefix}.g.vcf.gz"), emit: gvcf - tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi path "versions.yml" , emit: versions when: diff --git a/subworkflows/local/bam_variant_calling_deepvariant/main.nf b/subworkflows/local/bam_variant_calling_deepvariant/main.nf index 39d0279176..537d1d0d3f 100644 --- a/subworkflows/local/bam_variant_calling_deepvariant/main.nf +++ b/subworkflows/local/bam_variant_calling_deepvariant/main.nf @@ -23,11 +23,6 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT 
{ no_intervals: it[0].num_intervals <= 1 }.set{deepvariant_vcf_out} - DEEPVARIANT.out.vcf_tbi.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{deepvariant_tbi_out} - DEEPVARIANT.out.gvcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -103,22 +98,6 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { ], vcf] } - deepvariant_vcf_tbi = Channel.empty().mix( - MERGE_DEEPVARIANT_VCF.out.tbi, - deepvariant_tbi_out.no_intervals) - .map{ meta, tbi -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - sex: meta.sex, - status: meta.status, - variantcaller: "deepvariant" - ], tbi] - } - - ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_GVCF.out.versions) ch_versions = ch_versions.mix(MERGE_DEEPVARIANT_VCF.out.versions) ch_versions = ch_versions.mix(DEEPVARIANT.out.versions) @@ -127,7 +106,6 @@ workflow BAM_VARIANT_CALLING_DEEPVARIANT { emit: deepvariant_vcf - deepvariant_vcf_tbi deepvariant_gvcf versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_freebayes/main.nf b/subworkflows/local/bam_variant_calling_freebayes/main.nf index 4778db2224..148b20795e 100644 --- a/subworkflows/local/bam_variant_calling_freebayes/main.nf +++ b/subworkflows/local/bam_variant_calling_freebayes/main.nf @@ -72,22 +72,6 @@ workflow BAM_VARIANT_CALLING_FREEBAYES { vcf] } - freebayes_vcf_tbi = Channel.empty().mix( - MERGE_FREEBAYES.out.tbi, - TABIX_VC_FREEBAYES.out.tbi) - .map{ meta, tbi -> - [ [ - id: meta.id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - sex: meta.sex, - tumor_id: meta.tumor_id, - variantcaller: "freebayes" - ], - tbi] - } - ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions) ch_versions = ch_versions.mix(MERGE_FREEBAYES.out.versions) ch_versions = ch_versions.mix(FREEBAYES.out.versions) @@ -95,6 +79,5 @@ workflow BAM_VARIANT_CALLING_FREEBAYES { emit: freebayes_vcf - 
freebayes_vcf_tbi versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_germline_all/main.nf b/subworkflows/local/bam_variant_calling_germline_all/main.nf index f7e4f2b547..4c82912adf 100644 --- a/subworkflows/local/bam_variant_calling_germline_all/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_all/main.nf @@ -43,14 +43,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { strelka_vcf = Channel.empty() tiddit_vcf = Channel.empty() - deepvariant_vcf_tbi = Channel.empty() - freebayes_vcf_tbi = Channel.empty() - haplotypecaller_vcf_tbi = Channel.empty() - manta_vcf_tbi = Channel.empty() - mpileup_vcf_tbi = Channel.empty() - strelka_vcf_tbi = Channel.empty() - tiddit_vcf_tbi = Channel.empty() - // Remap channel with intervals cram_recalibrated_intervals = cram_recalibrated.combine(intervals) .map{ meta, cram, crai, intervals, num_intervals -> @@ -103,7 +95,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) mpileup_vcf = BAM_VARIANT_CALLING_MPILEUP.out.vcf - mpileup_vcf_tbi = BAM_VARIANT_CALLING_MPILEUP.out.tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_MPILEUP.out.versions) } @@ -136,7 +127,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) deepvariant_vcf = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf) - deepvariant_vcf_tbi = Channel.empty().mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.deepvariant_vcf_tbi) ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_DEEPVARIANT.out.versions) } @@ -156,7 +146,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) freebayes_vcf = BAM_VARIANT_CALLING_FREEBAYES.out.freebayes_vcf - freebayes_vcf_tbi = BAM_VARIANT_CALLING_FREEBAYES.out.freebayes_vcf_tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_FREEBAYES.out.versions) } @@ -194,7 +183,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { intervals_bed_combined_haplotypec) haplotypecaller_vcf = BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf - haplotypecaller_vcf_tbi = 
BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.filtered_vcf_tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_HAPLOTYPECALLER.out.versions) } @@ -210,7 +198,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { manta_vcf = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.manta_vcf - manta_vcf_tbi = BAM_VARIANT_CALLING_GERMLINE_MANTA.out.manta_vcf_tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_GERMLINE_MANTA.out.versions) } @@ -224,7 +211,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) strelka_vcf = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.strelka_vcf - strelka_vcf_tbi = BAM_VARIANT_CALLING_SINGLE_STRELKA.out.strelka_vcf_tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_STRELKA.out.versions) } @@ -237,7 +223,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { ) tiddit_vcf = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tiddit_vcf - tiddit_vcf_tbi = BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.tiddit_vcf_tbi ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SINGLE_TIDDIT.out.versions) } @@ -250,13 +235,5 @@ workflow BAM_VARIANT_CALLING_GERMLINE_ALL { strelka_vcf tiddit_vcf - deepvariant_vcf_tbi - freebayes_vcf_tbi - haplotypecaller_vcf_tbi - manta_vcf_tbi - mpileup_vcf_tbi - strelka_vcf_tbi - tiddit_vcf_tbi - versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_germline_manta/main.nf b/subworkflows/local/bam_variant_calling_germline_manta/main.nf index ca3c637e70..5fcbb0765d 100644 --- a/subworkflows/local/bam_variant_calling_germline_manta/main.nf +++ b/subworkflows/local/bam_variant_calling_germline_manta/main.nf @@ -33,12 +33,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { no_intervals: it[0].num_intervals <= 1 }.set{manta_diploid_sv_vcf} - MANTA_GERMLINE.out.diploid_sv_vcf_tbi.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{manta_diploid_sv_vcf_tbi} - - // Only when using intervals MERGE_MANTA_SMALL_INDELS( manta_small_indels_vcf.intervals @@ -110,21 +104,6 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { 
vcf] } - manta_vcf_tbi = Channel.empty().mix( - MERGE_MANTA_DIPLOID.out.tbi, - manta_diploid_sv_vcf_tbi.no_intervals) - .map{ meta, tbi -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - status: meta.status, - sex: meta.sex, - variantcaller: "manta"], - tbi] - } - ch_versions = ch_versions.mix(MERGE_MANTA_DIPLOID.out.versions) ch_versions = ch_versions.mix(MERGE_MANTA_SMALL_INDELS.out.versions) ch_versions = ch_versions.mix(MERGE_MANTA_SV.out.versions) @@ -132,6 +111,5 @@ workflow BAM_VARIANT_CALLING_GERMLINE_MANTA { emit: manta_vcf - manta_vcf_tbi versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf index be0305945d..62a8fc06b2 100644 --- a/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf +++ b/subworkflows/local/bam_variant_calling_haplotypecaller/main.nf @@ -23,7 +23,6 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { ch_versions = Channel.empty() filtered_vcf = Channel.empty() - filtered_vcf_tbi = Channel.empty() realigned_bam = Channel.empty() GATK4_HAPLOTYPECALLER( @@ -149,20 +148,6 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { ] } - filtered_vcf_tbi = VCF_VARIANT_FILTERING_GATK.out.filtered_vcf_tbi.map{ meta, tbi-> [ - [ - patient:meta.patient, - sample:meta.sample, - status:meta.status, - sex:meta.sex, - id:meta.sample, - num_intervals:meta.num_intervals, - variantcaller:"haplotypecaller" - ], - tbi - ] - } - ch_versions = ch_versions.mix(GATK4_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(MERGE_HAPLOTYPECALLER.out.versions) ch_versions = ch_versions.mix(VCF_VARIANT_FILTERING_GATK.out.versions) @@ -171,6 +156,5 @@ workflow BAM_VARIANT_CALLING_HAPLOTYPECALLER { emit: versions = ch_versions filtered_vcf - filtered_vcf_tbi realigned_bam } diff --git a/subworkflows/local/bam_variant_calling_mpileup/main.nf 
b/subworkflows/local/bam_variant_calling_mpileup/main.nf index ef01065677..4416071ed8 100644 --- a/subworkflows/local/bam_variant_calling_mpileup/main.nf +++ b/subworkflows/local/bam_variant_calling_mpileup/main.nf @@ -21,10 +21,6 @@ workflow BAM_VARIANT_CALLING_MPILEUP { intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 } - tbis = BCFTOOLS_MPILEUP.out.tbi.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - } mpileup = SAMTOOLS_MPILEUP.out.mpileup.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -77,5 +73,4 @@ workflow BAM_VARIANT_CALLING_MPILEUP { versions = ch_versions mpileup = Channel.empty().mix(CAT_MPILEUP.out.file_out, mpileup.no_intervals) vcf = Channel.empty().mix(GATK4_MERGEVCFS.out.vcf, vcfs.no_intervals) - tbi = Channel.empty().mix(GATK4_MERGEVCFS.out.tbi, tbis.no_intervals) } diff --git a/subworkflows/local/bam_variant_calling_single_strelka/main.nf b/subworkflows/local/bam_variant_calling_single_strelka/main.nf index 873d102f96..4eefb3fa99 100644 --- a/subworkflows/local/bam_variant_calling_single_strelka/main.nf +++ b/subworkflows/local/bam_variant_calling_single_strelka/main.nf @@ -21,11 +21,6 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { no_intervals: it[0].num_intervals <= 1 }.set{strelka_vcf} - STRELKA_SINGLE.out.vcf_tbi.branch{ - intervals: it[0].num_intervals > 1 - no_intervals: it[0].num_intervals <= 1 - }.set{strelka_vcf_tbi} - STRELKA_SINGLE.out.genome_vcf.branch{ intervals: it[0].num_intervals > 1 no_intervals: it[0].num_intervals <= 1 @@ -84,27 +79,11 @@ workflow BAM_VARIANT_CALLING_SINGLE_STRELKA { ], vcf] } - strelka_vcf_tbi = Channel.empty().mix( - MERGE_STRELKA.out.tbi, - strelka_vcf_tbi.no_intervals) - .map{ meta, tbi -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - sex: meta.sex, - status: meta.status, - variantcaller: "strelka" - ], tbi] - } - ch_versions = 
ch_versions.mix(MERGE_STRELKA.out.versions) ch_versions = ch_versions.mix(MERGE_STRELKA_GENOME.out.versions) ch_versions = ch_versions.mix(STRELKA_SINGLE.out.versions) emit: strelka_vcf - strelka_vcf_tbi versions = ch_versions } diff --git a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf index 4301be22a7..5cd4e18bd8 100644 --- a/subworkflows/local/bam_variant_calling_single_tiddit/main.nf +++ b/subworkflows/local/bam_variant_calling_single_tiddit/main.nf @@ -40,28 +40,6 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { ] [new_meta, gz]} - tiddit_vcf_tbi = TABIX_BGZIP_TIDDIT_SV.out.gz_tbi.map{ meta, gz, tbi -> - - new_meta = meta.tumor_id ? [ - id: meta.tumor_id + "_vs_" + meta.normal_id, - normal_id: meta.normal_id, - num_intervals: meta.num_intervals, - patient: meta.patient, - sex: meta.sex, - tumor_id: meta.tumor_id, - variantcaller: 'tiddit' - ] - : [ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - sex: meta.sex, - status: meta.status, - variantcaller: 'tiddit' - ] - [new_meta, tbi]} - ch_versions = ch_versions.mix(TABIX_BGZIP_TIDDIT_SV.out.versions) ch_versions = ch_versions.mix(TIDDIT_SV.out.versions) @@ -69,6 +47,5 @@ workflow BAM_VARIANT_CALLING_SINGLE_TIDDIT { versions = ch_versions tiddit_vcf = tiddit_vcf_gz - tiddit_vcf_tbi tiddit_ploidy } diff --git a/subworkflows/local/vcf_variant_filtering_gatk/main.nf b/subworkflows/local/vcf_variant_filtering_gatk/main.nf index 168cb96982..3808d3e533 100644 --- a/subworkflows/local/vcf_variant_filtering_gatk/main.nf +++ b/subworkflows/local/vcf_variant_filtering_gatk/main.nf @@ -59,24 +59,10 @@ workflow VCF_VARIANT_FILTERING_GATK { ], vcf] } - filtered_vcf_tbi = FILTERVARIANTTRANCHES.out.tbi.map{ meta, tbi -> - [[ - id: meta.sample, - num_intervals: meta.num_intervals, - patient: meta.patient, - sample: meta.sample, - sex: meta.sex, - status: meta.status, - variantcaller: 
"haplotypecaller" - ], tbi] - } - ch_versions = ch_versions.mix(CNNSCOREVARIANTS.out.versions) ch_versions = ch_versions.mix(FILTERVARIANTTRANCHES.out.versions) emit: versions = ch_versions filtered_vcf - filtered_vcf_tbi } - From b32b4cff42ea3199e357c95b215ddc2ad2ff4349 Mon Sep 17 00:00:00 2001 From: Anders Sune Pedersen Date: Tue, 6 Dec 2022 20:04:44 +0100 Subject: [PATCH 31/31] Comments added --- tests/config/pytest_tags.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/config/pytest_tags.yml b/tests/config/pytest_tags.yml index f780cb72fc..64b988e65c 100644 --- a/tests/config/pytest_tags.yml +++ b/tests/config/pytest_tags.yml @@ -267,16 +267,16 @@ vep: ## concatenate germline vcfs concatenate_vcfs: - conf/modules/post_variant_calling.config + - modules/nf-core/deepvariant/main.nf # deepvariant + - modules/nf-core/tabix/tabix/main.nf + - modules/nf-core/freebayes/main.nf # freebayes + - modules/nf-core/gatk4/haplotypecaller/main.nf # haplotypecaller + - modules/nf-core/manta/germline/main.nf # manta + - modules/nf-core/bcftools/mpileup/main.nf # mpileup/bcftools - modules/nf-core/bcftools/sort/main.nf - modules/nf-core/tabix/bgziptabix/main.nf - modules/nf-core/bcftools/concat/main.nf - - modules/nf-core/deepvariant/main.nf # DEEPVARIANT - - modules/nf-core/tabix/tabix/main.nf - - modules/nf-core/freebayes/main.nf # Freebayes - - modules/nf-core/gatk4/haplotypecaller/main.nf # Haplotypecaller - - modules/nf-core/manta/germline/main.nf # Manta - - modules/nf-core/bcftools/mpileup/main.nf # MPILEUP/BCFTOOLS - modules/nf-core/samtools/mpileup/main.nf - - modules/nf-core/gatk4/mergevcfs/main.nf - - modules/nf-core/strelka/germline/main.nf - - modules/nf-core/tiddit/sv/main.nf + - modules/nf-core/gatk4/mergevcfs/main.nf # gatk4/mergevcfs + - modules/nf-core/strelka/germline/main.nf # strelka + - modules/nf-core/tiddit/sv/main.nf # tiddit