Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix getpileupsummary should not run if no germline resource is provided #1171

Merged
merged 11 commits into from
Aug 17, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
- [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
- [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remains unchanged.)
- [#1171](https://github.com/nf-core/sarek/pull/1171) - Fix channel logic for germline resource to skip GetPileupSummaries if not provided
- [#1172](https://github.com/nf-core/sarek/pull/1172) - Publish gvcf files when all intervals are processed at once ([#764](https://github.com/nf-core/sarek/issues/764))
- [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin

Expand Down
17 changes: 4 additions & 13 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -70,21 +70,12 @@ process {
}
}

if (params.joint_mutect2) {
withName: 'MUTECT2_PAIRED' {
ext.args = { params.ignore_soft_clipped_bases ?
"--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" :
"--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" }
}
}
else {
withName: 'MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
}
withName: '.*:MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
}

withName: 'FILTERVARIANTTRANCHES'{
withName: '.*:FILTERVARIANTTRANCHES'{
maxulysse marked this conversation as resolved.
Show resolved Hide resolved
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
Expand Down
6 changes: 3 additions & 3 deletions conf/test/cache.config
Original file line number Diff line number Diff line change
Expand Up @@ -87,12 +87,12 @@ process {
}
}

withName: 'MUTECT2_PAIRED'{
withName: '.*:MUTECT2_PAIRED'{
//sample name from when the test data was generated
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
}

withName: 'FILTERVARIANTTRANCHES'{
withName: '.*:FILTERVARIANTTRANCHES'{
ext.args = { "--info-key CNN_1D --indel-tranche 0" }
}
}
Expand Down
16 changes: 11 additions & 5 deletions subworkflows/local/bam_variant_calling_somatic_mutect2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
main:
versions = Channel.empty()

// If no germline resource is provided, create an empty channel to prevent GetPileupSummaries from being run
germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty()
germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()

Expand Down Expand Up @@ -143,13 +144,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
// This is necessary because we generated one normal pileup summary for each patient, but we need to run CalculateContamination for each tumor-normal pair.
pileup_table_tumor = Channel.empty().mix(GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }
pileup_table_normal= Channel.empty().mix(GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }

ch_calculatecontamination_in_tables = pileup_table_tumor.combine(
pileup_table_normal, by:0).map{
meta, tumor_id, tumor_table, normal_id, normal_table -> [ meta + [ id: tumor_id + "_vs_" + normal_id ], tumor_table, normal_table]
}

CALCULATECONTAMINATION(ch_calculatecontamination_in_tables)

// Initialize empty channels: contamination calculation runs on the pileup tables, and pileup is not run if no germline resource is provided
ch_seg_to_filtermutectcalls = Channel.empty()
ch_cont_to_filtermutectcalls = Channel.empty()

if (joint_mutect2) {
// Reduce the meta to only patient name
ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple()
Expand All @@ -163,11 +169,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {

// Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables
vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(ch_seg_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
.join(ch_cont_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(ch_seg_to_filtermutectcalls)
.join(ch_cont_to_filtermutectcalls)
.map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
main:
versions = Channel.empty()

// If no germline resource is provided, create an empty channel to prevent GetPileupSummaries from being run
germline_resource_pileup = germline_resource_tbi ? germline_resource : Channel.empty()
germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()

Expand Down Expand Up @@ -118,6 +119,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
// Contamination and segmentation tables created using calculatecontamination on the pileup summary table
CALCULATECONTAMINATION(pileup_table.map{ meta, table -> [ meta, table, [] ] })

// Initialize empty channels: contamination calculation runs on the pileup table, and pileup is not run if no germline resource is provided
calculatecontamination_out_seg = Channel.empty()
calculatecontamination_out_cont = Channel.empty()

if (joint_mutect2) {
// Remove sample names and retain patient name as the main identifier
calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample') + [id:meta.patient], seg ] }.groupTuple()
Expand All @@ -133,8 +138,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
.join(stats, failOnDuplicate: true, failOnMismatch: true)
.join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true)
.join(calculatecontamination_out_seg)
.join(calculatecontamination_out_cont)
.map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] }

FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)
Expand Down
85 changes: 42 additions & 43 deletions tests/test_tools_manually.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
md5sum: d3c9f0559d48696c54f3c463b1606586
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/no_intervals.bed
md5sum: f3dac01ea66b95fe477446fde2d31489
Expand All @@ -53,7 +53,7 @@
- path: results/no_intervals.bed.gz.tbi
md5sum: f3dac01ea66b95fe477446fde2d31489
- path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
md5sum: d83942d74fbbf1c3770b3c04bf622c63
md5sum: 9876607145d11c6b8492264936d7a82c
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
Expand Down Expand Up @@ -91,10 +91,10 @@
- variant_calling
files:
- path: results/csv/variantcalled.csv
md5sum: d3c9f0559d48696c54f3c463b1606586
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
md5sum: d83942d74fbbf1c3770b3c04bf622c63
md5sum: 9876607145d11c6b8492264936d7a82c
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
- path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
Expand Down Expand Up @@ -123,100 +123,99 @@
md5sum: c09dff3f145d77d4848992e244811c08
- path: results/variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run joint calling on somatic samples with mutect2
command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
- name: Run joint calling on tumor only samples with mutect2
command: nextflow run main.nf -profile test_cache,tools_tumoronly --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --joint_mutect2 --outdir results
tags:
- mutect2_manual
- manual
- somatic
- tumor_only
- variant_calling
- multi_sample
- joint_somatic
- joint_tumoronly
files:
- path: results/csv/variantcalled.csv
md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
md5sum: d75da410d57960944f54d02b2b5cdcac
md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
md5sum: e1e42b6f65cbdba116cff72a56e40c4b
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
md5sum: aa51bde6080c015c6aa6c8254977dd11
md5sum: c00e1639a41deb107099487676a6cf37
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
md5sum: 262f843f68d072c457ca28b56da3ede8
- path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
md5sum: 16077fdb885a8afe64c7669477471354
md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
- path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
md5sum: 9afe42339f590937166edcf4746c22ec
- path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
- path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
md5sum: 38f83e2f98b206640644dd93d5e96f4e
- path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
md5sum: f237666ae325fde0c06b8bc62d2846fc
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
md5sum: 094cb75b0bda28e92b6718ff33d136e2
md5sum: 22e58aef3b14b335fa487d40b590ffeb
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run joint calling on tumor only samples with mutect2
command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
- name: Run joint calling on somatic samples with mutect2
command: nextflow run main.nf -profile test_cache,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --joint_mutect2 --outdir results
tags:
- mutect2_manual
- manual
- tumoronly
- somatic
- variant_calling
- multi_sample
- joint_tumoronly
- joint_somatic
files:
- path: results/csv/variantcalled.csv
md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
md5sum: f87290ce1c6ea523e08354ed6c258b0b
- path: results/multiqc
- path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
md5sum: d75da410d57960944f54d02b2b5cdcac
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
md5sum: e1e42b6f65cbdba116cff72a56e40c4b
md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
md5sum: c00e1639a41deb107099487676a6cf37
md5sum: aa51bde6080c015c6aa6c8254977dd11
- path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
- path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
md5sum: 262f843f68d072c457ca28b56da3ede8
- path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
md5sum: 16077fdb885a8afe64c7669477471354
- path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
md5sum: 9afe42339f590937166edcf4746c22ec
- path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
- path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
md5sum: 46c708c943b453da89a3da08acfdb2a7
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
md5sum: 11440fe64b5b953d7efb9cf47e330364
- path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
md5sum: f4643d9319bde4efbfbe516d6fb13052
- path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
md5sum: 38f83e2f98b206640644dd93d5e96f4e
- path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
md5sum: f237666ae325fde0c06b8bc62d2846fc
md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
- path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
# binary changes md5sums on reruns
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
md5sum: 22e58aef3b14b335fa487d40b590ffeb
md5sum: 094cb75b0bda28e92b6718ff33d136e2
- path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
# binary changes md5sums on reruns
- name: Run full pipeline on tumoronly with most tools
Expand Down
Loading
Loading