nf-core · FriederikeHanssen · Aug 17, 2023 · Aug 4, 2023 · Aug 4, 2023 · Aug 4, 2023
@@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - [#1155](https://github.com/nf-core/sarek/pull/1155) - Restore proper rendering in `usage.md`
 - [#1163](https://github.com/nf-core/sarek/pull/1163) - Correcting location of output folder for joint variant calling with GATK's haplotypecaller
 - [#1169](https://github.com/nf-core/sarek/pull/1169) - Updating Sentieon-modules. (The conda-check in the Sentieon-modules was moved to the script-section. The version of Sentieon remain unchanged.)
+- [#1171](https://github.com/nf-core/sarek/pull/1171) - Fix channel logic for germline resource to skip GetPileupSummaries if no germline resource is provided
 - [#1172](https://github.com/nf-core/sarek/pull/1172) - Publish gvcf files when all intervals are processed at once ([#764](https://github.com/nf-core/sarek/issues/764))
 - [#1177](https://github.com/nf-core/sarek/pull/1177) - Fix status inference when using nf-validation plugin
 

@@ -70,21 +70,12 @@ process {
         }
     }
 
-    if (params.joint_mutect2) {
-        withName: 'MUTECT2_PAIRED' {
-            ext.args         = { params.ignore_soft_clipped_bases ?
-                                    "--dont-use-soft-clipped-bases true --f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" :
-                                    "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample ${meta.normal_id}" }
-        }
-    }
-    else {
-        withName: 'MUTECT2_PAIRED'{
-            //sample name from when the test data was generated
-            ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
-        }
+    withName: '.*:MUTECT2_PAIRED'{
+        //sample name from when the test data was generated
+        ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
     }
 
-    withName: 'FILTERVARIANTTRANCHES'{
+    withName: '.*:FILTERVARIANTTRANCHES'{
         ext.args = { "--info-key CNN_1D --indel-tranche 0" }
     }
 }

@@ -87,12 +87,12 @@ process {
         }
     }
 
-    withName: 'MUTECT2_PAIRED'{
+    withName: '.*:MUTECT2_PAIRED'{
         //sample name from when the test data was generated
-        ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal " }
+        ext.args = { "--f1r2-tar-gz ${task.ext.prefix}.f1r2.tar.gz --normal-sample normal" }
     }
 
-    withName: 'FILTERVARIANTTRANCHES'{
+    withName: '.*:FILTERVARIANTTRANCHES'{
         ext.args = { "--info-key CNN_1D --indel-tranche 0" }
     }
 }

@@ -30,6 +30,7 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
     main:
     versions = Channel.empty()
 
+    // If no germline resource is provided, create an empty channel to prevent GetPileupSummaries from being run
     germline_resource_pileup     = germline_resource_tbi ? germline_resource : Channel.empty()
     germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()
 
@@ -143,13 +144,18 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
     // This is necessary because we generated one normal pileup summary for each patient but we need run calculate contamination for each tumor-normal pair.
     pileup_table_tumor = Channel.empty().mix(GATHERPILEUPSUMMARIES_TUMOR.out.table, pileup_table_tumor_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }
     pileup_table_normal= Channel.empty().mix(GATHERPILEUPSUMMARIES_NORMAL.out.table, pileup_table_normal_branch.no_intervals).map{meta, table -> [ meta - meta.subMap('normal_id', 'tumor_id', 'num_intervals') + [id:meta.patient], meta.id, table ] }
+
     ch_calculatecontamination_in_tables = pileup_table_tumor.combine(
         pileup_table_normal, by:0).map{
         meta, tumor_id, tumor_table, normal_id, normal_table -> [ meta + [ id: tumor_id + "_vs_" + normal_id ], tumor_table, normal_table]
         }
 
     CALCULATECONTAMINATION(ch_calculatecontamination_in_tables)
 
+    // Initialize empty channels: contamination is calculated from the pileup tables, and pileup is not run if no germline resource is provided
+    ch_seg_to_filtermutectcalls = Channel.empty()
+    ch_cont_to_filtermutectcalls = Channel.empty()
+
     if (joint_mutect2) {
         // Reduce the meta to only patient name
         ch_seg_to_filtermutectcalls = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('tumor_id') + [id: meta.patient], seg]}.groupTuple()
@@ -163,11 +169,11 @@ workflow BAM_VARIANT_CALLING_SOMATIC_MUTECT2 {
 
     // Mutect2 calls filtered by filtermutectcalls using the artifactpriors, contamination and segmentation tables
     vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
-        .join(stats, failOnDuplicate: true, failOnMismatch: true)
-        .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
-        .join(ch_seg_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
-        .join(ch_cont_to_filtermutectcalls, failOnDuplicate: true, failOnMismatch: true)
-        .map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }
+                        .join(stats, failOnDuplicate: true, failOnMismatch: true)
+                        .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
+                        .join(ch_seg_to_filtermutectcalls)
+                        .join(ch_cont_to_filtermutectcalls)
+                    .map{ meta, vcf, tbi, stats, orientation, seg, cont -> [ meta, vcf, tbi, stats, orientation, seg, cont, [] ] }
 
     FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)
 

@@ -29,6 +29,7 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
     main:
     versions = Channel.empty()
 
+    // If no germline resource is provided, create an empty channel to prevent GetPileupSummaries from being run
     germline_resource_pileup     = germline_resource_tbi ? germline_resource : Channel.empty()
     germline_resource_pileup_tbi = germline_resource_tbi ?: Channel.empty()
 
@@ -118,6 +119,10 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
     // Contamination and segmentation tables created using calculatecontamination on the pileup summary table
     CALCULATECONTAMINATION(pileup_table.map{ meta, table -> [ meta, table, [] ] })
 
+    // Initialize empty channels: contamination is calculated from the pileup tables, and pileup is not run if no germline resource is provided
+    calculatecontamination_out_seg = Channel.empty()
+    calculatecontamination_out_cont = Channel.empty()
+
     if (joint_mutect2) {
         // Remove sample names and retain patient name as the main identifier
         calculatecontamination_out_seg = CALCULATECONTAMINATION.out.segmentation.map{ meta, seg -> [ meta - meta.subMap('sample') + [id:meta.patient], seg ] }.groupTuple()
@@ -133,8 +138,8 @@ workflow BAM_VARIANT_CALLING_TUMOR_ONLY_MUTECT2 {
     vcf_to_filter = vcf.join(tbi, failOnDuplicate: true, failOnMismatch: true)
         .join(stats, failOnDuplicate: true, failOnMismatch: true)
         .join(LEARNREADORIENTATIONMODEL.out.artifactprior, failOnDuplicate: true, failOnMismatch: true)
-        .join(calculatecontamination_out_seg, failOnDuplicate: true, failOnMismatch: true)
-        .join(calculatecontamination_out_cont, failOnDuplicate: true, failOnMismatch: true)
+        .join(calculatecontamination_out_seg)
+        .join(calculatecontamination_out_cont)
         .map{ meta, vcf, tbi, stats, artifactprior, seg, cont -> [ meta, vcf, tbi, stats, artifactprior, seg, cont, [] ] }
 
     FILTERMUTECTCALLS(vcf_to_filter, fasta, fai, dict)

@@ -44,7 +44,7 @@
     - variant_calling
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: d3c9f0559d48696c54f3c463b1606586
+      md5sum: f87290ce1c6ea523e08354ed6c258b0b
     - path: results/multiqc
     - path: results/no_intervals.bed
       md5sum: f3dac01ea66b95fe477446fde2d31489
@@ -53,7 +53,7 @@
     - path: results/no_intervals.bed.gz.tbi
       md5sum: f3dac01ea66b95fe477446fde2d31489
     - path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
-      md5sum: d83942d74fbbf1c3770b3c04bf622c63
+      md5sum: 9876607145d11c6b8492264936d7a82c
     - path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
       md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
     - path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
@@ -91,10 +91,10 @@
     - variant_calling
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: d3c9f0559d48696c54f3c463b1606586
+      md5sum: f87290ce1c6ea523e08354ed6c258b0b
     - path: results/multiqc
     - path: results/reports/bcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.bcftools_stats.txt
-      md5sum: d83942d74fbbf1c3770b3c04bf622c63
+      md5sum: 9876607145d11c6b8492264936d7a82c
     - path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.FILTER.summary
       md5sum: b25d4d2a64f9590d0ffb119fd3adb06e
     - path: results/reports/vcftools/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.filtered.TsTv.count
@@ -123,100 +123,99 @@
       md5sum: c09dff3f145d77d4848992e244811c08
     - path: results/variant_calling/mutect2/sample4_vs_sample3/sample4_vs_sample3.mutect2.vcf.gz.tbi
     # binary changes md5sums on reruns
-- name: Run joint calling on somatic samples with mutect2
-  command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
+- name: Run joint calling on tumor only samples with mutect2
+  command: nextflow run main.nf -profile test_cache,tools_tumoronly --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --joint_mutect2 --outdir results
   tags:
     - mutect2_manual
     - manual
-    - somatic
+    - tumor_only
     - variant_calling
     - multi_sample
-    - joint_somatic
+    - joint_tumoronly
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
+      md5sum: f87290ce1c6ea523e08354ed6c258b0b
     - path: results/multiqc
     - path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
-      md5sum: d75da410d57960944f54d02b2b5cdcac
+      md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
-      md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
+      md5sum: e1e42b6f65cbdba116cff72a56e40c4b
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
-      md5sum: aa51bde6080c015c6aa6c8254977dd11
+      md5sum: c00e1639a41deb107099487676a6cf37
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
-      md5sum: 262f843f68d072c457ca28b56da3ede8
-    - path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
-      md5sum: 16077fdb885a8afe64c7669477471354
+      md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
+    - path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
+      md5sum: 46c708c943b453da89a3da08acfdb2a7
     - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
       md5sum: 9afe42339f590937166edcf4746c22ec
+    - path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
+      md5sum: f4643d9319bde4efbfbe516d6fb13052
+    - path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
+      md5sum: 11440fe64b5b953d7efb9cf47e330364
     - path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
       md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
-    - path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
-      md5sum: 46c708c943b453da89a3da08acfdb2a7
-    - path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
-      md5sum: 11440fe64b5b953d7efb9cf47e330364
-    - path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
-      md5sum: f4643d9319bde4efbfbe516d6fb13052
-    - path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
+    - path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
       md5sum: 38f83e2f98b206640644dd93d5e96f4e
     - path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
-      md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
+      md5sum: f237666ae325fde0c06b8bc62d2846fc
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
-      md5sum: 094cb75b0bda28e92b6718ff33d136e2
+      md5sum: 22e58aef3b14b335fa487d40b590ffeb
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
     # binary changes md5sums on reruns
-- name: Run joint calling on tumor only samples with mutect2
-  command: nextflow run main.nf -profile test,tools_somatic --input tests/csv/3.0/recalibrated_tumoronly_joint.csv --tools mutect2 --step variant_calling --joint_mutect2 --outdir results
+- name: Run joint calling on somatic samples with mutect2
+  command: nextflow run main.nf -profile test_cache,tools_somatic --input tests/csv/3.0/recalibrated_somatic_joint.csv --tools mutect2 --joint_mutect2 --outdir results
   tags:
     - mutect2_manual
-    - manual
-    - tumoronly
+    - somatic
     - variant_calling
     - multi_sample
-    - joint_tumoronly
+    - joint_somatic
   files:
     - path: results/csv/variantcalled.csv
-      md5sum: 7aa65d5d625ce77cd8cabd58162bd71a
+      md5sum: f87290ce1c6ea523e08354ed6c258b0b
     - path: results/multiqc
     - path: results/reports/bcftools/mutect2/test/test.mutect2.filtered.bcftools_stats.txt
-      md5sum: a0cdc26fb7d8c446dd0283fed71a24d5
+      md5sum: d75da410d57960944f54d02b2b5cdcac
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.FILTER.summary
-      md5sum: e1e42b6f65cbdba116cff72a56e40c4b
+      md5sum: e0eb3e34fc15f3b452bfc43f032cc8be
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.count
-      md5sum: c00e1639a41deb107099487676a6cf37
+      md5sum: aa51bde6080c015c6aa6c8254977dd11
     - path: results/reports/vcftools/mutect2/test/test.mutect2.filtered.TsTv.qual
-      md5sum: a21016aa99e5cbf32eeae1b405ca6d8d
-    - path: results/variant_calling/mutect2/test/sample2.mutect2.contamination.table
-      md5sum: 46c708c943b453da89a3da08acfdb2a7
+      md5sum: 262f843f68d072c457ca28b56da3ede8
+    - path: results/variant_calling/mutect2/sample1/sample1.mutect2.pileups.table
+      md5sum: 16077fdb885a8afe64c7669477471354
     - path: results/variant_calling/mutect2/sample2/sample2.mutect2.pileups.table
       md5sum: 9afe42339f590937166edcf4746c22ec
-    - path: results/variant_calling/mutect2/test/sample2.mutect2.segmentation.table
-      md5sum: f4643d9319bde4efbfbe516d6fb13052
-    - path: results/variant_calling/mutect2/test/sample3.mutect2.contamination.table
-      md5sum: 11440fe64b5b953d7efb9cf47e330364
     - path: results/variant_calling/mutect2/sample3/sample3.mutect2.pileups.table
       md5sum: fd0c1f7819717b7f94e52f6611f4b2e0
-    - path: results/variant_calling/mutect2/test/sample3.mutect2.segmentation.table
+    - path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.contamination.table
+      md5sum: 46c708c943b453da89a3da08acfdb2a7
+    - path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.contamination.table
+      md5sum: 11440fe64b5b953d7efb9cf47e330364
+    - path: results/variant_calling/mutect2/test/sample2_vs_sample1.mutect2.segmentation.table
+      md5sum: f4643d9319bde4efbfbe516d6fb13052
+    - path: results/variant_calling/mutect2/test/sample3_vs_sample1.mutect2.segmentation.table
       md5sum: 38f83e2f98b206640644dd93d5e96f4e
     - path: results/variant_calling/mutect2/test/test.mutect2.artifactprior.tar.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.filteringStats.tsv
-      md5sum: f237666ae325fde0c06b8bc62d2846fc
+      md5sum: dee72b4c5c9bbda01d44fd3e00f1b404
     - path: results/variant_calling/mutect2/test/test.mutect2.filtered.vcf.gz.tbi
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz
     # binary changes md5sums on reruns
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.stats
-      md5sum: 22e58aef3b14b335fa487d40b590ffeb
+      md5sum: 094cb75b0bda28e92b6718ff33d136e2
     - path: results/variant_calling/mutect2/test/test.mutect2.vcf.gz.tbi
     # binary changes md5sums on reruns
 - name: Run full pipeline on tumoronly with most tools