diff --git a/docs/hello_nextflow/04_hello_genomics.md b/docs/hello_nextflow/04_hello_genomics.md index 61bc3054f8..59be62228e 100644 --- a/docs/hello_nextflow/04_hello_genomics.md +++ b/docs/hello_nextflow/04_hello_genomics.md @@ -309,8 +309,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ @@ -322,6 +322,8 @@ process GATK_HAPLOTYPECALLER { } ``` +You'll notice that we've introduced some new syntax here (`emit:`) to uniquely name each of our output channels, and the reasons for this will become clear soon. + This command takes quite a few more inputs, because GATK needs more information to perform the analysis compared to a simple indexing job. But you'll note that there are even more inputs defined in the inputs block than are listed in the GATK command. Why is that? diff --git a/docs/hello_nextflow/05_hello_operators.md b/docs/hello_nextflow/05_hello_operators.md index 7b3bf9564e..4e1d889ed7 100644 --- a/docs/hello_nextflow/05_hello_operators.md +++ b/docs/hello_nextflow/05_hello_operators.md @@ -331,16 +331,16 @@ _Before:_ ```groovy title="hello-operators.nf" linenums="52" output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx ``` _After:_ ```groovy title="hello-operators.nf" linenums="52" output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx ``` ### 1.3. Run the pipeline again @@ -454,14 +454,14 @@ Good news: we can do that using the `collect()` channel operator. 
Let's add the ```groovy title="hello-operators.nf" linenums="118" // Collect variant calling outputs across samples -all_gvcfs_ch = GATK_HAPLOTYPECALLER.out[0].collect() -all_idxs_ch = GATK_HAPLOTYPECALLER.out[1].collect() +all_gvcfs_ch = GATK_HAPLOTYPECALLER.out.vcf.collect() +all_idxs_ch = GATK_HAPLOTYPECALLER.out.idx.collect() ``` Does that seem a bit complicated? Let's break this down and translate it into plain language. 1. We're taking the output channel from the `GATK_HAPLOTYPECALLER` process, referred to using the `.out` property. -2. Each 'element' coming out of the channel is a pair of files: the GVCF and its index file, in that order because that's the order they're listed in the process output block. Conveniently, we can pick out the GVCFs on one hand by adding `[0]` and the index files on the other by adding `[1]` after the `.out` property. +2. Each 'element' coming out of the channel is a pair of files: the GVCF and its index file, in that order because that's the order they're listed in the process output block. Conveniently, because we named the outputs of this process (using `emit:`) in the previous part of this course, we can pick out the GVCFs on one hand by adding `.vcf` and the index files on the other by adding `.idx` after the `.out` property. If we had not named those outputs, we would have had to refer to them by position instead, as `.out[0]` and `.out[1]`, respectively. 3. We append the `collect()` channel operator to bundle all the GVCF files together into a single element in a new channel called `all_gvcfs_ch`, and do the same with the index files to form the new channel called `all_idxs_ch`. !!!tip @@ -773,8 +773,8 @@ _After:_ ```groovy title="hello-operators.nf" linenums="87" output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" , emit: vcf + path "${cohort_name}.joint.vcf.idx" , emit: idx ``` We're almost done! 
diff --git a/docs/hello_nextflow/07_hello_modules.md b/docs/hello_nextflow/07_hello_modules.md index 3ec20e5268..3fb91b6076 100644 --- a/docs/hello_nextflow/07_hello_modules.md +++ b/docs/hello_nextflow/07_hello_modules.md @@ -292,8 +292,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ @@ -331,8 +331,8 @@ process GATK_JOINTGENOTYPING { path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" , emit: vcf + path "${cohort_name}.joint.vcf.idx" , emit: idx script: def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') diff --git a/hello-nextflow/hello-config/main.nf b/hello-nextflow/hello-config/main.nf index a5f7f8ccaa..1956a5392a 100644 --- a/hello-nextflow/hello-config/main.nf +++ b/hello-nextflow/hello-config/main.nf @@ -53,8 +53,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/hello-modules/main.nf b/hello-nextflow/hello-modules/main.nf index 90ef4e8f61..5570b532f1 100644 --- a/hello-nextflow/hello-modules/main.nf +++ b/hello-nextflow/hello-modules/main.nf @@ -39,8 +39,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf b/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf index 836dee6dca..9475cd33b8 100644 --- a/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf +++ 
b/hello-nextflow/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf @@ -18,8 +18,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf b/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf index d0b5429af4..f0d4e8502e 100644 --- a/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf +++ b/hello-nextflow/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf @@ -18,8 +18,8 @@ process GATK_JOINTGENOTYPING { path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" , emit: vcf + path "${cohort_name}.joint.vcf.idx" , emit: idx script: def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') diff --git a/hello-nextflow/hello-operators.nf b/hello-nextflow/hello-operators.nf index d9b3e7e7dd..646b7d4ebb 100644 --- a/hello-nextflow/hello-operators.nf +++ b/hello-nextflow/hello-operators.nf @@ -50,8 +50,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-config/final-main.nf b/hello-nextflow/solutions/hello-config/final-main.nf index 90ef4e8f61..5570b532f1 100644 --- a/hello-nextflow/solutions/hello-config/final-main.nf +++ b/hello-nextflow/solutions/hello-config/final-main.nf @@ -39,8 +39,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git 
a/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf index 405ca033b6..c668b742e1 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-2.nf @@ -51,8 +51,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf index dc8e5eb467..5f42ec999e 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-3.nf @@ -54,8 +54,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf index d9b3e7e7dd..099a89003e 100644 --- a/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf +++ b/hello-nextflow/solutions/hello-genomics/hello-genomics-4.nf @@ -50,8 +50,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.vcf" - path "${input_bam}.vcf.idx" + path "${input_bam}.vcf" , emit: vcf + path "${input_bam}.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_HAPLOTYPECALLER/main.nf b/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_HAPLOTYPECALLER/main.nf index 836dee6dca..9475cd33b8 100644 --- a/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_HAPLOTYPECALLER/main.nf +++ 
b/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_HAPLOTYPECALLER/main.nf @@ -18,8 +18,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_JOINTGENOTYPING/main.nf b/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_JOINTGENOTYPING/main.nf index d0b5429af4..a8babd379f 100644 --- a/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_JOINTGENOTYPING/main.nf +++ b/hello-nextflow/solutions/hello-modules/modules/local/gatk/GATK_JOINTGENOTYPING/main.nf @@ -18,8 +18,8 @@ process GATK_JOINTGENOTYPING { path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" , emit: vcf + path "${cohort_name}.joint.vcf.idx", emit: idx script: def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') diff --git a/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf b/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf index 836dee6dca..6d57951254 100644 --- a/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf +++ b/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/haplotypecaller/main.nf @@ -18,8 +18,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf b/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf index d0b5429af4..a8babd379f 100644 --- 
a/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf +++ b/hello-nextflow/solutions/hello-nf-test/modules/local/gatk/jointgenotyping/main.nf @@ -18,8 +18,8 @@ process GATK_JOINTGENOTYPING { path ref_dict output: - path "${cohort_name}.joint.vcf" - path "${cohort_name}.joint.vcf.idx" + path "${cohort_name}.joint.vcf" , emit: vcf + path "${cohort_name}.joint.vcf.idx", emit: idx script: def gvcfs_line = all_gvcfs.collect { gvcf -> "-V ${gvcf}" }.join(' ') diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf index 928c4ac9d6..c03c2480cd 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-1.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-1.nf @@ -50,8 +50,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf index 786d192ab7..4e4aa47ede 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-2.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-2.nf @@ -53,8 +53,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path "${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \ diff --git a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf index a5f7f8ccaa..6d114bb01e 100644 --- a/hello-nextflow/solutions/hello-operators/hello-operators-3.nf +++ b/hello-nextflow/solutions/hello-operators/hello-operators-3.nf @@ -53,8 +53,8 @@ process GATK_HAPLOTYPECALLER { path interval_list output: - path 
"${input_bam}.g.vcf" - path "${input_bam}.g.vcf.idx" + path "${input_bam}.g.vcf" , emit: vcf + path "${input_bam}.g.vcf.idx" , emit: idx """ gatk HaplotypeCaller \