From 11d0f9e9c2d3bd587ec830eeb044e92d8d89c6fa Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Tue, 21 May 2024 15:16:33 -0400 Subject: [PATCH 1/4] Fix #336 baseDir -> projectDir, from -> of Signed-off-by: Marcel Ribeiro-Dantas --- docs/basic_training/processes.md | 10 +++--- docs/basic_training/processes.pt.md | 12 +++---- docs/hands_on/04_implementation.md | 10 +++--- docs/hello_nextflow/03_hello_gatk.md | 36 +++++++++---------- docs/hello_nextflow/04_hello_modules.md | 14 ++++---- docs/hello_nextflow/05_hello_nf-test.md | 46 ++++++++++++------------- docs/troubleshoot/06_exercise.md | 12 +++---- hands-on/final_main.nf | 8 ++--- hello-nextflow/hello-modules.nf | 14 ++++---- hello-nextflow/scripts/hello-gatk-1.nf | 10 +++--- hello-nextflow/scripts/hello-gatk-2.nf | 18 +++++----- hello-nextflow/scripts/hello-gatk-3.nf | 22 ++++++------ hello-nextflow/scripts/hello-gatk-4.nf | 14 ++++---- hello-nextflow/scripts/hello-gatk-5.nf | 14 ++++---- hello-nextflow/scripts/hello-gatk-6.nf | 14 ++++---- hello-nextflow/scripts/nextflow.config | 14 ++++---- mkdocs.yml | 3 -- troubleshoot/exercise1/hello-gatk.nf | 14 ++++---- troubleshoot/exercise2/hello-gatk.nf | 14 ++++---- troubleshoot/exercise3/hello-gatk.nf | 14 ++++---- troubleshoot/exercise4/hello-gatk.nf | 14 ++++---- troubleshoot/exercise5/hello-gatk.nf | 14 ++++---- troubleshoot/exercise6/hello-gatk.nf | 14 ++++---- troubleshoot/hello-gatk.nf | 14 ++++---- 24 files changed, 183 insertions(+), 186 deletions(-) diff --git a/docs/basic_training/processes.md b/docs/basic_training/processes.md index 97deddd2..11251328 100644 --- a/docs/basic_training/processes.md +++ b/docs/basic_training/processes.md @@ -223,7 +223,7 @@ The process script can also be defined in a completely dynamic manner using an ` ```groovy linenums="1" title="snippet.nf" params.compress = 'gzip' -params.file2compress = "$baseDir/data/ggal/transcriptome.fa" +params.file2compress = "$projectDir/data/ggal/transcriptome.fa" process FOO { debug true 
@@ -551,8 +551,8 @@ As `ch2` is now a _value_ channel, it can be consumed multiple times and do not One possible solution is shown below: ```groovy linenums="1" title="snippet.nf" - params.reads = "$baseDir/data/ggal/*_1.fq" - params.transcriptome_file = "$baseDir/data/ggal/transcriptome.fa" + params.reads = "$projectDir/data/ggal/*_1.fq" + params.transcriptome_file = "$projectDir/data/ggal/transcriptome.fa" Channel .fromPath(params.reads) @@ -583,7 +583,7 @@ As `ch2` is now a _value_ channel, it can be consumed multiple times and do not The `each` qualifier allows you to repeat the execution of a process for each item in a collection every time new data is received. For example: ```groovy linenums="1" title="snippet.nf" -sequences = Channel.fromPath("$baseDir/data/ggal/*_1.fq") +sequences = Channel.fromPath("$projectDir/data/ggal/*_1.fq") methods = ['regular', 'espresso'] process ALIGNSEQUENCES { @@ -624,7 +624,7 @@ In the above example, every time a file of sequences is received as an input by Modify the methods list and add another coffee type: ```groovy linenums="1" title="snippet.nf" - sequences = Channel.fromPath("$baseDir/data/ggal/*_1.fq") + sequences = Channel.fromPath("$projectDir/data/ggal/*_1.fq") methods = ['regular', 'espresso', 'cappuccino'] process ALIGNSEQUENCES { diff --git a/docs/basic_training/processes.pt.md b/docs/basic_training/processes.pt.md index f41a18e4..304257bc 100644 --- a/docs/basic_training/processes.pt.md +++ b/docs/basic_training/processes.pt.md @@ -188,7 +188,7 @@ O script do processo também pode ser definido de maneira completamente dinâmic ```groovy linenums="1" params.compressor = 'gzip' -params.arquivo_a_comprimir = "$baseDir/data/ggal/transcriptome.fa" +params.arquivo_a_comprimir = "$projectDir/data/ggal/transcriptome.fa" process FOO { input: @@ -347,7 +347,7 @@ workflow { ??? 
solution ```groovy linenums="1" - params.leituras = "$baseDir/data/ggal/*_1.fq" + params.leituras = "$projectDir/data/ggal/*_1.fq" Channel .fromPath(params.leituras) @@ -484,8 +484,8 @@ Isso ocorre porque os canais de valor podem ser consumidos várias vezes e não ??? solution ```groovy linenums="1" - params.leituras = "$baseDir/data/ggal/*_1.fq" - params.arquivo_transcriptoma = "$baseDir/data/ggal/transcriptome.fa" + params.leituras = "$projectDir/data/ggal/*_1.fq" + params.arquivo_transcriptoma = "$projectDir/data/ggal/transcriptome.fa" Channel .fromPath(params.leituras) @@ -548,8 +548,8 @@ No exemplo acima, toda vez que um arquivo de sequências é recebido como entrad ??? solution ```groovy linenums="1" - params.leituras = "$baseDir/data/ggal/*_1.fq" - params.arquivo_transcriptoma = "$baseDir/data/ggal/transcriptome.fa" + params.leituras = "$projectDir/data/ggal/*_1.fq" + params.arquivo_transcriptoma = "$projectDir/data/ggal/transcriptome.fa" metodos= ['salmon', 'kallisto'] Channel diff --git a/docs/hands_on/04_implementation.md b/docs/hands_on/04_implementation.md index d26db870..2f4b7a9b 100644 --- a/docs/hands_on/04_implementation.md +++ b/docs/hands_on/04_implementation.md @@ -35,15 +35,15 @@ Edit this file to specify the input files as script parameters. Using this notat * Define the default parameters (1) */ -params.genome = "${baseDir}/data/genome.fa" // (2)! -params.variants = "${baseDir}/data/known_variants.vcf.gz" -params.blacklist = "${baseDir}/data/blacklist.bed" -params.reads = "${baseDir}/data/reads/ENCSR000COQ1_{1,2}.fastq.gz" // (3)! +params.genome = "${projectDir}/data/genome.fa" // (2)! +params.variants = "${projectDir}/data/known_variants.vcf.gz" +params.blacklist = "${projectDir}/data/blacklist.bed" +params.reads = "${projectDir}/data/reads/ENCSR000COQ1_{1,2}.fastq.gz" // (3)! params.results = "results" // (4)! ``` 1. The `/*`, `*` and `*/` specify comment lines which are ignored by Nextflow. -2. 
The `baseDir` variable represents the main script path location. +2. The `projectDir` variable represents the main script path location. 3. The `reads` parameter uses a glob pattern to specify the forward (`ENCSR000COQ1_1.fq.gz`) and reverse (`ENCSR000COQ1_2.fq.gz`) reads (paired-end) of a sample. 4. The `results` parameter is used to specify a directory called `results`. diff --git a/docs/hello_nextflow/03_hello_gatk.md b/docs/hello_nextflow/03_hello_gatk.md index 956e3a39..19713cc6 100644 --- a/docs/hello_nextflow/03_hello_gatk.md +++ b/docs/hello_nextflow/03_hello_gatk.md @@ -143,11 +143,11 @@ process SAMTOOLS_INDEX { */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/bam/reads_mother.bam" +params.reads_bam = "${projectDir}/data/bam/reads_mother.bam" ``` #### 1.3. Add workflow block to run SAMTOOLS_INDEX @@ -156,7 +156,7 @@ params.reads_bam = "${baseDir}/data/bam/reads_mother.bam" workflow { // Create input channel (single file via CLI parameter) - reads_ch = Channel.from(params.reads_bam) + reads_ch = Channel.of(params.reads_bam) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) @@ -227,10 +227,10 @@ process GATK_HAPLOTYPECALLER { ```groovy title="hello-gatk.nf" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" ``` #### 2.3. 
Add a call to the workflow block to run GATK_HAPLOTYPECALLER @@ -290,9 +290,9 @@ Make the workflow handle multiple samples in bulk. ```groovy title="hello-gatk.nf" // Primary input params.reads_bam = [ - "${baseDir}/data/bam/reads_mother.bam", - "${baseDir}/data/bam/reads_father.bam", - "${baseDir}/data/bam/reads_son.bam" + "${projectDir}/data/bam/reads_mother.bam", + "${projectDir}/data/bam/reads_father.bam", + "${projectDir}/data/bam/reads_son.bam" ] ``` @@ -409,9 +409,9 @@ _Before:_ ```groovy title="hello-gatk.nf" // Primary input params.reads_bam = [ - "${baseDir}/data/bam/reads_mother.bam", - "${baseDir}/data/bam/reads_father.bam", - "${baseDir}/data/bam/reads_son.bam" + "${projectDir}/data/bam/reads_mother.bam", + "${projectDir}/data/bam/reads_father.bam", + "${projectDir}/data/bam/reads_son.bam" ] ``` @@ -419,7 +419,7 @@ _After:_ ```groovy title="hello-gatk.nf" // Primary input (list of input files, one per line) -params.reads_bam = "${baseDir}/data/sample_bams.txt" +params.reads_bam = "${projectDir}/data/sample_bams.txt" ``` #### 4.3. Update the channel factory to read lines from a file @@ -428,7 +428,7 @@ _Before:_ ```groovy title="hello-gatk.nf" // Create input channel -reads_ch = Channel.from(params.reads_bam) +reads_ch = Channel.of(params.reads_bam) ``` _After:_ @@ -486,14 +486,14 @@ _Before:_ ```groovy title="hello-gatk.nf" // Primary input (list of input files, one sample per line) -params.reads_bam = "${baseDir}/data/bam/sample_bams.txt" +params.reads_bam = "${projectDir}/data/bam/sample_bams.txt" ``` _After:_ ```groovy title="hello-gatk.nf" // Primary input (samplesheet in CSV format with ID and file path, one sample per line) -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" ``` #### 5.3. 
Update the channel factory to parse a CSV file diff --git a/docs/hello_nextflow/04_hello_modules.md b/docs/hello_nextflow/04_hello_modules.md index 28a8f588..321fa936 100644 --- a/docs/hello_nextflow/04_hello_modules.md +++ b/docs/hello_nextflow/04_hello_modules.md @@ -42,17 +42,17 @@ The pipeline takes in three BAM files, each one containing sequencing data for o */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input (samplesheet in CSV format with ID and file path, one sample per line) -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/docs/hello_nextflow/05_hello_nf-test.md b/docs/hello_nextflow/05_hello_nf-test.md index ba35e9c4..528dab40 100644 --- a/docs/hello_nextflow/05_hello_nf-test.md +++ b/docs/hello_nextflow/05_hello_nf-test.md @@ -169,7 +169,7 @@ _After:_ ```groovy title="modules/local/samtools/index/tests/main.nf.test" linenums="14" process { """ - input[0] = [ [id: 'NA12882' ], file("${baseDir}/data/bam/reads_son.bam") ] + input[0] = [ [id: 'NA12882' ], file("${projectDir}/data/bam/reads_son.bam") ] """ } ``` @@ -271,7 +271,7 @@ test("reads_mother [bam]") { } process { """ - input[0] = [ [id: 'NA12878' ], 
file("${baseDir}/data/bam/reads_mother.bam") ] + input[0] = [ [id: 'NA12878' ], file("${projectDir}/data/bam/reads_mother.bam") ] """ } } @@ -295,7 +295,7 @@ test("reads_father [bam]") { } process { """ - input[0] = [ [id: 'NA12877' ], file("${baseDir}/data/bam/reads_father.bam") ] + input[0] = [ [id: 'NA12877' ], file("${projectDir}/data/bam/reads_father.bam") ] """ } } @@ -447,7 +447,7 @@ test("reads_son [bam]") { script "../../../samtools/index/main.nf" process { """ - input[0] = [ [id: 'NA12882' ], file("${baseDir}/data/bam/reads_son.bam") ] + input[0] = [ [id: 'NA12882' ], file("${projectDir}/data/bam/reads_son.bam") ] """ } } @@ -466,10 +466,10 @@ Then we can refer to the output of that process in the `when` block where we spe process { """ input[0] = SAMTOOLS_INDEX.out - input[1] = file("${baseDir}/data/ref/ref.fasta") - input[2] = file("${baseDir}/data/ref/ref.fasta.fai") - input[3] = file("${baseDir}/data/ref/ref.dict") - input[4] = file("${baseDir}/data/intervals.list") + input[1] = file("${projectDir}/data/ref/ref.fasta") + input[2] = file("${projectDir}/data/ref/ref.fasta.fai") + input[3] = file("${projectDir}/data/ref/ref.dict") + input[4] = file("${projectDir}/data/intervals.list") """ } } @@ -625,7 +625,7 @@ test("reads_mother [bam]") { script "../../../samtools/index/main.nf" process { """ - input[0] = [ [id: 'NA12882' ], file("${baseDir}/data/bam/reads_mother.bam") ] + input[0] = [ [id: 'NA12882' ], file("${projectDir}/data/bam/reads_mother.bam") ] """ } } @@ -638,10 +638,10 @@ test("reads_mother [bam]") { process { """ input[0] = SAMTOOLS_INDEX.out - input[1] = file("${baseDir}/data/ref/ref.fasta") - input[2] = file("${baseDir}/data/ref/ref.fasta.fai") - input[3] = file("${baseDir}/data/ref/ref.dict") - input[4] = file("${baseDir}/data/intervals.list") + input[1] = file("${projectDir}/data/ref/ref.fasta") + input[2] = file("${projectDir}/data/ref/ref.fasta.fai") + input[3] = file("${projectDir}/data/ref/ref.dict") + input[4] = 
file("${projectDir}/data/intervals.list") """ } } @@ -664,7 +664,7 @@ test("reads_father [bam]") { script "../../../samtools/index/main.nf" process { """ - input[0] = [ [id: 'NA12882' ], file("${baseDir}/data/bam/reads_father.bam") ] + input[0] = [ [id: 'NA12882' ], file("${projectDir}/data/bam/reads_father.bam") ] """ } } @@ -677,10 +677,10 @@ test("reads_father [bam]") { process { """ input[0] = SAMTOOLS_INDEX.out - input[1] = file("${baseDir}/data/ref/ref.fasta") - input[2] = file("${baseDir}/data/ref/ref.fasta.fai") - input[3] = file("${baseDir}/data/ref/ref.dict") - input[4] = file("${baseDir}/data/intervals.list") + input[1] = file("${projectDir}/data/ref/ref.fasta") + input[2] = file("${projectDir}/data/ref/ref.fasta.fai") + input[3] = file("${projectDir}/data/ref/ref.dict") + input[4] = file("${projectDir}/data/intervals.list") """ } } @@ -825,12 +825,12 @@ test("family_trio [vcf] [idx]") { } process { """ - input[0] = file("${baseDir}/modules/local/gatk/jointgenotyping/tests/inputs/family_trio_map.tsv") + input[0] = file("${projectDir}/modules/local/gatk/jointgenotyping/tests/inputs/family_trio_map.tsv") input[1] = "family_trio" - input[2] = file("${baseDir}/data/ref/ref.fasta") - input[3] = file("${baseDir}/data/ref/ref.fasta.fai") - input[4] = file("${baseDir}/data/ref/ref.dict") - input[5] = file("${baseDir}/data/intervals.list") + input[2] = file("${projectDir}/data/ref/ref.fasta") + input[3] = file("${projectDir}/data/ref/ref.fasta.fai") + input[4] = file("${projectDir}/data/ref/ref.dict") + input[5] = file("${projectDir}/data/intervals.list") """ } } diff --git a/docs/troubleshoot/06_exercise.md b/docs/troubleshoot/06_exercise.md index 6a6876d7..cbe5fcfb 100644 --- a/docs/troubleshoot/06_exercise.md +++ b/docs/troubleshoot/06_exercise.md @@ -27,19 +27,19 @@ nextflow run hello-gatk.nf ```console title="hello-gatk.nf" linenums="5" // Execution environment setup - params.baseDir = "/workspace/gitpod/troubleshoot" - $baseDir = params.baseDir + 
params.projectDir = "/workspace/gitpod/troubleshoot" + $projectDir = params.projectDir // Primary input - params.reads_bam = "${baseDir}/data/samplesheet.csv" + params.reads_bam = "${projectDir}/data/samplesheet.csv" ``` - However, it can be noted that there is a `$` before `baseDir` on line 7 which would suggest it is a variable. + However, it can be noted that there is a `$` before `projectDir` on line 7 which would suggest it is a variable. - `$baseDir` was once an implicit variable, and its usage here is causing the wrong path. + `projectDir` is an implicit variable, and the `$` prefix used here is causing the wrong path. By removing the `$` the proper path should be resolved. ```console title="hello-gatk.nf" linenums="7" - baseDir = params.baseDir + projectDir = params.projectDir ``` diff --git a/hands-on/final_main.nf b/hands-on/final_main.nf index 3722694c..609308da 100644 --- a/hands-on/final_main.nf +++ b/hands-on/final_main.nf @@ -2,10 +2,10 @@ * Define the default parameters */ -params.genome = "${baseDir}/data/genome.fa" -params.variants = "${baseDir}/data/known_variants.vcf.gz" -params.blacklist = "${baseDir}/data/blacklist.bed" -params.reads = "${baseDir}/data/reads/ENCSR000C*_{1,2}.fastq.gz" +params.genome = "${projectDir}/data/genome.fa" +params.variants = "${projectDir}/data/known_variants.vcf.gz" +params.blacklist = "${projectDir}/data/blacklist.bed" +params.reads = "${projectDir}/data/reads/ENCSR000C*_{1,2}.fastq.gz" params.results = "results" /* diff --git a/hello-nextflow/hello-modules.nf b/hello-nextflow/hello-modules.nf index c942c60f..b0c8f864 100644 --- a/hello-nextflow/hello-modules.nf +++ b/hello-nextflow/hello-modules.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/nf-training/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/nf-training/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" 
+params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/hello-nextflow/scripts/hello-gatk-1.nf b/hello-nextflow/scripts/hello-gatk-1.nf index 67033986..e5b5ac73 100644 --- a/hello-nextflow/scripts/hello-gatk-1.nf +++ b/hello-nextflow/scripts/hello-gatk-1.nf @@ -3,11 +3,11 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/bam/reads_mother.bam" +params.reads_bam = "${projectDir}/data/bam/reads_mother.bam" /* * Generate BAM index file @@ -31,8 +31,8 @@ process SAMTOOLS_INDEX { workflow { // Create input channel - reads_ch = Channel.from(params.reads_bam) + reads_ch = Channel.of(params.reads_bam) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) -} \ No newline at end of file +} diff --git a/hello-nextflow/scripts/hello-gatk-2.nf b/hello-nextflow/scripts/hello-gatk-2.nf index 25d31196..99d77be0 100644 --- a/hello-nextflow/scripts/hello-gatk-2.nf +++ b/hello-nextflow/scripts/hello-gatk-2.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam 
= "${baseDir}/data/bam/reads_mother.bam" +params.reads_bam = "${projectDir}/data/bam/reads_mother.bam" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" /* * Generate BAM index file @@ -66,7 +66,7 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel - reads_ch = Channel.from(params.reads_bam) + reads_ch = Channel.of(params.reads_bam) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) @@ -80,4 +80,4 @@ workflow { params.genome_reference_dict, params.calling_intervals ) -} \ No newline at end of file +} diff --git a/hello-nextflow/scripts/hello-gatk-3.nf b/hello-nextflow/scripts/hello-gatk-3.nf index 8bf7f1ad..18de2dfa 100644 --- a/hello-nextflow/scripts/hello-gatk-3.nf +++ b/hello-nextflow/scripts/hello-gatk-3.nf @@ -3,19 +3,19 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = ["${baseDir}/data/bam/reads_mother.bam", - "${baseDir}/data/bam/reads_father.bam", - "${baseDir}/data/bam/reads_son.bam"] +params.reads_bam = ["${projectDir}/data/bam/reads_mother.bam", + "${projectDir}/data/bam/reads_father.bam", + "${projectDir}/data/bam/reads_son.bam"] // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" 
-params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" /* * Generate BAM index file @@ -67,7 +67,7 @@ process GATK_HAPLOTYPECALLER { workflow { // Create input channel - reads_ch = Channel.from(params.reads_bam) + reads_ch = Channel.of(params.reads_bam) // Create index file for input BAM file SAMTOOLS_INDEX(reads_ch) @@ -80,4 +80,4 @@ workflow { params.genome_reference_dict, params.calling_intervals ) -} \ No newline at end of file +} diff --git a/hello-nextflow/scripts/hello-gatk-4.nf b/hello-nextflow/scripts/hello-gatk-4.nf index 00dc0f68..1a5abcba 100644 --- a/hello-nextflow/scripts/hello-gatk-4.nf +++ b/hello-nextflow/scripts/hello-gatk-4.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input (list of input files, one per line) -params.reads_bam = "${baseDir}/data/sample_bams.txt" +params.reads_bam = "${projectDir}/data/sample_bams.txt" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" /* * Generate BAM index file diff --git a/hello-nextflow/scripts/hello-gatk-5.nf b/hello-nextflow/scripts/hello-gatk-5.nf index 8aef43c8..e23ddca4 100644 
--- a/hello-nextflow/scripts/hello-gatk-5.nf +++ b/hello-nextflow/scripts/hello-gatk-5.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/hello-nextflow" +$projectDir = params.projectDir // Primary input (samplesheet in CSV format with ID and file path, one sample per line) -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" /* * Generate BAM index file diff --git a/hello-nextflow/scripts/hello-gatk-6.nf b/hello-nextflow/scripts/hello-gatk-6.nf index c942c60f..b0c8f864 100644 --- a/hello-nextflow/scripts/hello-gatk-6.nf +++ b/hello-nextflow/scripts/hello-gatk-6.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/nf-training/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/nf-training/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" 
+params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/hello-nextflow/scripts/nextflow.config b/hello-nextflow/scripts/nextflow.config index 0cf2867d..0c950dc1 100644 --- a/hello-nextflow/scripts/nextflow.config +++ b/hello-nextflow/scripts/nextflow.config @@ -6,17 +6,17 @@ docker.fixOwnership = true */ // Execution environment setup -params.baseDir = "/workspace/gitpod/nf-training/hello-nextflow" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/nf-training/hello-nextflow" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/mkdocs.yml b/mkdocs.yml index 20642136..c50110ca 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -139,9 +139,6 @@ markdown_extensions: - attr_list - md_in_html - pymdownx.details - - pymdownx.emoji: - emoji_index: !!python/name:material.extensions.emoji.twemoji - emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.highlight: anchor_linenums: true - pymdownx.inlinehilite diff --git a/troubleshoot/exercise1/hello-gatk.nf b/troubleshoot/exercise1/hello-gatk.nf index 
0ebe5b8e..b0e6b435 100644 --- a/troubleshoot/exercise1/hello-gatk.nf +++ b/troubleshoot/exercise1/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/exercise2/hello-gatk.nf b/troubleshoot/exercise2/hello-gatk.nf index 17ac5777..0fa52e0b 100644 --- a/troubleshoot/exercise2/hello-gatk.nf +++ b/troubleshoot/exercise2/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = 
"${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/exercise3/hello-gatk.nf b/troubleshoot/exercise3/hello-gatk.nf index a799d06b..83db804c 100644 --- a/troubleshoot/exercise3/hello-gatk.nf +++ b/troubleshoot/exercise3/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/exercise4/hello-gatk.nf b/troubleshoot/exercise4/hello-gatk.nf index fb3212b6..915e3940 100644 --- a/troubleshoot/exercise4/hello-gatk.nf +++ b/troubleshoot/exercise4/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = 
"${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/exercise5/hello-gatk.nf b/troubleshoot/exercise5/hello-gatk.nf index 3a50107d..703259a0 100644 --- a/troubleshoot/exercise5/hello-gatk.nf +++ b/troubleshoot/exercise5/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/exercise6/hello-gatk.nf b/troubleshoot/exercise6/hello-gatk.nf index 89f3d3fa..3a52e924 100644 --- a/troubleshoot/exercise6/hello-gatk.nf +++ b/troubleshoot/exercise6/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = 
"/workspace/gitpod/troubleshoot" -$baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +$projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" diff --git a/troubleshoot/hello-gatk.nf b/troubleshoot/hello-gatk.nf index 0ebe5b8e..b0e6b435 100644 --- a/troubleshoot/hello-gatk.nf +++ b/troubleshoot/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.baseDir = "/workspace/gitpod/troubleshoot" -baseDir = params.baseDir +params.projectDir = "/workspace/gitpod/troubleshoot" +projectDir = params.projectDir // Primary input -params.reads_bam = "${baseDir}/data/samplesheet.csv" +params.reads_bam = "${projectDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${baseDir}/data/ref/ref.fasta" -params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" -params.calling_intervals = "${baseDir}/data/intervals.list" +params.genome_reference = "${projectDir}/data/ref/ref.fasta" +params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" +params.calling_intervals = "${projectDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio" From 
2257b30e7e07a6d2826829f886c5c8c81a3cd075 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Mon, 27 May 2024 20:45:04 -0300 Subject: [PATCH 2/4] Update docs/troubleshoot/06_exercise.md Co-authored-by: Christopher Hakkaart --- docs/troubleshoot/06_exercise.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/troubleshoot/06_exercise.md b/docs/troubleshoot/06_exercise.md index cbe5fcfb..c5f084c2 100644 --- a/docs/troubleshoot/06_exercise.md +++ b/docs/troubleshoot/06_exercise.md @@ -41,5 +41,5 @@ nextflow run hello-gatk.nf By removing the `$` the proper path should be resolved. ```console title="hello-gatk.nf" linenums="7" - projectDir = params.projectDir + baseDir = params.baseDir ``` From 7935158f2bcd011fd47aad81728ea1e04a663b85 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Mon, 27 May 2024 20:45:48 -0300 Subject: [PATCH 3/4] Apply suggestions from code review Co-authored-by: Christopher Hakkaart --- docs/troubleshoot/06_exercise.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/troubleshoot/06_exercise.md b/docs/troubleshoot/06_exercise.md index c5f084c2..6a6876d7 100644 --- a/docs/troubleshoot/06_exercise.md +++ b/docs/troubleshoot/06_exercise.md @@ -27,16 +27,16 @@ nextflow run hello-gatk.nf ```console title="hello-gatk.nf" linenums="5" // Execution environment setup - params.projectDir = "/workspace/gitpod/troubleshoot" - $projectDir = params.projectDir + params.baseDir = "/workspace/gitpod/troubleshoot" + $baseDir = params.baseDir // Primary input - params.reads_bam = "${projectDir}/data/samplesheet.csv" + params.reads_bam = "${baseDir}/data/samplesheet.csv" ``` - However, it can be noted that there is a `$` before `projectDir` on line 7 which would suggest it is a variable. + However, it can be noted that there is a `$` before `baseDir` on line 7 which would suggest it is a variable. - `$projectDir` was once an implicit variable, and its usage here is causing the wrong path. 
+ `$baseDir` was once an implicit variable, and its usage here is causing the wrong path. By removing the `$` the proper path should be resolved. From 45ec07c288118c55fd213df5f80765083d620e78 Mon Sep 17 00:00:00 2001 From: Marcel Ribeiro-Dantas Date: Mon, 27 May 2024 20:46:41 -0300 Subject: [PATCH 4/4] Apply suggestions from code review Co-authored-by: Christopher Hakkaart --- troubleshoot/exercise6/hello-gatk.nf | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/troubleshoot/exercise6/hello-gatk.nf b/troubleshoot/exercise6/hello-gatk.nf index 3a52e924..89f3d3fa 100644 --- a/troubleshoot/exercise6/hello-gatk.nf +++ b/troubleshoot/exercise6/hello-gatk.nf @@ -3,17 +3,17 @@ */ // Execution environment setup -params.projectDir = "/workspace/gitpod/troubleshoot" -$projectDir = params.projectDir +params.baseDir = "/workspace/gitpod/troubleshoot" +$baseDir = params.baseDir // Primary input -params.reads_bam = "${projectDir}/data/samplesheet.csv" +params.reads_bam = "${baseDir}/data/samplesheet.csv" // Accessory files -params.genome_reference = "${projectDir}/data/ref/ref.fasta" -params.genome_reference_index = "${projectDir}/data/ref/ref.fasta.fai" -params.genome_reference_dict = "${projectDir}/data/ref/ref.dict" -params.calling_intervals = "${projectDir}/data/intervals.list" +params.genome_reference = "${baseDir}/data/ref/ref.fasta" +params.genome_reference_index = "${baseDir}/data/ref/ref.fasta.fai" +params.genome_reference_dict = "${baseDir}/data/ref/ref.dict" +params.calling_intervals = "${baseDir}/data/intervals.list" // Base name for final output file params.cohort_name = "family_trio"