From 15388ba0ea05dfeb7fbafe465a4e5883fc06372b Mon Sep 17 00:00:00 2001 From: Ido Bar Date: Mon, 11 Jan 2021 09:33:46 +1000 Subject: [PATCH 1/4] Add Read Groups to bowtie2 mapping Add Read Group information to `bowtie2` mapping to fix issue [#655 ] --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index ee9883454..d1ba4e285 100644 --- a/main.nf +++ b/main.nf @@ -1663,13 +1663,13 @@ process bowtie2 { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:ILLUMINA 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { //PE collapsed, or SE data """ - bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:ILLUMINA 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } From e3ad6ea937d38a96ff8cf43695fdc9786f0075c4 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Jan 2021 06:18:25 +0100 Subject: [PATCH 2/4] Add CI test for bowtie2 readgroup issue --- .github/workflows/ci.yml | 10 ++++++---- main.nf | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8edacdf46..0b9bc6549 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -144,18 +144,20 @@ jobs: - name: PMDTOOLS Test PMDtools works alone run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools + - name: GENOTYPING_UG AND BOWTIE2 Test running bowtie2 mapping and genotyping to check valid output for GATK + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer --additional_vcf_files 'https://raw.githubusercontent.com/nf-core/test-datasets/eager/testdata/Mammoth/vcf/JK2772_CATCAGTGAGTAGA_L008_R1_001.fastq.gz.tengrand.fq.combined.fq.mapped_rmdup.bam.unifiedgenotyper.vcf.gz' --write_allele_frequencies - name: COMPLEX LANE/LIBRARY MERGING Test running lane and library merging prior to GATK UnifiedGenotyper and running MultiVCFAnalyzer run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_complex,docker --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' --run_multivcfanalyzer - name: GENOTYPING_UG ON TRIMMED BAM Test run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_genotyping --run_trim_bam --genotyping_source 'trimmed' --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' - name: BAM_INPUT Run the basic pipeline with the bam input profile, skip AdapterRemoval as no convertBam run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval + nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --skip_adapterremoval - name: BAM_INPUT Run the basic pipeline with the bam input profile, convert to FASTQ for adapterremoval test and downstream run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv_bam,docker --run_convertinputbam diff --git a/main.nf b/main.nf index d1ba4e285..5f627fb22 100644 --- a/main.nf +++ b/main.nf @@ -1663,13 +1663,13 @@ process bowtie2 { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:ILLUMINA 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { //PE collapsed, or SE data """ - bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:ILLUMINA 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } From 90709cd7cbd8e579bb3ba00f7c85d1a72d0975a1 Mon Sep 17 00:00:00 2001 From: James Fellows Yates Date: Mon, 11 Jan 2021 06:28:34 +0100 Subject: [PATCH 3/4] Add missing run: --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b9bc6549..4c0b36c41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -145,6 +145,7 @@ jobs: run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --run_pmdtools - name: GENOTYPING_UG AND BOWTIE2 Test running bowtie2 mapping and genotyping to check valid output for GATK + run: | nextflow run ${GITHUB_WORKSPACE} -profile test_tsv,docker --mapper 'bowtie2' --bt2_alignmode 'local' --bt2_sensitivity 'sensitive' --bt2n 1 --bt2l 16 --run_genotyping --genotyping_tool 'ug' --gatk_out_mode 'EMIT_ALL_SITES' --gatk_ug_genotype_model 'SNP' - name: GENOTYPING_UG AND MULTIVCFANALYZER Test running GATK UnifiedGenotyper and MultiVCFAnalyzer, additional VCFS run: | From d92fe4d55c808e95bd43dd166ba96dcc035259a4 Mon Sep 17 00:00:00 2001 From: Ido Bar Date: Tue, 12 Jan 2021 03:13:22 +1000 Subject: [PATCH 4/4] Apply suggestions from code review --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 5f627fb22..f0aacc4a2 100644 --- a/main.nf +++ b/main.nf @@ -1663,13 +1663,13 @@ process bowtie2 { //PE data without merging, PE data without any AR applied if ( seqtype == 'PE' && ( params.skip_collapse || params.skip_adapterremoval ) ){ """ - bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -1 ${r1} -2 ${r2} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ } else { //PE collapsed, or SE data """ - bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam + bowtie2 -x ${fasta} -U ${r1} -p ${task.cpus} ${sensitivity} ${bt2n} ${bt2l} ${trim5} ${trim3} --rg-id ILLUMINA-${libraryid} --rg SM:${libraryid} --rg PL:illumina --rg PU:ILLUMINA-${libraryid}-${seqtype} 2> "${libraryid}"_bt2.log | samtools sort -@ ${task.cpus} -O bam > "${libraryid}"_"${seqtype}".mapped.bam samtools index "${libraryid}"_"${seqtype}".mapped.bam ${size} """ }