Skip to content

Commit

Permalink
Merge pull request #844 from maxplanck-ie/bwa_mem2_and_misc
Browse files Browse the repository at this point in the history
Bwa mem2 and misc
  • Loading branch information
katsikora committed Oct 6, 2022
2 parents 51028e6 + 2fdf6a7 commit fb70901
Show file tree
Hide file tree
Showing 35 changed files with 225 additions and 46 deletions.
2 changes: 2 additions & 0 deletions .ci_stuff/organism.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ genome_2bit: ".ci_stuff/genome_fasta/genome.2bit"
bowtie2_index: ".ci_stuff/BowtieIndex/genome"
hisat2_index: ".ci_stuff/HISAT2Index/genome"
bwa_index: ".ci_stuff/BWAindex/genome.fa"
bwa_mem2_index: ".ci_stuff/BWA-MEM2Index/genome.fa"
bwameth_index: "/tmp/genome.fa"
bwameth2_index: "/tmp/genome.fa"
known_splicesites: ".ci_stuff/gencode/m9/HISAT2/splice_sites.txt"
star_index: ".ci_stuff/STARIndex/"
genes_bed: "/tmp/genes.bed"
Expand Down
14 changes: 10 additions & 4 deletions .ci_stuff/test_dag.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,14 +117,14 @@ snakePipes config

# createIndices
WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz blah | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 126 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 142 ]; then exit 1 ; fi
WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 132 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 148 ]; then exit 1 ; fi
WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genome ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtf ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 132 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 148 ]; then exit 1 ; fi
# spikein
WC=`createIndices -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --genomeURL ftp://ftp.ensembl.org/pub/release-93/fasta/mus_musculus/dna/Mus_musculus.GRCm38.dna_sm.primary_assembly.fa.gz --gtfURL ftp://ftp.ensembl.org/pub/release-93/gtf/mus_musculus/Mus_musculus.GRCm38.93.gtf.gz --spikeinGenomeURL ftp://ftp.ensembl.org/pub/release-79/fasta/drosophila_melanogaster/dna/Drosophila_melanogaster.BDGP6.dna_sm.toplevel.fa.gz --spikeinGtfURL ftp://ftp.ensembl.org/pub/release-96/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP6.22.96.gtf.gz --rmskURL http://hgdownload.soe.ucsc.edu/goldenPath/dm6/database/rmsk.txt.gz blah | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 165 ]; then exit 1 ; fi
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 181 ]; then exit 1 ; fi


# DNA mapping
Expand All @@ -146,6 +146,8 @@ WC=`DNA-mapping -i SE_input -o output .ci_stuff/organism.yaml --snakemakeOptions
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 705 ]; then exit 1 ; fi
WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 719 ]; then exit 1 ; fi
WC=`DNA-mapping -i PE_input -o output .ci_stuff/organism.yaml --snakemakeOptions " --dryrun --conda-prefix /tmp" --DAG --trim --aligner bwa-mem2 | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 719 ]; then exit 1 ; fi
#allelic
WC=`DNA-mapping -m allelic-mapping -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --VCFfile allelic_input/file.vcf.gz --strains strain1,strain2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 1297 ]; then exit 1 ; fi
Expand Down Expand Up @@ -252,6 +254,8 @@ if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 354 ]; then exit 1 ; fi
# WGBS
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 751 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --aligner bwameth2 --snakemakeOptions " --dryrun --conda-prefix /tmp" .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 751 ]; then exit 1 ; fi
WC=`WGBS -i PE_input -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --snakemakeOptions " --dryrun --conda-prefix /tmp" --trim --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 760 ]; then exit 1 ; fi
WC=`WGBS -i BAM_input/filtered_bam -o output --sampleSheet .ci_stuff/test_sampleSheet.tsv --fromBAM --snakemakeOptions " --dryrun --conda-prefix /tmp" --GCbias .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down Expand Up @@ -284,6 +288,8 @@ WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp"
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 463 ]; then exit 1 ; fi
WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --noTAD .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 413 ]; then exit 1 ; fi
WC=`HiC -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --aligner bwa-mem2 .ci_stuff/organism.yaml | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
if [ ${PIPESTATUS[0]} -ne 0 ] || [ $WC -ne 463 ]; then exit 1 ; fi

# preprocessing
WC=`preprocessing -i PE_input -o output --snakemakeOptions " --dryrun --conda-prefix /tmp" --fastqc --optDedupDist 2500 | tee >(cat 1>&2) | grep -v "Conda environment" | sed '/^\s*$/d' | wc -l`
Expand Down
11 changes: 7 additions & 4 deletions docs/content/News.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ snakePipes News
snakePipes 2.x.x
----------------

* Bumped software versions in environment yamls.
* Added apeglm2 logFC shrinkage to allelic DESeq2 results.
* Added bwa-mem2 as an optional aligner to DNA-mapping and HiC, and as an additional tool to createIndices.
* Added bwameth2 as an optional tool to createIndices - this will create a bwameth index with bwa-mem2.
* Added bwameth2 as an optional aligner to WGBS - this will run bwameth with bwa-mem2 underneath.
* Updated software versions in environment yamls.
* Updated organism yamls.
* Updated CSAW output.
* Fixed a couple of issues in the ATAC-seq workflow after software versions update.
* Updated organism yamls.
* Added apeglm2 logFC shrinkage to allelic DESeq2 results.

* Fixed genome size conversion to string.

snakePipes 2.5.4
----------------
Expand Down
6 changes: 3 additions & 3 deletions docs/content/workflows/HiC.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ What it does
------------

The snakePipes HiC workflow allows users to process their HiC data from raw fastq files to
corrected HiC matrices and TADs. The workflow utilized mapping by BWA, followed by analysis
corrected HiC matrices and TADs. The workflow utilizes mapping by bwa or bwa-mem2, followed by analysis
using `HiCExplorer <https://www.nature.com/articles/s41467-017-02525-w>`__. The workflow follows the `example workflow described in the documentation of HiCExplorer <https://hicexplorer.readthedocs.io/en/latest/content/mES-HiC_analysis.html>`__, which explains each step in detail and would be useful for new users to have a look at. The output matrices
are produced in the `.hdf5 <https://en.wikipedia.org/wiki/Hierarchical_Data_Format>`__ format.

Expand Down Expand Up @@ -123,7 +123,7 @@ Structure of output directory
In addition to the FASTQ module results (see :ref:`running_snakePipes`), the workflow produces the following outputs::

.
|--BWA
|--bwa
|--FASTQ
|--HiC_matrices
| |--logs
Expand All @@ -133,7 +133,7 @@ In addition to the FASTQ module results (see :ref:`running_snakePipes`), the wor
|--TADs
|--logs

* **BWA** folder contains the mapping results in BAM format. The files were obtained after running `BWA <https://github.com/lh3/bwa>`__ on each of the paired-end reads individually.
* **bwa** folder contains the mapping results in BAM format. The files were obtained after running `bwa <https://github.com/lh3/bwa>`__ on each of the paired-end reads individually.

* **HiC_matrices** folder accommodates the contact matrices generated by `hicBuildMatrix <https://hicexplorer.readthedocs.io/en/latest/content/tools/hicBuildMatrix.html>`__. In case of merging samples or merging bins the initial matrix is saved in this folder along with the merged ones.

Expand Down
2 changes: 1 addition & 1 deletion snakePipes/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '2.5.4'
__version__ = '2.6.0'
2 changes: 2 additions & 0 deletions snakePipes/shared/organisms/GRCh38_gencode40.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
blacklist_bed: /data/repository/organisms/GRCh38_gencode_40/akundaje/blacklist.bed
bowtie2_index: /data/repository/organisms/GRCh38_gencode_40/Indices/BowtieIndex/genome
bwa_index: /data/repository/organisms/GRCh38_gencode_40/Indices/BWAIndex/genome
bwa_mem2_index: /data/repository/organisms/GRCh38_gencode_40/Indices/BWAMEM2Index/genome
bwameth_index: /data/repository/organisms/GRCh38_gencode_40/Indices/BWAmethIndex/genome.fa
bwameth2_index:
extended_coding_regions_gtf: /data/repository/organisms/GRCh38_gencode_40/gencode/release-40/genes.slop.gtf
genes_bed: /data/repository/organisms/GRCh38_gencode_40/gencode/release-40/genes.bed
genes_gtf: /data/repository/organisms/GRCh38_gencode_40/gencode/release-40/genes.gtf
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/organisms/GRCz10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ genome_2bit: "/data/repository/organisms/GRCz10_ensembl/genome_fasta/genome.2bit
bowtie2_index: "/data/repository/organisms/GRCz10_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/GRCz10_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/GRCz10_ensembl/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/GRCz10_ensembl/BWA2index/genome.fa"
bwameth_index: "/data/repository/organisms/GRCz10_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/GRCz10_ensembl/ensembl/release-91/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/GRCz10_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/GRCz10_ensembl/ensembl/release-91/genes.bed"
Expand Down
2 changes: 2 additions & 0 deletions snakePipes/shared/organisms/GRCz11.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ genome_2bit: "/data/repository/organisms/GRCz11_ensembl/genome_fasta/genome.2bit
bowtie2_index: "/data/repository/organisms/GRCz11_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/GRCz11_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/GRCz11_ensembl/BWAindex/genome.fa"
bwa_mem2_index:
bwameth_index: "/data/repository/organisms/GRCz11_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/GRCz11_ensembl/ensembl/release-100/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/GRCz11_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/GRCz11_ensembl/ensembl/release-100/genes.bed"
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/organisms/SchizoSPombe_ASM294v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ genome_2bit: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/genome_fa
bowtie2_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/BWA2index/genome.fa"
bwameth_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/ensembl/release-35/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/SchizoSPombe_ASM294v2_ensembl/Ensembl/release-35/genes.bed"
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/organisms/dm6.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ genome_2bit: "/data/repository/organisms/dm6_ensembl/genome_fasta/genome.2bit"
bowtie2_index: "/data/repository/organisms/dm6_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/dm6_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/dm6_ensembl/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/dm6_ensembl/BWA2index/genome.fa"
bwameth_index: "/data/repository/organisms/dm6_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/dm6_ensembl/ensembl/release-96/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/dm6_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/dm6_ensembl/Ensembl/release-96/genes.bed"
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/organisms/hg38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ genome_2bit: "/data/repository/organisms/GRCh38_ensembl/genome_fasta/genome.2bit
bowtie2_index: "/data/repository/organisms/GRCh38_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/GRCh38_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/GRCh38_ensembl/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/GRCh38_ensembl/BWA2index/genome.fa"
bwameth_index: "/data/repository/organisms/GRCh38_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/GRCh38_ensembl/gencode/release_31/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/GRCh38_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/GRCh38_ensembl/gencode/release_31/genes.bed"
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/organisms/mm10_gencodeM19.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ genome_2bit: "/data/repository/organisms/GRCm38_ensembl/genome_fasta/genome.2bit
bowtie2_index: "/data/repository/organisms/GRCm38_ensembl/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/GRCm38_ensembl/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/GRCm38_ensembl/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/GRCm38_ensembl/BWA2index/genome.fa"
bwameth_index: "/data/repository/organisms/GRCm38_ensembl/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/GRCm38_ensembl/gencode/m19/HISAT2/splice_sites.txt"
star_index: "/data/repository/organisms/GRCm38_ensembl/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/GRCm38_ensembl/gencode/m19/genes.bed"
Expand Down
3 changes: 3 additions & 0 deletions snakePipes/shared/organisms/mm39_ens106.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ genome_2bit: "/data/repository/organisms/GRCm39_ensembl_106/genome_fasta/genome.
bowtie2_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/BowtieIndex/genome"
hisat2_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/HISAT2Index/genome"
bwa_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/BWAindex/genome.fa"
bwa_mem2_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/BWAMEM2Index/genome"
bwameth_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/BWAmethIndex/genome.fa"
bwameth2_index:
known_splicesites: "/data/repository/organisms/GRCm39_ensembl_106/ensembl/release-106/HISAT2/genome.ss"
star_index: "/data/repository/organisms/GRCm39_ensembl_106/Indices/STARIndex/2.7.10a/"
genes_bed: "/data/repository/organisms/GRCm39_ensembl_106/ensembl/release-106/genes.bed"
Expand Down
28 changes: 14 additions & 14 deletions snakePipes/shared/rules/WGBS.snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ if pairedEnd and not fromBAM:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
r2=fastq_dir + "/{sample}" + reads[1] + ".fastq.gz"
output:
sbam=temp("bwameth/{sample}.bam")
sbam=temp(aligner+"/{sample}.bam")
log:
err="bwameth/logs/{sample}.map_reads.err",
out="bwameth/logs/{sample}.map_reads.out"
err=aligner+"/logs/{sample}.map_reads.err",
out=aligner+"/logs/{sample}.map_reads.out"
params:
bwameth_index=bwameth_index,
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
threads: lambda wildcards: 20 if 20<max_thread else max_thread
conda: CONDA_WGBS_ENV
Expand All @@ -47,12 +47,12 @@ elif not pairedEnd and not fromBAM:
input:
r1=fastq_dir + "/{sample}" + reads[0] + ".fastq.gz",
output:
sbam=temp("bwameth/{sample}.bam")
sbam=temp(aligner+"/{sample}.bam")
log:
err="bwameth/logs/{sample}.map_reads.err",
out="bwameth/logs/{sample}.map_reads.out"
err=aligner+"/logs/{sample}.map_reads.err",
out=aligner+"/logs/{sample}.map_reads.out"
params:
bwameth_index=bwameth_index,
bwameth_index=bwameth_index if aligner=="bwameth" else bwameth2_index,
tempDir = tempDir
threads: lambda wildcards: 20 if 20<max_thread else max_thread
conda: CONDA_WGBS_ENV
Expand All @@ -67,12 +67,12 @@ elif not pairedEnd and not fromBAM:
if not fromBAM:
rule index_bam:
input:
"bwameth/{sample}.bam"
aligner+"/{sample}.bam"
output:
temp("bwameth/{sample}.bam.bai")
temp(aligner+"/{sample}.bam.bai")
log:
err="bwameth/logs/{sample}.index_bam.err",
out="bwameth/logs/{sample}.index_bam.out"
err=aligner+"/logs/{sample}.index_bam.err",
out=aligner+"/logs/{sample}.index_bam.out"
conda: CONDA_SHARED_ENV
shell: """
samtools index "{input}" > {log.out} 2> {log.err}
Expand All @@ -81,8 +81,8 @@ if not fromBAM:
if not skipBamQC:
rule markDupes:
input:
"bwameth/{sample}.bam",
"bwameth/{sample}.bam.bai"
aligner+"/{sample}.bam",
aligner+"/{sample}.bam.bai"
output:
"Sambamba/{sample}.markdup.bam"
log:
Expand Down
53 changes: 53 additions & 0 deletions snakePipes/shared/rules/bwa-mem2.snakefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
if pairedEnd:
rule bwamem2:
input:
r1 = fastq_dir+"/{sample}"+reads[0]+".fastq.gz",
r2 = fastq_dir+"/{sample}"+reads[1]+".fastq.gz"
output:
align_summary = "bwa-mem2/{sample}.bwa-mem2_summary.txt", #samtools flagstat
bam = temp("bwa-mem2/{sample}.sorted.bam")
log: "bwa-mem2/logs/{sample}.sort.log"
params:
bwa_index = bwa_mem2_index,
alignerOpts = str(alignerOpts or ''),
tempDir = tempDir
threads: lambda wildcards: 8 if 8<max_thread else max_thread
conda: CONDA_DNA_MAPPING_ENV
shell:"""
TMPDIR={params.tempDir}
MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX);
bwa-mem2 mem \
-t 6 \
-R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts} \
{params.bwa_index} {input.r1} {input.r2} | \
samtools view -Sb - | \
samtools sort -m 2G -@ 2 -O bam - > {output.bam} 2> {log};
rm -rf $MYTEMP
samtools flagstat {output.bam} > {output.align_summary}
"""
else:
rule bwamem2:
input:
fastq_dir+"/{sample}"+reads[0]+".fastq.gz"
output:
align_summary = "bwa-mem2/{sample}.bwa-mem2_summary.txt", #samtools flagstat
bam = temp("bwa-mem2/{sample}.sorted.bam")
log: "bwa-mem2/logs/{sample}.sort.log"
params:
bwa_index = bwa_mem2_index,
alignerOpts = str(alignerOpts or ''),
tempDir = tempDir
threads : lambda wildcards: 12 if 12<max_thread else max_thread
conda: CONDA_DNA_MAPPING_ENV
shell: """
TMPDIR={params.tempDir}
MYTEMP=$(mktemp -d ${{TMPDIR:-/tmp}}/snakepipes.XXXXXXXXXX);
bwa-mem2 mem \
-t {threads} \
-R '@RG\\tID:{wildcards.sample}\\tDS:{wildcards.sample}\\tPL:ILLUMINA\\tSM:{wildcards.sample}' {params.alignerOpts}\
{params.bwa_index} {input} | \
samtools view -Sbu - | \
samtools sort -m 2G -T $MYTEMP/{wildcards.sample} -@ 2 -O bam - > {output.bam} 2> {log};
rm -rf $MYTEMP
samtools flagstat {output.bam} > {output.align_summary}
"""

0 comments on commit fb70901

Please sign in to comment.