Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update modules required for rnaseq pipeline #449

Merged
merged 8 commits into from
Apr 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions software/hisat2/build/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ def VERSION = '2.2.0'
process HISAT2_BUILD {
tag "$fasta"
label 'process_high'
label 'process_high_memory'
publishDir "${params.outdir}",
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }
Expand All @@ -26,7 +27,7 @@ process HISAT2_BUILD {
path splicesites

output:
path "hisat2", emit: index
path "hisat2" , emit: index
path "*.version.txt", emit: version

script:
Expand All @@ -38,17 +39,18 @@ process HISAT2_BUILD {
avail_mem = task.memory.toGiga()
}

def extract_exons = ''
def ss = ''
def exon = ''
if (avail_mem > params.hisat_build_memory) {
log.info "[HISAT2 index build] Over ${params.hisat_build_memory} GB available, so using splice sites and exons in HISAT2 index"
def extract_exons = ''
def hisat2_build_memory = params.hisat2_build_memory ? (params.hisat2_build_memory as nextflow.util.MemoryUnit).toGiga() : 0
JoseEspinosa marked this conversation as resolved.
Show resolved Hide resolved
if (avail_mem >= hisat2_build_memory) {
log.info "[HISAT2 index build] At least ${hisat2_build_memory} GB available, so using splice sites and exons to build HISAT2 index"
extract_exons = "hisat2_extract_exons.py $gtf > ${gtf.baseName}.exons.txt"
ss = "--ss $splicesites"
exon = "--exon ${gtf.baseName}.exons.txt"
} else {
log.info "[HISAT2 index build] Less than ${params.hisat_build_memory} GB available, so NOT using splice sites and exons in HISAT2 index."
log.info "[HISAT2 index build] Use --hisat_build_memory [small number] to skip this check."
log.info "[HISAT2 index build] Less than ${hisat2_build_memory} GB available, so NOT using splice sites and exons to build HISAT2 index."
log.info "[HISAT2 index build] Use --hisat2_build_memory [small number] to skip this check."
}

def software = getSoftwareName(task.process)
Expand Down
6 changes: 3 additions & 3 deletions software/preseq/lcextrap/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ process PRESEQ_LCEXTRAP {
mode: params.publish_dir_mode,
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

conda (params.enable_conda ? "bioconda::preseq=2.0.3" : null)
conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/preseq:2.0.3--hf53bd2b_3"
container "https://depot.galaxyproject.org/singularity/preseq:3.1.2--h06ef8b0_1"
} else {
container "quay.io/biocontainers/preseq:2.0.3--hf53bd2b_3"
container "quay.io/biocontainers/preseq:3.1.2--h06ef8b0_1"
}

input:
Expand Down
47 changes: 47 additions & 0 deletions software/preseq/lcextrap/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Metadata for the preseq_lcextrap module: tool details, expected inputs,
# produced outputs and module authors.
name: preseq_lcextrap
description: Software for predicting library complexity and genome coverage in high-throughput sequencing
keywords:
- preseq
- library
- complexity
# Tool(s) described by this metadata.
tools:
- preseq:
description: Software for predicting library complexity and genome coverage in high-throughput sequencing
homepage: http://smithlabresearch.org/software/preseq/
# NOTE(review): documentation and tool_dev_url hold the literal string "None"
# and doi is an empty string - presumably placeholders; fill in if known.
documentation: None
tool_dev_url: None
doi: ""
licence: ['GPL']

# Inputs: a sample meta map plus one alignment file matching the pattern below.
input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/CRAM/SAM file
pattern: "*.{bam,cram,sam}"

# Outputs: the sample meta map, a software version file, the lcextrap result
# (ccurve) and a log file capturing stderr.
output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- version:
type: file
description: File containing software version
pattern: "*.{version.txt}"
- ccurve:
type: file
description: File containing output of Preseq lcextrap
pattern: "*.{ccurve.txt}"
- log:
type: file
description: Log file containing stderr produced by Preseq
pattern: "*.{log}"

# GitHub handles of the module authors.
authors:
- "@drpatelh"
5 changes: 3 additions & 2 deletions software/samtools/index/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@ process SAMTOOLS_INDEX {
tuple val(meta), path(bam)

output:
tuple val(meta), path("*.bai"), emit: bai
tuple val(meta), path("*.bai"), optional:true, emit: bai
tuple val(meta), path("*.csi"), optional:true, emit: csi
path "*.version.txt" , emit: version

script:
def software = getSoftwareName(task.process)
"""
samtools index $bam
samtools index $options.args $bam
echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//' > ${software}.version.txt
"""
}
4 changes: 4 additions & 0 deletions software/samtools/index/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ output:
type: file
description: BAM/CRAM/SAM index file
pattern: "*.{bai,crai,sai}"
- csi:
type: file
description: CSI index file
pattern: "*.{csi}"
- version:
type: file
description: File containing software version
Expand Down
57 changes: 39 additions & 18 deletions software/star/genomegenerate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ process STAR_GENOMEGENERATE {
saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:'index', meta:[:], publish_by_meta:[]) }

// Note: indices built with STAR 2.7.x are incompatible with AWS iGenomes.
conda (params.enable_conda ? "bioconda::star=2.6.1d" : null)
conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null)
if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
container "https://depot.galaxyproject.org/singularity/star:2.6.1d--0"
container "https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0"
} else {
container "quay.io/biocontainers/star:2.6.1d--0"
container "quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0"
}

input:
Expand All @@ -28,19 +28,40 @@ process STAR_GENOMEGENERATE {
path "*.version.txt", emit: version

script:
def software = getSoftwareName(task.process)
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
"""
mkdir star
STAR \\
--runMode genomeGenerate \\
--genomeDir star/ \\
--genomeFastaFiles $fasta \\
--sjdbGTFfile $gtf \\
--runThreadN $task.cpus \\
$memory \\
$options.args

STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
"""
def software = getSoftwareName(task.process)
def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : ''
def args = options.args.tokenize()
if (args.contains('--genomeSAindexNbases')) {
"""
mkdir star
STAR \\
--runMode genomeGenerate \\
--genomeDir star/ \\
--genomeFastaFiles $fasta \\
--sjdbGTFfile $gtf \\
--runThreadN $task.cpus \\
$memory \\
$options.args

STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
"""
} else {
"""
samtools faidx $fasta
NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai`

mkdir star
STAR \\
--runMode genomeGenerate \\
--genomeDir star/ \\
--genomeFastaFiles $fasta \\
--sjdbGTFfile $gtf \\
--runThreadN $task.cpus \\
--genomeSAindexNbases \$NUM_BASES \\
$memory \\
$options.args

STAR --version | sed -e "s/STAR_//g" > ${software}.version.txt
"""
}
}
4 changes: 2 additions & 2 deletions tests/software/preseq/lcextrap/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- preseq/lcextrap
files:
- path: output/preseq/test.ccurve.txt
md5sum: 76ae04c8eaf19c94e3210bb69da38498
md5sum: 1fa5cdd601079329618f61660bee00de
- path: output/preseq/test.command.log

- name: preseq lcextrap paired-end
Expand All @@ -15,5 +15,5 @@
- preseq/lcextrap
files:
- path: output/preseq/test.ccurve.txt
md5sum: 2836d2fabd2213f097fd7063db550276
md5sum: 10e5ea860e87fb6f5dc10f4f20c62040
- path: output/preseq/test.command.log
15 changes: 12 additions & 3 deletions tests/software/samtools/index/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,21 @@

nextflow.enable.dsl = 2

include { SAMTOOLS_INDEX } from '../../../../software/samtools/index/main.nf' addParams( options: [:] )
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_BAI } from '../../../../software/samtools/index/main.nf' addParams( options: [:] )
include { SAMTOOLS_INDEX as SAMTOOLS_INDEX_CSI } from '../../../../software/samtools/index/main.nf' addParams( options: [args:'-c'] )

workflow test_samtools_index {
workflow test_samtools_index_bai {
input = [ [ id:'test', single_end:false ], // meta map
file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
]

SAMTOOLS_INDEX ( input )
SAMTOOLS_INDEX_BAI ( input )
}

// Test entry point: runs the SAMTOOLS_INDEX_CSI alias (SAMTOOLS_INDEX included
// with options args '-c') on the sorted paired-end SARS-CoV-2 test BAM.
workflow test_samtools_index_csi {
    // meta map
    def sample_meta = [ id:'test', single_end:false ]

    // Sorted BAM taken from the shared test-data map; fail early if it is missing
    def sorted_bam = file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)

    SAMTOOLS_INDEX_CSI ( [ sample_meta, sorted_bam ] )
}
13 changes: 11 additions & 2 deletions tests/software/samtools/index/test.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
- name: samtools index
command: nextflow run tests/software/samtools/index -entry test_samtools_index -c tests/config/nextflow.config
- name: samtools index bai
command: nextflow run tests/software/samtools/index -entry test_samtools_index_bai -c tests/config/nextflow.config
tags:
- samtools
- samtools/index
files:
- path: output/samtools/test_paired_end.sorted.bam.bai
md5sum: 704c10dd1326482448ca3073fdebc2f4

- name: samtools index csi
command: nextflow run tests/software/samtools/index -entry test_samtools_index_csi -c tests/config/nextflow.config
tags:
- samtools
- samtools/index
files:
- path: output/samtools/test_paired_end.sorted.bam.csi
md5sum: 3dd9e3ed959fca075b88bb8dc3cf7dbd
6 changes: 3 additions & 3 deletions tests/software/star/genomegenerate/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

nextflow.enable.dsl = 2

include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [args: '--genomeSAindexNbases 9'] )
include { STAR_GENOMEGENERATE } from '../../../../software/star/genomegenerate/main.nf' addParams( options: [:] )

workflow test_star_genomegenerate {
fasta = file("${launchDir}/tests/data/generic/fasta/GCF_000019425.1_ASM1942v1_genomic.fna", checkIfExists: true)
gtf = file("${launchDir}/tests/data/generic/gtf/GCF_000019425.1_ASM1942v1_genomic.gtf", checkIfExists: true)
fasta = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
gtf = file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)

STAR_GENOMEGENERATE ( fasta, gtf )
}