Skip to content

Commit

Permalink
Merge pull request #959 from drpatelh/fixes
Browse files Browse the repository at this point in the history
Rejig prepare_genome.nf subworkflow
  • Loading branch information
drpatelh committed Mar 15, 2023
2 parents 640ff1f + b10efc8 commit 4656b56
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 101 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unpublished Version / DEV]
## [[3.11.0](https://github.com/nf-core/rnaseq/releases/tag/3.11.0)] - 2023-03-24

### Enhancements & fixes

Expand Down
3 changes: 1 addition & 2 deletions modules.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@
"custom/getchromsizes": {
"branch": "master",
"git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
"installed_by": ["modules"],
"patch": "modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff"
"installed_by": ["modules"]
},
"fastqc": {
"branch": "master",
Expand Down
24 changes: 0 additions & 24 deletions modules/nf-core/custom/getchromsizes/custom-getchromsizes.diff

This file was deleted.

10 changes: 5 additions & 5 deletions modules/nf-core/custom/getchromsizes/main.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ manifest {
description = """RNA sequencing analysis pipeline for gene/isoform quantification and extensive quality control."""
mainScript = 'main.nf'
nextflowVersion = '!>=22.10.1'
version = '3.10.1'
version = '3.11.0'
doi = 'https://doi.org/10.5281/zenodo.1400710'
}

Expand Down
144 changes: 79 additions & 65 deletions subworkflows/local/prepare_genome.nf
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,23 @@ include { STAR_GENOMEGENERATE_IGENOMES } from '../../modules/local/star_

workflow PREPARE_GENOME {
take:
prepare_tool_indices // list : tools to prepare indices for
biotype // string : if additional fasta file is provided biotype value to use when appending entries to GTF file
is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
fasta // file: /path/to/genome.fasta
gtf // file: /path/to/genome.gtf
gff // file: /path/to/genome.gff
additional_fasta // file: /path/to/additional.fasta
transcript_fasta // file: /path/to/transcript.fasta
gene_bed // file: /path/to/gene.bed
splicesites // file: /path/to/splicesites.txt
bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
star_index // directory: /path/to/star/index/
rsem_index // directory: /path/to/rsem/index/
salmon_index // directory: /path/to/salmon/index/
hisat2_index // directory: /path/to/hisat2/index/
bbsplit_index // directory: /path/to/bbsplit/index/
gencode // boolean: whether the genome is from GENCODE
is_aws_igenome // boolean: whether the genome files are from AWS iGenomes
biotype // string: if additional fasta file is provided biotype value to use when appending entries to GTF file
prepare_tool_indices // list: tools to prepare indices for

main:

Expand All @@ -44,29 +58,29 @@ workflow PREPARE_GENOME {
//
// Uncompress genome fasta file if required
//
if (params.fasta.endsWith('.gz')) {
ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map { it[1] }
if (fasta.endsWith('.gz')) {
ch_fasta = GUNZIP_FASTA ( [ [:], fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions)
} else {
ch_fasta = file(params.fasta)
ch_fasta = Channel.value(file(fasta))
}

//
// Uncompress GTF annotation file or create from GFF3 if required
//
if (params.gtf) {
if (params.gtf.endsWith('.gz')) {
ch_gtf = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map { it[1] }
if (gtf) {
if (gtf.endsWith('.gz')) {
ch_gtf = GUNZIP_GTF ( [ [:], gtf ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions)
} else {
ch_gtf = file(params.gtf)
ch_gtf = Channel.value(file(gtf))
}
} else if (params.gff) {
if (params.gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF ( [ [:], params.gff ] ).gunzip.map { it[1] }
} else if (gff) {
if (gff.endsWith('.gz')) {
ch_gff = GUNZIP_GFF ( [ [:], gff ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions)
} else {
ch_gff = file(params.gff)
ch_gff = Channel.value(file(gff))
}
ch_gtf = GFFREAD ( ch_gff ).gtf
ch_versions = ch_versions.mix(GFFREAD.out.versions)
Expand All @@ -75,12 +89,12 @@ workflow PREPARE_GENOME {
//
// Uncompress additional fasta file and concatenate with reference fasta and gtf files
//
if (params.additional_fasta) {
if (params.additional_fasta.endsWith('.gz')) {
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], params.additional_fasta ] ).gunzip.map { it[1] }
if (additional_fasta) {
if (additional_fasta.endsWith('.gz')) {
ch_add_fasta = GUNZIP_ADDITIONAL_FASTA ( [ [:], additional_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_ADDITIONAL_FASTA.out.versions)
} else {
ch_add_fasta = file(params.additional_fasta)
ch_add_fasta = Channel.value(file(additional_fasta))
}
CAT_ADDITIONAL_FASTA ( ch_fasta, ch_gtf, ch_add_fasta, biotype )
ch_fasta = CAT_ADDITIONAL_FASTA.out.fasta
Expand All @@ -91,12 +105,12 @@ workflow PREPARE_GENOME {
//
// Uncompress gene BED annotation file or create from GTF if required
//
if (params.gene_bed) {
if (params.gene_bed.endsWith('.gz')) {
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], params.gene_bed ] ).gunzip.map { it[1] }
if (gene_bed) {
if (gene_bed.endsWith('.gz')) {
ch_gene_bed = GUNZIP_GENE_BED ( [ [:], gene_bed ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions)
} else {
ch_gene_bed = file(params.gene_bed)
ch_gene_bed = Channel.value(file(gene_bed))
}
} else {
ch_gene_bed = GTF2BED ( ch_gtf ).bed
Expand All @@ -106,14 +120,14 @@ workflow PREPARE_GENOME {
//
// Uncompress transcript fasta file / create if required
//
if (params.transcript_fasta) {
if (params.transcript_fasta.endsWith('.gz')) {
ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], params.transcript_fasta ] ).gunzip.map { it[1] }
if (transcript_fasta) {
if (transcript_fasta.endsWith('.gz')) {
ch_transcript_fasta = GUNZIP_TRANSCRIPT_FASTA ( [ [:], transcript_fasta ] ).gunzip.map { it[1] }
ch_versions = ch_versions.mix(GUNZIP_TRANSCRIPT_FASTA.out.versions)
} else {
ch_transcript_fasta = file(params.transcript_fasta)
ch_transcript_fasta = Channel.value(file(transcript_fasta))
}
if (params.gencode) {
if (gencode) {
PREPROCESS_TRANSCRIPTS_FASTA_GENCODE ( ch_transcript_fasta )
ch_transcript_fasta = PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.fasta
ch_versions = ch_versions.mix(PREPROCESS_TRANSCRIPTS_FASTA_GENCODE.out.versions)
Expand All @@ -128,26 +142,26 @@ workflow PREPARE_GENOME {
//
// Create chromosome sizes file
//
CUSTOM_GETCHROMSIZES ( ch_fasta )
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes
ch_fai = CUSTOM_GETCHROMSIZES.out.fai
CUSTOM_GETCHROMSIZES ( ch_fasta.map { [ [:], it ] } )
ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] }
ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] }
ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions)

//
// Uncompress BBSplit index or generate from scratch if required
//
ch_bbsplit_index = Channel.empty()
if ('bbsplit' in prepare_tool_indices) {
if (params.bbsplit_index) {
if (params.bbsplit_index.endsWith('.tar.gz')) {
ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], params.bbsplit_index ] ).untar.map { it[1] }
if (bbsplit_index) {
if (bbsplit_index.endsWith('.tar.gz')) {
ch_bbsplit_index = UNTAR_BBSPLIT_INDEX ( [ [:], bbsplit_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_BBSPLIT_INDEX.out.versions)
} else {
ch_bbsplit_index = file(params.bbsplit_index)
ch_bbsplit_index = Channel.value(file(bbsplit_index))
}
} else {
Channel
.from(file(params.bbsplit_fasta_list))
.from(file(bbsplit_fasta_list))
.splitCsv() // Read in 2 column csv file: short_name,path_to_fasta
.flatMap { id, fasta -> [ [ 'id', id ], [ 'fasta', file(fasta, checkIfExists: true) ] ] } // Flatten entries to be able to groupTuple by a common key
.groupTuple()
Expand All @@ -165,12 +179,12 @@ workflow PREPARE_GENOME {
//
ch_star_index = Channel.empty()
if ('star_salmon' in prepare_tool_indices) {
if (params.star_index) {
if (params.star_index.endsWith('.tar.gz')) {
ch_star_index = UNTAR_STAR_INDEX ( [ [:], params.star_index ] ).untar.map { it[1] }
if (star_index) {
if (star_index.endsWith('.tar.gz')) {
ch_star_index = UNTAR_STAR_INDEX ( [ [:], star_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions)
} else {
ch_star_index = file(params.star_index)
ch_star_index = Channel.value(file(star_index))
}
} else {
if (is_aws_igenome) {
Expand All @@ -188,12 +202,12 @@ workflow PREPARE_GENOME {
//
ch_rsem_index = Channel.empty()
if ('star_rsem' in prepare_tool_indices) {
if (params.rsem_index) {
if (params.rsem_index.endsWith('.tar.gz')) {
ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], params.rsem_index ] ).untar.map { it[1] }
if (rsem_index) {
if (rsem_index.endsWith('.tar.gz')) {
ch_rsem_index = UNTAR_RSEM_INDEX ( [ [:], rsem_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_RSEM_INDEX.out.versions)
} else {
ch_rsem_index = file(params.rsem_index)
ch_rsem_index = Channel.value(file(rsem_index))
}
} else {
ch_rsem_index = RSEM_PREPAREREFERENCE_GENOME ( ch_fasta, ch_gtf ).index
Expand All @@ -207,18 +221,18 @@ workflow PREPARE_GENOME {
ch_splicesites = Channel.empty()
ch_hisat2_index = Channel.empty()
if ('hisat2' in prepare_tool_indices) {
if (!params.splicesites) {
if (!splicesites) {
ch_splicesites = HISAT2_EXTRACTSPLICESITES ( ch_gtf ).txt
ch_versions = ch_versions.mix(HISAT2_EXTRACTSPLICESITES.out.versions)
} else {
ch_splicesites = file(params.splicesites)
ch_splicesites = Channel.value(file(splicesites))
}
if (params.hisat2_index) {
if (params.hisat2_index.endsWith('.tar.gz')) {
ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], params.hisat2_index ] ).untar.map { it[1] }
if (hisat2_index) {
if (hisat2_index.endsWith('.tar.gz')) {
ch_hisat2_index = UNTAR_HISAT2_INDEX ( [ [:], hisat2_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_HISAT2_INDEX.out.versions)
} else {
ch_hisat2_index = file(params.hisat2_index)
ch_hisat2_index = Channel.value(file(hisat2_index))
}
} else {
ch_hisat2_index = HISAT2_BUILD ( ch_fasta, ch_gtf, ch_splicesites ).index
Expand All @@ -230,12 +244,12 @@ workflow PREPARE_GENOME {
// Uncompress Salmon index or generate from scratch if required
//
ch_salmon_index = Channel.empty()
if (params.salmon_index) {
if (params.salmon_index.endsWith('.tar.gz')) {
ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], params.salmon_index ] ).untar.map { it[1] }
if (salmon_index) {
if (salmon_index.endsWith('.tar.gz')) {
ch_salmon_index = UNTAR_SALMON_INDEX ( [ [:], salmon_index ] ).untar.map { it[1] }
ch_versions = ch_versions.mix(UNTAR_SALMON_INDEX.out.versions)
} else {
ch_salmon_index = file(params.salmon_index)
ch_salmon_index = Channel.value(file(salmon_index))
}
} else {
if ('salmon' in prepare_tool_indices) {
Expand All @@ -245,18 +259,18 @@ workflow PREPARE_GENOME {
}

emit:
fasta = ch_fasta // path: genome.fasta
gtf = ch_gtf // path: genome.gtf
fai = ch_fai // path: genome.fai
gene_bed = ch_gene_bed // path: gene.bed
transcript_fasta = ch_transcript_fasta // path: transcript.fasta
chrom_sizes = ch_chrom_sizes // path: genome.sizes
splicesites = ch_splicesites // path: genome.splicesites.txt
bbsplit_index = ch_bbsplit_index // path: bbsplit/index/
star_index = ch_star_index // path: star/index/
rsem_index = ch_rsem_index // path: rsem/index/
hisat2_index = ch_hisat2_index // path: hisat2/index/
salmon_index = ch_salmon_index // path: salmon/index/
fasta = ch_fasta // channel: path(genome.fasta)
gtf = ch_gtf // channel: path(genome.gtf)
fai = ch_fai // channel: path(genome.fai)
gene_bed = ch_gene_bed // channel: path(gene.bed)
transcript_fasta = ch_transcript_fasta // channel: path(transcript.fasta)
chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
splicesites = ch_splicesites // channel: path(genome.splicesites.txt)
bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/)
star_index = ch_star_index // channel: path(star/index/)
rsem_index = ch_rsem_index // channel: path(rsem/index/)
hisat2_index = ch_hisat2_index // channel: path(hisat2/index/)
salmon_index = ch_salmon_index // channel: path(salmon/index/)

versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ]
}
19 changes: 16 additions & 3 deletions workflows/rnaseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,23 @@ workflow RNASEQ {
//
def biotype = params.gencode ? "gene_type" : params.featurecounts_group_type
PREPARE_GENOME (
prepareToolIndices,
params.fasta,
params.gtf,
params.gff,
params.additional_fasta,
params.transcript_fasta,
params.gene_bed,
params.splicesites,
params.bbsplit_fasta_list,
params.star_index,
params.rsem_index,
params.salmon_index,
params.hisat2_index,
params.bbsplit_index,
params.gencode,
is_aws_igenome,
biotype,
is_aws_igenome

prepareToolIndices
)
ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)

Expand Down

0 comments on commit 4656b56

Please sign in to comment.