From 955d7913523b4f20bb8fafecb40a426116c572a4 Mon Sep 17 00:00:00 2001 From: Jonathan Manning Date: Mon, 18 May 2026 16:08:09 +0100 Subject: [PATCH] feat(ribocode,ribotish): pyfasta indexes, prefix-scoped outputs, optional ribotish -a Bundles three in-place module changes carried in nf-core/riboseq#174. ribocode/prepare: pre-build the pyfasta .gdx/.flat indexes for annotation/transcripts_sequence.fa using the same key_fn RiboCode applies internally (split on first space, else split on '|'). Downstream RiboCode tasks otherwise lazily build those sidecars inside the staged input directory, which fails under Fusion staging because writes leak back to the upstream task's S3 prefix. ribocode/ribocode: scope the orf_txt and orf_txt_collapsed output globs to ${prefix}.txt and ${prefix}_collapsed.txt rather than *.txt/*_collapsed.txt so multi-instance publication is unambiguous. The prefix binding is promoted out of `def` in both the script and stub blocks so it resolves at the output-glob stage (Nextflow 26 strict parser rejects redeclaration of the same name across script/stub if either uses `def`). The existing stub-test assertion that indexed orf_txt[0][1][0] is adjusted to the new single-file shape. ribotish/predict: extend the fasta/gtf input tuple with an optional fourth path, reference_gtf, plumbed to ribotish predict as `-a <reference_gtf>` when populated. BREAKING signature change for callers: every emitter must supply a fourth element in the third tuple (use `[]` for the no-op case). 
Source: nf-core/riboseq#174 Co-Authored-By: Claude Opus 4.7 (1M context) --- modules/nf-core/ribocode/prepare/main.nf | 14 ++++++++++++++ .../ribocode/prepare/tests/main.nf.test.snap | 8 ++++++-- modules/nf-core/ribocode/ribocode/main.nf | 8 ++++---- modules/nf-core/ribocode/ribocode/meta.yml | 4 ++-- .../nf-core/ribocode/ribocode/tests/main.nf.test | 2 +- .../ribocode/ribocode/tests/main.nf.test.snap | 5 +---- modules/nf-core/ribotish/predict/main.nf | 4 +++- modules/nf-core/ribotish/predict/meta.yml | 7 +++++++ .../nf-core/ribotish/predict/tests/main.nf.test | 12 ++++++++---- 9 files changed, 46 insertions(+), 18 deletions(-) diff --git a/modules/nf-core/ribocode/prepare/main.nf b/modules/nf-core/ribocode/prepare/main.nf index d61653fceb11..e02739ed21a7 100644 --- a/modules/nf-core/ribocode/prepare/main.nf +++ b/modules/nf-core/ribocode/prepare/main.nf @@ -27,6 +27,18 @@ process RIBOCODE_PREPARE { -f ${fasta} \\ -o annotation \\ $args + + # Pre-build pyfasta .gdx/.flat with RiboCode's key_fn so consumers don't write to staged inputs. 
+ python - <<'PYTHON' +from pyfasta import Fasta +def key_fn(name): + if ' ' in name: + return name.split()[0] + if '|' in name: + return name.split('|')[0] + return name +Fasta('annotation/transcripts_sequence.fa', key_fn=key_fn) +PYTHON """ stub: @@ -36,6 +48,8 @@ process RIBOCODE_PREPARE { touch annotation/transcripts_cds.txt touch annotation/transcripts_sequence.fa + touch annotation/transcripts_sequence.fa.gdx + touch annotation/transcripts_sequence.fa.flat touch annotation/transcripts.pickle """ } diff --git a/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap index 363d03353aac..b955eef94299 100644 --- a/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap +++ b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap @@ -28,7 +28,9 @@ [ "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883", "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af", - "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33" + "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33", + "transcripts_sequence.fa.flat:md5,e99a891bd574545ef72d40334b383c23", + "transcripts_sequence.fa.gdx:md5,4981ecea133628891d475215d48b9fa3" ] ] ], @@ -47,7 +49,9 @@ [ "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883", "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af", - "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33" + "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33", + "transcripts_sequence.fa.flat:md5,e99a891bd574545ef72d40334b383c23", + "transcripts_sequence.fa.gdx:md5,4981ecea133628891d475215d48b9fa3" ] ] ], diff --git a/modules/nf-core/ribocode/ribocode/main.nf b/modules/nf-core/ribocode/ribocode/main.nf index 0d41be9ac0fe..9abee32b4d6b 100644 --- a/modules/nf-core/ribocode/ribocode/main.nf +++ b/modules/nf-core/ribocode/ribocode/main.nf @@ -14,8 +14,8 @@ process RIBOCODE_RIBOCODE { output: - tuple val(meta), path("*.txt") , emit: orf_txt - tuple val(meta), 
path("*_collapsed.txt") , emit: orf_txt_collapsed + tuple val(meta), path("${prefix}.txt") , emit: orf_txt + tuple val(meta), path("${prefix}_collapsed.txt") , emit: orf_txt_collapsed tuple val(meta), path("*_ORFs_category.pdf") , emit: orf_pdf, optional: true tuple val(meta), path("*_psites.hd5") , emit: psites_hd5, optional: true tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version 2>&1') , emit: versions_ribocode, topic: versions @@ -25,7 +25,7 @@ process RIBOCODE_RIBOCODE { script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ # Run RiboCode and capture output to check for errors RiboCode \\ @@ -45,7 +45,7 @@ process RIBOCODE_RIBOCODE { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" """ touch ${prefix}.txt diff --git a/modules/nf-core/ribocode/ribocode/meta.yml b/modules/nf-core/ribocode/ribocode/meta.yml index a6a382768d3a..5257d0e6c0ba 100644 --- a/modules/nf-core/ribocode/ribocode/meta.yml +++ b/modules/nf-core/ribocode/ribocode/meta.yml @@ -55,7 +55,7 @@ output: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - "*.txt": + - ${prefix}.txt: type: file description: Text file containing all detected ORFs with detailed information pattern: "*.txt" @@ -66,7 +66,7 @@ output: description: | Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - - "*_collapsed.txt": + - ${prefix}_collapsed.txt: type: file description: Text file containing collapsed ORFs (merged isoforms) pattern: "*_collapsed.txt" diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test b/modules/nf-core/ribocode/ribocode/tests/main.nf.test index 493a228f0286..7f5bc97a1822 100644 --- a/modules/nf-core/ribocode/ribocode/tests/main.nf.test +++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test @@ -85,7 +85,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert process.out.orf_txt[0][1][0].toString().endsWith('.txt') }, + { assert process.out.orf_txt[0][1].toString().endsWith('.txt') }, { assert process.out.orf_txt_collapsed[0][1].toString().endsWith('_collapsed.txt') }, { assert process.out.orf_pdf[0][1].toString().endsWith('.pdf') }, { assert process.out.psites_hd5[0][1].toString().endsWith('.hd5') } diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap index a29fa8fd7114..9625d262f504 100644 --- a/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap +++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap @@ -7,10 +7,7 @@ "id": "test", "single_end": false }, - [ - "test.txt:md5,3c6c1f3ffff5f9c4f4e59fd4f52c56f4", - "test_collapsed.txt:md5,d1e13bb728ad0b0e79b9326c75c6e47a" - ] + "test.txt:md5,3c6c1f3ffff5f9c4f4e59fd4f52c56f4" ] ], [ diff --git a/modules/nf-core/ribotish/predict/main.nf b/modules/nf-core/ribotish/predict/main.nf index eafe4568b71a..ca6b2ccef6fd 100644 --- a/modules/nf-core/ribotish/predict/main.nf +++ b/modules/nf-core/ribotish/predict/main.nf @@ -10,7 +10,7 @@ process RIBOTISH_PREDICT { input: tuple val(meta), path(bam_ribo), path(bai_ribo) tuple val(meta2), path(bam_ti), path(bai_ti) - tuple val(meta3), path(fasta), path(gtf) + tuple val(meta3), path(fasta), path(gtf), path(reference_gtf, stageAs: 'secondary.gtf') tuple val(meta4), path(candidate_orfs) tuple 
val(meta5), path(para_ribo) tuple val(meta6), path(para_ti) @@ -27,6 +27,7 @@ process RIBOTISH_PREDICT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reference_gtf_arg = reference_gtf ? "-a ${reference_gtf}" : '' ribo_bam_cmd = '' ti_bam_cmd = '' @@ -48,6 +49,7 @@ process RIBOTISH_PREDICT { $ti_bam_cmd \\ -f $fasta \\ -g $gtf \\ + $reference_gtf_arg \\ -o ${prefix}_pred.txt \\ --allresult ${prefix}_all.txt \\ --transprofile ${prefix}_transprofile.py \\ diff --git a/modules/nf-core/ribotish/predict/meta.yml b/modules/nf-core/ribotish/predict/meta.yml index a0cb0b93aeb4..98957a9b141e 100644 --- a/modules/nf-core/ribotish/predict/meta.yml +++ b/modules/nf-core/ribotish/predict/meta.yml @@ -64,6 +64,13 @@ input: GTF-format annotation file for reference sequences used in the bam file pattern: "*.gtf" ontologies: [] + - reference_gtf: + type: file + description: | + Optional secondary GTF annotation passed to ribotish as `-a` (e.g. a + MANE/RefSeq overlay). Pass `[]` to omit. 
+ pattern: "*.gtf" + ontologies: [] - - meta4: type: map description: | diff --git a/modules/nf-core/ribotish/predict/tests/main.nf.test b/modules/nf-core/ribotish/predict/tests/main.nf.test index e20e29820735..3b0c96707e39 100644 --- a/modules/nf-core/ribotish/predict/tests/main.nf.test +++ b/modules/nf-core/ribotish/predict/tests/main.nf.test @@ -38,7 +38,8 @@ nextflow_process { input[2] = GUNZIP.out.gunzip.map{[ [id:'homo_sapiens_chr20'], it[1], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true), + [] ]} input[3] = Channel.of([[],[]]) input[4] = Channel.of([[],[]]) @@ -74,7 +75,8 @@ nextflow_process { input[2] = GUNZIP.out.gunzip.map{[ [id:'homo_sapiens_chr20'], it[1], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true), + [] ]} input[3] = Channel.of([[],[]]) input[4] = Channel.of([[],[]]) @@ -114,7 +116,8 @@ nextflow_process { input[2] = GUNZIP.out.gunzip.map{[ [id:'homo_sapiens_chr20'], it[1], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true), + [] ]} input[3] = Channel.of([[],[]]) input[4] = Channel.of([[],[]]) @@ -156,7 +159,8 @@ nextflow_process { input[2] = GUNZIP.out.gunzip.map{[ [id:'homo_sapiens_chr20'], it[1], - file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true) + 
file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true), + [] ]} input[3] = Channel.of([[],[]]) input[4] = Channel.of([[],[]])