From 955d7913523b4f20bb8fafecb40a426116c572a4 Mon Sep 17 00:00:00 2001
From: Jonathan Manning <jonathan.manning@seqera.io>
Date: Mon, 18 May 2026 16:08:09 +0100
Subject: [PATCH] feat(ribocode,ribotish): pyfasta indexes, prefix-scoped
 outputs, optional ribotish -a

Bundles three in-place module changes carried in nf-core/riboseq#174.

ribocode/prepare: pre-build the pyfasta .gdx/.flat indexes for
annotation/transcripts_sequence.fa using the same key_fn RiboCode applies
internally (keep the first whitespace-delimited token when the header
contains a space, else the first '|'-delimited field, else the header
unchanged). Downstream RiboCode tasks otherwise lazily build those
sidecars inside the staged input directory, which fails under Fusion
staging because writes leak back to the upstream task's S3 prefix.

ribocode/ribocode: scope the orf_txt and orf_txt_collapsed output globs to
${prefix}.txt and ${prefix}_collapsed.txt rather than *.txt/*_collapsed.txt
so multi-instance publication is unambiguous. The prefix binding is
promoted out of `def` in both the script and stub blocks so it resolves at
the output-glob stage (Nextflow 26 strict parser rejects redeclaration of
the same name across script/stub if either uses `def`). The existing
stub-test assertion that indexed orf_txt[0][1][0] is adjusted to the new
single-file shape.

ribotish/predict: extend the fasta/gtf input tuple with an optional fourth
path, reference_gtf, plumbed to ribotish predict as `-a <gtf>` when
populated. BREAKING signature change for callers: every emitter must
supply a fourth element in the third tuple (use `[]` for the no-op case).

Source: nf-core/riboseq#174

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 modules/nf-core/ribocode/prepare/main.nf           | 14 ++++++++++++++
 .../ribocode/prepare/tests/main.nf.test.snap       |  8 ++++++--
 modules/nf-core/ribocode/ribocode/main.nf          |  8 ++++----
 modules/nf-core/ribocode/ribocode/meta.yml         |  4 ++--
 .../nf-core/ribocode/ribocode/tests/main.nf.test   |  2 +-
 .../ribocode/ribocode/tests/main.nf.test.snap      |  5 +----
 modules/nf-core/ribotish/predict/main.nf           |  4 +++-
 modules/nf-core/ribotish/predict/meta.yml          |  7 +++++++
 .../nf-core/ribotish/predict/tests/main.nf.test    | 12 ++++++++----
 9 files changed, 46 insertions(+), 18 deletions(-)
diff --git a/modules/nf-core/ribocode/prepare/main.nf b/modules/nf-core/ribocode/prepare/main.nf
index d61653fceb11..e02739ed21a7 100644
--- a/modules/nf-core/ribocode/prepare/main.nf
+++ b/modules/nf-core/ribocode/prepare/main.nf
@@ -27,6 +27,18 @@ process RIBOCODE_PREPARE {
         -f ${fasta} \\
         -o annotation \\
         $args
+
+    # Pre-build the pyfasta .gdx/.flat sidecars next to transcripts_sequence.fa, keyed like RiboCode, so consumers never write into staged inputs.
+    python - <<'PYTHON'
+from pyfasta import Fasta
+def key_fn(name):  # map a FASTA header to the record key (always a str)
+    if ' ' in name:
+        return name.split()[0]  # first whitespace-delimited token
+    if '|' in name:
+        return name.split('|')[0]  # first pipe-delimited field, not the whole list
+    return name
+Fasta('annotation/transcripts_sequence.fa', key_fn=key_fn)  # constructing Fasta writes the index files
+PYTHON
     """
 
     stub:
@@ -36,6 +48,8 @@ process RIBOCODE_PREPARE {
 
     touch annotation/transcripts_cds.txt
     touch annotation/transcripts_sequence.fa
+    touch annotation/transcripts_sequence.fa.gdx
+    touch annotation/transcripts_sequence.fa.flat
     touch annotation/transcripts.pickle
     """
 }
diff --git a/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap
index 363d03353aac..b955eef94299 100644
--- a/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap
+++ b/modules/nf-core/ribocode/prepare/tests/main.nf.test.snap
@@ -28,7 +28,9 @@
                         [
                             "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883",
                             "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af",
-                            "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33"
+                            "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33",
+                            "transcripts_sequence.fa.flat:md5,e99a891bd574545ef72d40334b383c23",
+                            "transcripts_sequence.fa.gdx:md5,4981ecea133628891d475215d48b9fa3"
                         ]
                     ]
                 ],
@@ -47,7 +49,9 @@
                         [
                             "transcripts.pickle:md5,b83be7910166b56d09c4879d38223883",
                             "transcripts_cds.txt:md5,6fae20439cbe378eb4db60a8bdf6a6af",
-                            "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33"
+                            "transcripts_sequence.fa:md5,b0401ee625d655ea116528507b038c33",
+                            "transcripts_sequence.fa.flat:md5,e99a891bd574545ef72d40334b383c23",
+                            "transcripts_sequence.fa.gdx:md5,4981ecea133628891d475215d48b9fa3"
                         ]
                     ]
                 ],
diff --git a/modules/nf-core/ribocode/ribocode/main.nf b/modules/nf-core/ribocode/ribocode/main.nf
index 0d41be9ac0fe..9abee32b4d6b 100644
--- a/modules/nf-core/ribocode/ribocode/main.nf
+++ b/modules/nf-core/ribocode/ribocode/main.nf
@@ -14,8 +14,8 @@ process RIBOCODE_RIBOCODE {
 
     output:
 
-    tuple val(meta), path("*.txt")                                                  , emit: orf_txt
-    tuple val(meta), path("*_collapsed.txt")                                        , emit: orf_txt_collapsed
+    tuple val(meta), path("${prefix}.txt")                                          , emit: orf_txt
+    tuple val(meta), path("${prefix}_collapsed.txt")                                , emit: orf_txt_collapsed
     tuple val(meta), path("*_ORFs_category.pdf")                                    , emit: orf_pdf, optional: true
     tuple val(meta), path("*_psites.hd5")                                           , emit: psites_hd5, optional: true
     tuple val("${task.process}"), val('ribocode'), eval('RiboCode --version  2>&1') , emit: versions_ribocode, topic: versions
@@ -25,7 +25,7 @@ process RIBOCODE_RIBOCODE {
 
     script:
     def args = task.ext.args ?: ''
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
     """
     # Run RiboCode and capture output to check for errors
     RiboCode \\
@@ -45,7 +45,7 @@ process RIBOCODE_RIBOCODE {
     """
 
     stub:
-    def prefix = task.ext.prefix ?: "${meta.id}"
+    prefix = task.ext.prefix ?: "${meta.id}"
 
     """
     touch ${prefix}.txt
diff --git a/modules/nf-core/ribocode/ribocode/meta.yml b/modules/nf-core/ribocode/ribocode/meta.yml
index a6a382768d3a..5257d0e6c0ba 100644
--- a/modules/nf-core/ribocode/ribocode/meta.yml
+++ b/modules/nf-core/ribocode/ribocode/meta.yml
@@ -55,7 +55,7 @@ output:
           description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-      - "*.txt":
+      - ${prefix}.txt:
           type: file
           description: Text file containing all detected ORFs with detailed information
           pattern: "*.txt"
@@ -66,7 +66,7 @@ output:
           description: |
             Groovy Map containing sample information
             e.g. [ id:'test', single_end:false ]
-      - "*_collapsed.txt":
+      - ${prefix}_collapsed.txt:
           type: file
           description: Text file containing collapsed ORFs (merged isoforms)
           pattern: "*_collapsed.txt"
diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test b/modules/nf-core/ribocode/ribocode/tests/main.nf.test
index 493a228f0286..7f5bc97a1822 100644
--- a/modules/nf-core/ribocode/ribocode/tests/main.nf.test
+++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test
@@ -85,7 +85,7 @@ nextflow_process {
         then {
             assertAll(
                 { assert process.success },
-                { assert process.out.orf_txt[0][1][0].toString().endsWith('.txt') },
+                { assert process.out.orf_txt[0][1].toString().endsWith('.txt') },
                 { assert process.out.orf_txt_collapsed[0][1].toString().endsWith('_collapsed.txt') },
                 { assert process.out.orf_pdf[0][1].toString().endsWith('.pdf') },
                 { assert process.out.psites_hd5[0][1].toString().endsWith('.hd5') }
diff --git a/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap
index a29fa8fd7114..9625d262f504 100644
--- a/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap
+++ b/modules/nf-core/ribocode/ribocode/tests/main.nf.test.snap
@@ -7,10 +7,7 @@
                         "id": "test",
                         "single_end": false
                     },
-                    [
-                        "test.txt:md5,3c6c1f3ffff5f9c4f4e59fd4f52c56f4",
-                        "test_collapsed.txt:md5,d1e13bb728ad0b0e79b9326c75c6e47a"
-                    ]
+                    "test.txt:md5,3c6c1f3ffff5f9c4f4e59fd4f52c56f4"
                 ]
             ],
             [
diff --git a/modules/nf-core/ribotish/predict/main.nf b/modules/nf-core/ribotish/predict/main.nf
index eafe4568b71a..ca6b2ccef6fd 100644
--- a/modules/nf-core/ribotish/predict/main.nf
+++ b/modules/nf-core/ribotish/predict/main.nf
@@ -10,7 +10,7 @@ process RIBOTISH_PREDICT {
     input:
     tuple val(meta), path(bam_ribo), path(bai_ribo)
     tuple val(meta2), path(bam_ti), path(bai_ti)
-    tuple val(meta3), path(fasta), path(gtf)
+    tuple val(meta3), path(fasta), path(gtf), path(reference_gtf, stageAs: 'secondary.gtf')
     tuple val(meta4), path(candidate_orfs)
     tuple val(meta5), path(para_ribo)
     tuple val(meta6), path(para_ti)
@@ -27,6 +27,7 @@ process RIBOTISH_PREDICT {
     script:
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    def reference_gtf_arg = reference_gtf ? "-a ${reference_gtf}" : '' // empty string (no -a flag) when callers pass [] for reference_gtf
 
     ribo_bam_cmd = ''
     ti_bam_cmd = ''
@@ -48,6 +49,7 @@ process RIBOTISH_PREDICT {
         $ti_bam_cmd \\
         -f $fasta \\
         -g $gtf \\
+        $reference_gtf_arg \\
         -o ${prefix}_pred.txt \\
         --allresult ${prefix}_all.txt \\
         --transprofile ${prefix}_transprofile.py \\
diff --git a/modules/nf-core/ribotish/predict/meta.yml b/modules/nf-core/ribotish/predict/meta.yml
index a0cb0b93aeb4..98957a9b141e 100644
--- a/modules/nf-core/ribotish/predict/meta.yml
+++ b/modules/nf-core/ribotish/predict/meta.yml
@@ -64,6 +64,13 @@ input:
           GTF-format annotation file for reference sequences used in the bam file
         pattern: "*.gtf"
         ontologies: []
+    - reference_gtf:
+        type: file
+        description: |
+          Optional secondary GTF annotation passed to ribotish as `-a` (e.g. a
+          MANE/RefSeq overlay). Pass `[]` to omit.
+        pattern: "*.gtf"
+        ontologies: []
   - - meta4:
         type: map
         description: |
diff --git a/modules/nf-core/ribotish/predict/tests/main.nf.test b/modules/nf-core/ribotish/predict/tests/main.nf.test
index e20e29820735..3b0c96707e39 100644
--- a/modules/nf-core/ribotish/predict/tests/main.nf.test
+++ b/modules/nf-core/ribotish/predict/tests/main.nf.test
@@ -38,7 +38,8 @@ nextflow_process {
                 input[2] = GUNZIP.out.gunzip.map{[
                     [id:'homo_sapiens_chr20'],
                     it[1],
-                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true)
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true),
+                    []
                 ]}
                 input[3] = Channel.of([[],[]])
                 input[4] = Channel.of([[],[]])
@@ -74,7 +75,8 @@ nextflow_process {
                 input[2] = GUNZIP.out.gunzip.map{[
                     [id:'homo_sapiens_chr20'],
                     it[1],
-                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true)
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true),
+                    []
                 ]}
                 input[3] = Channel.of([[],[]])
                 input[4] = Channel.of([[],[]])
@@ -114,7 +116,8 @@ nextflow_process {
                 input[2] = GUNZIP.out.gunzip.map{[
                     [id:'homo_sapiens_chr20'],
                     it[1],
-                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true)
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true),
+                    []
                 ]}
                 input[3] = Channel.of([[],[]])
                 input[4] = Channel.of([[],[]])
@@ -156,7 +159,8 @@ nextflow_process {
                 input[2] = GUNZIP.out.gunzip.map{[
                     [id:'homo_sapiens_chr20'],
                     it[1],
-                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true)
+                    file(params.modules_testdata_base_path + "genomics/homo_sapiens/riboseq_expression/Homo_sapiens.GRCh38.111_chr20.gtf", checkIfExists: true),
+                    []
                 ]}
                 input[3] = Channel.of([[],[]])
                 input[4] = Channel.of([[],[]])