nf-core · Darcy220606 · May 21, 2024 · Apr 25, 2024 · Apr 25, 2024 · Apr 25, 2024
diff --git a/subworkflows/nf-core/mmseqs_contig_taxonomy/main.nf b/subworkflows/nf-core/mmseqs_contig_taxonomy/main.nf
@@ -0,0 +1,56 @@
+include { MMSEQS_CREATEDB  } from '../../../modules/nf-core/mmseqs/createdb/main'
+include { MMSEQS_DATABASES } from '../../../modules/nf-core/mmseqs/databases/main'
+include { MMSEQS_TAXONOMY  } from '../../../modules/nf-core/mmseqs/taxonomy/main'
+include { MMSEQS_CREATETSV } from '../../../modules/nf-core/mmseqs/createtsv/main'
+
+workflow MMSEQS_CONTIG_TAXONOMY {
+
+    take:
+    contigs            // channel: [ val(meta), path(contigs) ]
+    mmseqs_databases   // value: path to a local mmseqs2 db; null, '' or [] to download one instead
+    databases_id       // value: ID of the mmseqs2 db to download (e.g. 'Kalamari'); used when no local db is given
+
+    main:
+
+    ch_versions               = Channel.empty()
+    ch_mmseqs_db              = Channel.empty()
+    ch_taxonomy_querydb       = Channel.empty()
+    ch_taxonomy_querydb_taxdb = Channel.empty()
+    ch_taxonomy_tsv           = Channel.empty()
+
+    // Use the local reference db when supplied, otherwise download it with MMSEQS_DATABASES.
+    // Groovy truth keeps this check null-safe: `null.empty` would throw for an unset param.
+    if ( mmseqs_databases ) {
+        ch_mmseqs_db = Channel
+            .fromPath( mmseqs_databases )
+            .first()
+    } else {
+        MMSEQS_DATABASES ( databases_id )
+        ch_versions  = ch_versions.mix( MMSEQS_DATABASES.out.versions )
+        ch_mmseqs_db = MMSEQS_DATABASES.out.database
+    }
+
+    // Create db for query contigs, assign taxonomy and convert to table format
+    // MMSEQS_CREATEDB: build the query db from the input contigs
+    MMSEQS_CREATEDB ( contigs )
+    ch_versions         = ch_versions.mix( MMSEQS_CREATEDB.out.versions )
+    ch_taxonomy_querydb = MMSEQS_CREATEDB.out.db
+
+    // MMSEQS_TAXONOMY: classify the query db against the reference db
+    MMSEQS_TAXONOMY ( ch_taxonomy_querydb, ch_mmseqs_db )
+    ch_versions               = ch_versions.mix( MMSEQS_TAXONOMY.out.versions )
+    ch_taxonomy_querydb_taxdb = MMSEQS_TAXONOMY.out.db_taxonomy
+
+    // MMSEQS_CREATETSV: [[:],[]] is an empty placeholder for the module's second input
+    MMSEQS_CREATETSV ( ch_taxonomy_querydb_taxdb, [[:],[]], ch_taxonomy_querydb )
+    ch_versions     = ch_versions.mix( MMSEQS_CREATETSV.out.versions )
+    ch_taxonomy_tsv = MMSEQS_CREATETSV.out.tsv
+
+    emit:
+    taxonomy    = ch_taxonomy_tsv           // channel: [ val(meta), tsv ]
+    db_mmseqs   = ch_mmseqs_db              // channel: [ mmseqs_database ]
+    db_taxonomy = ch_taxonomy_querydb_taxdb // channel: [ val(meta), db_taxonomy ]
+    db_contig   = ch_taxonomy_querydb       // channel: [ val(meta), db ]
+    versions    = ch_versions               // channel: [ versions.yml ]
+}
+
diff --git a/subworkflows/nf-core/mmseqs_contig_taxonomy/meta.yml b/subworkflows/nf-core/mmseqs_contig_taxonomy/meta.yml
@@ -0,0 +1,68 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "mmseqs_contig_taxonomy"
+description: Assign taxonomy to contigs using the MMseqs2 workflow.
+keywords:
+  - metagenomics
+  - database
+  - contigs
+  - mmseqs2
+  - taxonomy
+components:
+  - mmseqs/databases
+  - mmseqs/createdb
+  - mmseqs/taxonomy
+  - mmseqs/createtsv
+input:
+  - contigs:
+      type: file
+      description: |
+        Channel containing each fasta in nucleotide format as a distinct element with meta.
+        Structure: [ val(meta), path(fasta) ]
+      pattern: "*.{fasta,fa,fna}"
+  - mmseqs_databases:
+      type: string
+      description: |
+        Path to a local database created by mmseqs2 databases. Leave empty to download the database given by databases_id instead.
+        Structure: [ path(mmseqsdb) ]
+      pattern: "*/mmseqs/database"
+  - databases_id:
+      type: string
+      description: |
+        Channel containing the ID of a database made available by developers of mmseqs2. Please refer to https://github.com/soedinglab/MMseqs2/wiki#downloading-databases for possible IDs to use.
+        Structure: [ val(id) ]
+output:
+  - taxonomy:
+      type: file
+      description: |
+        Channel containing the tab-separated file with all assigned taxonomy.
+        Structure: [ val(meta), path(tsv) ]
+      pattern: "*.tsv"
+  - db_mmseqs:
+      type: directory
+      description: |
+        Channel containing the mmseqs database directory. Useful when the database is downloaded in the pipeline.
+        Structure: [ path(outputdir) ]
+      pattern: "*/mmseqs_database"
+  - db_taxonomy:
+      type: directory
+      description: |
+        Channel containing the database with the taxonomic classification for each input fasta file.
+        Structure: [ val(meta), path(outputdir) ]
+      pattern: "*/sample_taxonomy"
+  - db_contig:
+      type: directory
+      description: |
+        Channel containing the mmseqs-format database created from each input fasta file.
+        Structure: [ val(meta), path(outputdir) ]
+      pattern: "*/sample_db"
+  - versions:
+      type: file
+      description: |
+        File containing software versions
+        Structure: [ path(versions.yml) ]
+      pattern: "versions.yml"
+
+authors:
+  - "@darcy220606"
+maintainers:
+  - "@darcy220606"
@@ -0,0 +1,60 @@
+nextflow_workflow {
+
+    name "Test Subworkflow MMSEQS_CONTIG_TAXONOMY"
+    script "../main.nf"
+    workflow "MMSEQS_CONTIG_TAXONOMY"
+    config './nextflow.config'
+
+    tag "subworkflows"
+    tag "subworkflows_nfcore"
+    tag "subworkflows/mmseqs_contig_taxonomy"
+    tag "gunzip"
+    tag "mmseqs/createdb"
+    tag "mmseqs/databases"
+    tag "mmseqs/taxonomy"
+    tag "mmseqs/createtsv"
+
+    test("mmseqs_contig_taxonomy - bacteroides_fragilis - contig") {
+
+        setup {
+            run("GUNZIP") {
+                script "modules/nf-core/gunzip/main.nf"
+                process {
+                    """
+                    input[0] = Channel.of([
+                                [id:'bacteroides_fragilis'],
+                                file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+                            ]
+                    )
+                    """
+                }
+            }
+        }
+
+        when {
+            workflow {
+                """
+                input[0] = GUNZIP.out.gunzip
+                // No local database is supplied (input[1] is empty), so the db named in input[2] is downloaded
+                input[1] = []
+                input[2] = 'Kalamari'
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert workflow.success },
+                { assert workflow.out.db_contig.get(0).get(1) ==~ ".*/bacteroides_fragilis" },
+                { assert snapshot (
+                        workflow.out.versions,
+                        file(workflow.out.taxonomy[0][1]).readLines()[0].contains("NZ_CP069563.1"),
+                        file(workflow.out.db_taxonomy.get(0).get(1)).list().sort(),
+                        file(workflow.out.db_contig.get(0).get(1)).name,
+                        file(workflow.out.db_mmseqs.get(0)).name,
+                        ).match()
+                },
+            )
+        }
+    }
+}
@@ -0,0 +1,26 @@
+{
+    "mmseqs_contig_taxonomy - bacteroides_fragilis - contig": {
+        "content": [
+            [
+                "versions.yml:md5,394dfc0a2af83eb4c8ec9e180cb44b37",
+                "versions.yml:md5,49890601bcc79306ea202d0901d0578e",
+                "versions.yml:md5,50d8f191f53c3da260e67aa3ca64fd77",
+                "versions.yml:md5,b74ec13e6b0d418f76a233963be3c61c"
+            ],
+            true,
+            [
+                "bacteroides_fragilis.0",
+                "bacteroides_fragilis.1",
+                "bacteroides_fragilis.dbtype",
+                "bacteroides_fragilis.index"
+            ],
+            "bacteroides_fragilis",
+            "mmseqs_database"
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-05-17T11:25:44.10208454"
+    }
+}
diff --git a/subworkflows/nf-core/mmseqs_contig_taxonomy/tests/nextflow.config b/subworkflows/nf-core/mmseqs_contig_taxonomy/tests/nextflow.config
@@ -0,0 +1,8 @@
+process {
+    // Test-only overrides applied to the taxonomy assignment step
+    withName: 'MMSEQS_TAXONOMY' {
+        memory   = 7.GB
+        ext.args = '--search-type 2'
+    }
+
+}
@@ -0,0 +1,2 @@
+subworkflows/mmseqs_contig_taxonomy:
+  - subworkflows/nf-core/mmseqs_contig_taxonomy/**