diff --git a/conf/modules.config b/conf/modules.config index 7f87441..39d2e37 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -145,6 +145,7 @@ process { // withName: 'GENOME_UPLOAD' { + ext.args = "--force" // ensures that ENA metadata is re-downloaded even if cache exists publishDir = [ path: { "${params.outdir}/${params.mode}/upload/manifests" }, mode: params.publish_dir_mode, diff --git a/modules/local/count_rna/main.nf b/modules/local/count_rna/main.nf index ebd4154..ca9876e 100644 --- a/modules/local/count_rna/main.nf +++ b/modules/local/count_rna/main.nf @@ -11,6 +11,8 @@ process COUNT_RNA { input: tuple val(meta), path(trnas_stats), path(rrna_gff) + val min_trna_count + val min_rrna_percentage output: tuple val(meta), path("*rna_decision.tsv"), emit: rna_decision @@ -22,8 +24,8 @@ process COUNT_RNA { --trna ${trnas_stats} \\ --rrna ${rrna_gff} \\ --name ${meta.id} \\ - --trna-limit ${params.trna_limit} \\ - --rrna-limit ${params.rrna_limit} \\ + --trna-limit ${min_trna_count} \\ + --rrna-limit ${min_rrna_percentage} \\ --output ${meta.id}_rna_decision.tsv cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/count_rna/meta.yml b/modules/local/count_rna/meta.yml index 5cbb5fa..d7e92f9 100644 --- a/modules/local/count_rna/meta.yml +++ b/modules/local/count_rna/meta.yml @@ -30,6 +30,12 @@ input: type: file description: rRNA annotation file from barrnap (GFF3 format) pattern: "*.gff" + - min_trna_count: + type: value + description: Minimum required number of tRNA genes. + - min_rrna_percentage: + type: value + description: Minimum required percentage coverage for each rRNA subunit. 
output: - meta: diff --git a/modules/local/count_rna/tests/main.nf.test b/modules/local/count_rna/tests/main.nf.test index dbb8543..e687fbb 100644 --- a/modules/local/count_rna/tests/main.nf.test +++ b/modules/local/count_rna/tests/main.nf.test @@ -16,6 +16,8 @@ nextflow_process { file("$moduleDir/tests/data/ecoli.stats", checkIfExists: true), file("$moduleDir/tests/data/ecoli_bac.gff", checkIfExists: true) ] + input[1] = 18 + input[2] = 80 """ } } @@ -49,6 +51,8 @@ nextflow_process { file("$moduleDir/tests/data/ecoli.stats", checkIfExists: true), file("$moduleDir/tests/data/ecoli_bac.gff", checkIfExists: true) ] + input[1] = 18 + input[2] = 80 """ } } diff --git a/modules/local/ena_webin_cli_wrapper/main.nf b/modules/local/ena_webin_cli_wrapper/main.nf index 7918ce1..b3a33b2 100644 --- a/modules/local/ena_webin_cli_wrapper/main.nf +++ b/modules/local/ena_webin_cli_wrapper/main.nf @@ -11,16 +11,18 @@ process ENA_WEBIN_CLI_WRAPPER { input: tuple val(meta), path(submission_item), path(manifest) path(webin_cli_jar) + val test_upload + val webincli_submit output: tuple val(meta), path("*_accessions.tsv"), emit: accessions path "versions.yml", emit: versions script: - def args = task.ext.args ?: "" - def prefix = task.ext.prefix ?: "${meta.id}" - def test_flag = params.test_upload ? "--test" : "" - def submit_or_validate = params.webincli_submit ? "--mode submit": "--mode validate" + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def test_flag = test_upload ? "--test" : "" + def submit_or_validate = webincli_submit ? 
"--mode submit": "--mode validate" """ # change FASTA path in manifest to current workdir diff --git a/modules/local/ena_webin_cli_wrapper/meta.yml b/modules/local/ena_webin_cli_wrapper/meta.yml index a0abd93..ee7993d 100644 --- a/modules/local/ena_webin_cli_wrapper/meta.yml +++ b/modules/local/ena_webin_cli_wrapper/meta.yml @@ -31,19 +31,32 @@ input: type: file description: | The Webin-CLI JAR file downloaded by ena_webin_cli_download. - pattern: "webin-cli-*.jar" + pattern: "webin-cli-*.jar" + - - test_upload: + type: value + description: Whether to run ENA Webin-CLI in test mode. + - - webincli_submit: + type: value + description: Whether to submit (`true`) or only validate (`false`). output: - - - accessions: - type: file - description: | - TSV file containing the accession assigned by ENA for the submitted item. - File has two columns: "alias" and "accession". - - - versions: - - "versions.yml": - type: file - description: File containing software versions. - pattern: "versions.yml" + - accessions: + - meta: + type: map + description: | + Groovy Map containing sample information. + e.g. `[ id:'sample1' ]` + - "*_accessions.tsv": + type: file + description: | + TSV file containing the accession assigned by ENA for the submitted item. + File has two columns: "alias" and "accession". + pattern: "*_accessions.tsv" + - versions: + - "versions.yml": + type: file + description: File containing software versions. 
+ pattern: "versions.yml" authors: - "@KateSakharova" diff --git a/modules/local/generate_assembly_manifest/main.nf b/modules/local/generate_assembly_manifest/main.nf index fcaf32f..aefc0a7 100644 --- a/modules/local/generate_assembly_manifest/main.nf +++ b/modules/local/generate_assembly_manifest/main.nf @@ -7,6 +7,7 @@ process GENERATE_ASSEMBLY_MANIFEST { input: tuple val(meta), path(assembly_fasta), path(data_csv) val(assembly_study) + val(is_tpa) output: tuple val(meta), path("${assembly_study}_upload/*.manifest") , emit: manifest @@ -18,7 +19,7 @@ process GENERATE_ASSEMBLY_MANIFEST { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def tpa = params.upload_tpa ? "--tpa" : "" + def tpa = is_tpa ? "--tpa" : "" """ assembly_manifest \\ --study ${assembly_study} \\ diff --git a/modules/local/generate_assembly_manifest/meta.yml b/modules/local/generate_assembly_manifest/meta.yml index da69a3b..2d58258 100644 --- a/modules/local/generate_assembly_manifest/meta.yml +++ b/modules/local/generate_assembly_manifest/meta.yml @@ -31,6 +31,9 @@ input: description: | Pre-existing study ID to submit to if available. Must exist in the webin account. + - is_tpa: + type: value + description: Whether the assembly being submitted is a TPA (Third Party Annotation). 
output: - meta: diff --git a/modules/local/generate_assembly_manifest/tests/main.nf.test b/modules/local/generate_assembly_manifest/tests/main.nf.test index 2b96eae..22fea31 100644 --- a/modules/local/generate_assembly_manifest/tests/main.nf.test +++ b/modules/local/generate_assembly_manifest/tests/main.nf.test @@ -18,6 +18,7 @@ nextflow_process { file(params.pipelines_testdata_base_path + "/samplesheets/samplesheet_generatemanifest.csv", checkIfExists: true) ] input[1] = "PRJ12345" + input[2] = false """ } @@ -62,6 +63,7 @@ nextflow_process { file('test_samplesheet_generatemanifest.csv') ] input[1] = "PRJ12345" + input[2] = false """ } } diff --git a/modules/local/genome_upload/main.nf b/modules/local/genome_upload/main.nf index 221e8fe..0000d25 100644 --- a/modules/local/genome_upload/main.nf +++ b/modules/local/genome_upload/main.nf @@ -9,6 +9,9 @@ process GENOME_UPLOAD { path(table_for_upload) val(mags_or_bins_flag) val(submission_study) + val(centre_name) + val(is_tpa) + val(test_upload) output: path "results/{MAG,bin}_upload/manifests*/*.manifest" , emit: manifests @@ -22,19 +25,17 @@ process GENOME_UPLOAD { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def tpa = params.upload_tpa ? "--tpa" : "" - def force = params.upload_force ? "--force" : "" - def mode = (!params.test_upload) ? "--live" : "" + def args = task.ext.args ?: '' + def tpa = is_tpa ? "--tpa" : "" + def mode = (!test_upload) ? 
"--live" : "" """ genome_upload \\ - -u $submission_study \\ + -u ${submission_study} \\ --genome_info ${table_for_upload} \\ - --centre_name $params.centre_name \\ + --centre_name ${centre_name} \\ --${mags_or_bins_flag} \\ ${tpa} \\ - ${force} \\ ${mode} \\ --out results \\ ${args} diff --git a/modules/local/genome_upload/meta.yml b/modules/local/genome_upload/meta.yml index 0a33443..d04f33a 100644 --- a/modules/local/genome_upload/meta.yml +++ b/modules/local/genome_upload/meta.yml @@ -13,11 +13,6 @@ tools: documentation: https://github.com/EBI-Metagenomics/genome_uploader licence: ["Apache License"] input: - - meta: - type: map - description: | - Groovy Map containing sample information. - e.g. [ id:'test', single_end:false ] - table_for_upload: type: file description: | @@ -26,16 +21,26 @@ input: type: file description: File in FASTA format containing targeted mag/bin/assembly for submission pattern: "*.{fasta,fna,fas,fa}*" + - mags_or_bins_flag: + type: value + description: Upload mode selector. Expected values are `mags` or `bins`. + - submission_study: + type: value + description: ENA study accession used for submission. + - centre_name: + type: value + description: Submitting centre name required by ENA. + - is_tpa: + type: value + description: Whether genomes are submitted as TPA (Third Party Annotation). + - test_upload: + type: value + description: Whether to run in ENA test mode. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - manifests: type: file - description: Maniseft file required for submission with webin-cli + description: Manifest file required for submission with webin-cli pattern: "*.manifest" - ena_upload_backup_json: type: file diff --git a/nextflow.config b/nextflow.config index 21871a0..7807171 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,8 +16,6 @@ params { submission_study = null centre_name = null upload_tpa = false - // TODO: remove this parameter because it will never be used, and update the genome_uploader module accordingly - upload_force = true test_upload = true webincli_submit = true webin_cli_version = "9.0.3" diff --git a/nextflow_schema.json b/nextflow_schema.json index 9f52527..155039d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -292,12 +292,6 @@ "default": true, "help": "Use that flag for development purposes or for validation of your submissions. Submission would be done to TEST ENA server instead of LIVE server. TEST server holds data 24h and removes afterwords. LIVE server does not have option to modify your submission, you can only suppress data and re-upload" }, - "upload_force": { - "type": "boolean", - "description": "Enables force mode for genome_uploader (used for MAGs/BINs submission)", - "default": true, - "help": "Forces reset of bin/MAG sample xmls generation. This is useful if you changed something in your tsv table, or if ENA metadata haven't been downloaded correctly (you can check this in ENA_backup.json). 
Default: true" - }, "submission_study": { "type": "string", "description": "ENA study accession (PRJ/ERP) to submit the data to", @@ -319,7 +313,7 @@ }, "webin_cli_version": { "type": "string", - "description": "Version of webon-cli.jar to use for submission", + "description": "Version of webin-cli.jar to use for submission", "default": "9.0.3", "help": "Check version https://github.com/enasequence/webin-cli" } diff --git a/subworkflows/local/genome_evaluation.nf b/subworkflows/local/genome_evaluation.nf index 6eb4fd9..1324339 100644 --- a/subworkflows/local/genome_evaluation.nf +++ b/subworkflows/local/genome_evaluation.nf @@ -20,7 +20,11 @@ include { CHECKM2_PREDICT } from '../../modules/nf-core/checkm2/predict workflow GENOME_EVALUATION { take: - ch_fasta // channel: [ val(meta), path(fasta) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + ch_checkm2_db // channel: [ val(meta), path(db) ] - pre-built db as directory + // provide channel.empty() to trigger automatic download via ch_checkm2_db_zenodo_id + ch_checkm2_db_zenodo_id // channel: [ val(meta), val(db_id) ] - db ID for CHECKM2_DATABASEDOWNLOAD (e.g. 
'1234567') + // only used if ch_checkm2_db is empty main: ch_versions = channel.empty() @@ -29,24 +33,19 @@ workflow GENOME_EVALUATION { // Database preparation // - if (!params.checkm2_db || !file(params.checkm2_db).exists()) { - // Conditional download: only trigger if ch_fasta has items - ch_download_trigger = ch_fasta - .map { _meta, _fasta -> params.checkm2_db_zenodo_id } - .first() // Only need one trigger regardless of how many fasta files + // Download and prepare db from scratch if no pre-built db provided + // Only trigger if ch_fasta has items + ch_download_trigger = ch_checkm2_db + .count() + .filter { count -> count == 0 } // Only proceed if ch_checkm2_db is empty + .combine(ch_fasta.first()) + .combine(ch_checkm2_db_zenodo_id) + .map { _count, _meta, _fasta, db_meta, db_id -> [db_meta, db_id] } - CHECKM2_DATABASEDOWNLOAD(ch_download_trigger) - ch_checkm2_db = CHECKM2_DATABASEDOWNLOAD.out.database - } - else { - // Use existing database - ch_checkm2_db = channel.of( - [ - [id: "checkm2_db"], - file(params.checkm2_db), - ] - ) - } + CHECKM2_DATABASEDOWNLOAD(ch_download_trigger) + + // Combine db sources - one of these channels will be empty depending on inputs + ch_db = ch_checkm2_db.mix(CHECKM2_DATABASEDOWNLOAD.out.database).first() // // Genome evaluation @@ -54,7 +53,7 @@ workflow GENOME_EVALUATION { CHECKM2_PREDICT( ch_fasta, - ch_checkm2_db, + ch_db, ) emit: diff --git a/subworkflows/local/rna_detection.nf b/subworkflows/local/rna_detection.nf index d0130f6..85a83e5 100644 --- a/subworkflows/local/rna_detection.nf +++ b/subworkflows/local/rna_detection.nf @@ -20,7 +20,9 @@ include { TRNASCANSE } from '../../modules/nf-core/trnascanse' workflow RNA_DETECTION { take: - fasta + fasta // channel: [ val(meta), path(fasta) ] + min_trna_count // val: int - minimum number of tRNAs required to pass MIMAG standard (e.g. 18) + min_rrna_percentage // val: float - minimum percentage of rRNA genes required to pass MIMAG standard (e.g. 
75) main: @@ -36,7 +38,9 @@ workflow RNA_DETECTION { ch_versions = ch_versions.mix( TRNASCANSE.out.versions ) COUNT_RNA( - TRNASCANSE.out.stats.join(BARRNAP.out.gff) + TRNASCANSE.out.stats.join(BARRNAP.out.gff), + min_trna_count, + min_rrna_percentage ) ch_versions = ch_versions.mix( COUNT_RNA.out.versions ) diff --git a/workflows/assemblysubmit.nf b/workflows/assemblysubmit.nf index 5743f7d..146506f 100644 --- a/workflows/assemblysubmit.nf +++ b/workflows/assemblysubmit.nf @@ -165,7 +165,8 @@ workflow ASSEMBLYSUBMIT { // Generate assembly manifest files and submit them to ENA GENERATE_ASSEMBLY_MANIFEST( assemblies_with_coverage.join(assembly_metadata_csv), - study_accession_ch.first() + study_accession_ch.first(), + params.upload_tpa ) ENA_WEBIN_CLI_DOWNLOAD ( @@ -174,7 +175,9 @@ workflow ASSEMBLYSUBMIT { SUBMIT ( assemblies_with_coverage.join(GENERATE_ASSEMBLY_MANIFEST.out.manifest), - ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar + ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar, + params.test_upload, + params.webincli_submit ) // diff --git a/workflows/genomesubmit.nf b/workflows/genomesubmit.nf index a17299c..a17e809 100644 --- a/workflows/genomesubmit.nf +++ b/workflows/genomesubmit.nf @@ -129,7 +129,9 @@ workflow GENOMESUBMIT { .set { branched_rna_results } RNA_DETECTION ( - branched_rna_results.rna_prediction_input + branched_rna_results.rna_prediction_input, + params.trna_limit, + params.rrna_limit ) ch_versions = ch_versions.mix( RNA_DETECTION.out.versions ) @@ -151,8 +153,19 @@ workflow GENOMESUBMIT { } .set { branched_stats_results } + // build input structures for CheckM2 DB depending on what is provided as input + def checkm2_db_input = params.checkm2_db + ? channel.of( [['id': 'CHECKM2_DB'], file(params.checkm2_db)] ) + : channel.empty() + + def checkm2_db_id_input = (!params.checkm2_db && params.checkm2_db_download_id) + ?
channel.of( [['id': 'CHECKM2_DB_id'], params.checkm2_db_download_id] ) + : channel.empty() + GENOME_EVALUATION ( - branched_stats_results.genome_evaluation_input + branched_stats_results.genome_evaluation_input, + checkm2_db_input, + checkm2_db_id_input ) // Create a value channel with the version string @@ -283,8 +296,11 @@ workflow GENOMESUBMIT { CREATE_MANIFESTS( fasta_updated_with_stats.map{meta, fasta -> fasta}.collect(), genome_metadata_csv, - params.mode, // mags or bins - study_accession_ch.first() + mags_or_bins_flag, // mags or bins + study_accession_ch.first(), + params.centre_name, + params.upload_tpa, + params.test_upload ) // All manifests were generated in one run @@ -314,7 +330,9 @@ workflow GENOMESUBMIT { SUBMIT ( ch_combined, - ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar + ENA_WEBIN_CLI_DOWNLOAD.out.webin_cli_jar, + params.test_upload, + params.webincli_submit ) //