From 640aca0f40dde38caca487449ed392cca3f7911f Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 13 Jun 2022 16:08:20 +0200 Subject: [PATCH 1/4] fix-metadata_category --- CHANGELOG.md | 2 ++ modules/local/metadata_all.nf | 21 ++++------- modules/local/qiime2_diversity_alpha.nf | 40 ++++++++------------- modules/local/qiime2_diversity_beta.nf | 48 +++++++++---------------- modules/local/qiime2_filterasv.nf | 45 +++++++---------------- subworkflows/local/qiime2_ancom.nf | 8 +++-- subworkflows/local/qiime2_diversity.nf | 3 +- workflows/ampliseq.nf | 14 +++++--- 8 files changed, 68 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44aab9e3..d2964315 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#448](https://github.com/nf-core/ampliseq/pull/448) - Pairwise statistics will be now performed on a subset of metadata columns specified with `--metadata_category` instead of ignoring that setting. 
+ ### `Dependencies` ### `Removed` diff --git a/modules/local/metadata_all.nf b/modules/local/metadata_all.nf index 2ac050b1..e022c6d7 100644 --- a/modules/local/metadata_all.nf +++ b/modules/local/metadata_all.nf @@ -9,24 +9,17 @@ process METADATA_ALL { input: path(metadata) - val(metadata_category) output: stdout script: - if( !metadata_category ) { - """ - metadata_all.r ${metadata} + """ + metadata_all.r ${metadata} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') - END_VERSIONS - """ - } else { - """ - printf ${metadata_category} - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + R: \$(R --version 2>&1 | sed -n 1p | sed 's/R version //' | sed 's/ (.*//') + END_VERSIONS + """ } diff --git a/modules/local/qiime2_diversity_alpha.nf b/modules/local/qiime2_diversity_alpha.nf index 4a597eb5..20acd60e 100644 --- a/modules/local/qiime2_diversity_alpha.nf +++ b/modules/local/qiime2_diversity_alpha.nf @@ -6,38 +6,26 @@ process QIIME2_DIVERSITY_ALPHA { container "quay.io/qiime2/core:2021.8" input: - tuple path(metadata), path(core), val(category) + tuple path(metadata), path(core) output: path("alpha_diversity/*"), emit: alpha path "versions.yml" , emit: versions script: - if ( category.length() > 0 ) { - """ - export XDG_CONFIG_HOME="\${PWD}/HOME" + """ + export XDG_CONFIG_HOME="\${PWD}/HOME" - qiime diversity alpha-group-significance \ - --i-alpha-diversity ${core} \ - --m-metadata-file ${metadata} \ - --o-visualization ${core.baseName}-vis.qzv - qiime tools export --input-path ${core.baseName}-vis.qzv \ - --output-path "alpha_diversity/${core.baseName}" + qiime diversity alpha-group-significance \ + --i-alpha-diversity ${core} \ + --m-metadata-file ${metadata} \ + --o-visualization ${core.baseName}-vis.qzv + qiime tools export --input-path ${core.baseName}-vis.qzv \ + --output-path "alpha_diversity/${core.baseName}" - cat <<-END_VERSIONS > versions.yml - 
"${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } else { - """ - mkdir alpha_diversity - echo "" > "alpha_diversity/WARNING No column in ${metadata.baseName} seemed suitable.txt" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) + END_VERSIONS + """ } diff --git a/modules/local/qiime2_diversity_beta.nf b/modules/local/qiime2_diversity_beta.nf index 0e9b2113..49b11e3e 100644 --- a/modules/local/qiime2_diversity_beta.nf +++ b/modules/local/qiime2_diversity_beta.nf @@ -1,5 +1,5 @@ process QIIME2_DIVERSITY_BETA { - tag "${core.baseName}" + tag "${core.baseName} - ${category}" label 'process_low' conda (params.enable_conda ? 
{ exit 1 "QIIME2 has no conda package" } : null) @@ -13,37 +13,21 @@ process QIIME2_DIVERSITY_BETA { path "versions.yml" , emit: versions script: - if ( category.length() > 0 ) { - """ - export XDG_CONFIG_HOME="\${PWD}/HOME" + """ + export XDG_CONFIG_HOME="\${PWD}/HOME" - IFS=',' read -r -a metacategory <<< \"$category\" - for j in \"\${metacategory[@]}\" - do - qiime diversity beta-group-significance \ - --i-distance-matrix ${core} \ - --m-metadata-file ${metadata} \ - --m-metadata-column \"\$j\" \ - --o-visualization ${core.baseName}-\$j.qzv \ - --p-pairwise - qiime tools export --input-path ${core.baseName}-\$j.qzv \ - --output-path beta_diversity/${core.baseName}-\$j - done + qiime diversity beta-group-significance \ + --i-distance-matrix ${core} \ + --m-metadata-file ${metadata} \ + --m-metadata-column \"${category}\" \ + --o-visualization ${core.baseName}-${category}.qzv \ + --p-pairwise + qiime tools export --input-path ${core.baseName}-${category}.qzv \ + --output-path beta_diversity/${core.baseName}-${category} - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } else { - """ - mkdir beta_diversity - echo "" > "beta_diversity/WARNING No column in ${metadata.baseName} seemed suitable.txt" - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) + END_VERSIONS + """ } diff --git a/modules/local/qiime2_filterasv.nf b/modules/local/qiime2_filterasv.nf index 22ba666a..63b911c1 100644 --- a/modules/local/qiime2_filterasv.nf +++ 
b/modules/local/qiime2_filterasv.nf @@ -6,44 +6,25 @@ process QIIME2_FILTERASV { container "quay.io/qiime2/core:2021.8" input: - path(metadata) - path(table) - val(category) + tuple path(metadata), path(table), val(category) output: path("*.qza") , emit: qza path "versions.yml" , emit: versions script: - if ( category.length() > 0 ) { - """ - export XDG_CONFIG_HOME="\${PWD}/HOME" + """ + export XDG_CONFIG_HOME="\${PWD}/HOME" - IFS=',' read -r -a metacategory <<< \"$category\" + qiime feature-table filter-samples \ + --i-table ${table} \ + --m-metadata-file ${metadata} \ + --p-where \"${category}<>\'\'\" \ + --o-filtered-table ${category}.qza - #remove samples that do not have any value - for j in \"\${metacategory[@]}\" - do - qiime feature-table filter-samples \ - --i-table ${table} \ - --m-metadata-file ${metadata} \ - --p-where \"\$j<>\'\'\" \ - --o-filtered-table \$j.qza - done - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } else { - """ - mkdir beta_diversity - echo "" > "WARNING No column in ${metadata.baseName} seemed suitable.qza" - cat <<-END_VERSIONS > versions.yml - "${task.process}": - qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + qiime2: \$( qiime --version | sed -e "s/q2cli version //g" | tr -d '`' | sed -e "s/Run qiime info for more version details.//g" ) + END_VERSIONS + """ } diff --git a/subworkflows/local/qiime2_ancom.nf b/subworkflows/local/qiime2_ancom.nf index 21b4be75..bf061c97 100644 --- a/subworkflows/local/qiime2_ancom.nf +++ b/subworkflows/local/qiime2_ancom.nf @@ -17,12 +17,16 @@ workflow QIIME2_ANCOM { main: //Filter ASV table to get rid of samples that have no metadata values - QIIME2_FILTERASV ( 
ch_metadata, ch_asv, ch_metacolumn_all ) + ch_metadata + .combine( ch_asv ) + .combine( ch_metacolumn_all ) + .set{ ch_for_filterasv } + QIIME2_FILTERASV ( ch_for_filterasv ) //ANCOM on various taxonomic levels ch_taxlevel = Channel.from( tax_agglom_min..tax_agglom_max ) ch_metadata - .combine( QIIME2_FILTERASV.out.qza.flatten() ) + .combine( QIIME2_FILTERASV.out.qza ) .combine( ch_tax ) .combine( ch_taxlevel ) .set{ ch_for_ancom_tax } diff --git a/subworkflows/local/qiime2_diversity.nf b/subworkflows/local/qiime2_diversity.nf index 9508a11a..7b02a7a2 100644 --- a/subworkflows/local/qiime2_diversity.nf +++ b/subworkflows/local/qiime2_diversity.nf @@ -37,10 +37,9 @@ workflow QIIME2_DIVERSITY { //Print warning if rarefaction depth is <10000 QIIME2_DIVERSITY_CORE.out.depth.subscribe { if ( it.baseName.toString().startsWith("WARNING") ) log.warn it.baseName.toString().replace("WARNING ","QIIME2_DIVERSITY_CORE: ") } - //alpha_diversity ( ch_metadata, DIVERSITY_CORE.out.qza, ch_metacolumn_all ) + //alpha_diversity ( ch_metadata, DIVERSITY_CORE.out.qza ) ch_metadata .combine( QIIME2_DIVERSITY_CORE.out.vector.flatten() ) - .combine( ch_metacolumn_all ) .set{ ch_to_diversity_alpha } QIIME2_DIVERSITY_ALPHA ( ch_to_diversity_alpha ) diff --git a/workflows/ampliseq.nf b/workflows/ampliseq.nf index b34fe80c..2a85d1e6 100644 --- a/workflows/ampliseq.nf +++ b/workflows/ampliseq.nf @@ -76,8 +76,6 @@ if ( !is_fasta_input && (!params.FW_primer || !params.RV_primer) && !params.skip System.exit(1) } -metadata_category = params.metadata_category ?: "" - //only run QIIME2 when taxonomy is actually calculated and all required data is available if ( !params.enable_conda && !params.skip_taxonomy && !params.skip_qiime ) { run_qiime2 = true @@ -546,13 +544,19 @@ workflow AMPLISEQ { } //Select metadata categories for diversity analysis & ancom - if (!params.skip_ancom || !params.skip_diversity_indices) { - METADATA_ALL ( ch_metadata, metadata_category ).set { ch_metacolumn_all } + if 
(params.metadata_category) { + ch_metacolumn_all = Channel.from(params.metadata_category.tokenize(',')) + METADATA_PAIRWISE ( ch_metadata ).set { ch_metacolumn_pairwise } + ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() + ch_metacolumn_pairwise = ch_metacolumn_all.join(ch_metacolumn_pairwise) + } else if (!params.skip_ancom || !params.skip_diversity_indices) { + METADATA_ALL ( ch_metadata ).set { ch_metacolumn_all } //return empty channel if no appropriate column was found ch_metacolumn_all.branch { passed: it != "" }.set { result } ch_metacolumn_all = result.passed - + ch_metacolumn_all = ch_metacolumn_all.splitCsv().flatten() METADATA_PAIRWISE ( ch_metadata ).set { ch_metacolumn_pairwise } + ch_metacolumn_pairwise = ch_metacolumn_pairwise.splitCsv().flatten() } else { ch_metacolumn_all = Channel.empty() ch_metacolumn_pairwise = Channel.empty() From 8fe10d0b5a3bad37e04e7ca83a7d9322711393c7 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 13 Jun 2022 16:16:55 +0200 Subject: [PATCH 2/4] update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d2964315..55076c96 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#448](https://github.com/nf-core/ampliseq/pull/448) - Pairwise statistics will be now performed on a subset of metadata columns specified with `--metadata_category` instead of ignoring that setting. +- [#451](https://github.com/nf-core/ampliseq/pull/451) - Pairwise statistics will be now performed on a subset of metadata columns specified with `--metadata_category` instead of ignoring that setting. 
### `Dependencies` From 06d010588b60502f9a64752597e7197b9db924bf Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 13 Jun 2022 16:26:55 +0200 Subject: [PATCH 3/4] Replace busybox with Ubuntu base image --- modules/local/format_taxonomy.nf | 4 ++-- modules/local/format_taxonomy_qiime.nf | 4 ++-- modules/local/rename_raw_data_files.nf | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/format_taxonomy.nf b/modules/local/format_taxonomy.nf index bcb4edd0..959279de 100644 --- a/modules/local/format_taxonomy.nf +++ b/modules/local/format_taxonomy.nf @@ -3,8 +3,8 @@ process FORMAT_TAXONOMY { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: path(database) diff --git a/modules/local/format_taxonomy_qiime.nf b/modules/local/format_taxonomy_qiime.nf index bd7bb3ae..e93aa9c6 100644 --- a/modules/local/format_taxonomy_qiime.nf +++ b/modules/local/format_taxonomy_qiime.nf @@ -4,8 +4,8 @@ process FORMAT_TAXONOMY_QIIME { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: path(database) diff --git a/modules/local/rename_raw_data_files.nf b/modules/local/rename_raw_data_files.nf index 7695a851..3bf582d5 100644 --- a/modules/local/rename_raw_data_files.nf +++ b/modules/local/rename_raw_data_files.nf @@ -4,8 +4,8 @@ process RENAME_RAW_DATA_FILES { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(reads) From 4211134c6cec0234b8eb5f57bec7a1a3947f2207 Mon Sep 17 00:00:00 2001 From: d4straub Date: Mon, 13 Jun 2022 16:28:34 +0200 Subject: [PATCH 4/4] update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 55076c96..01053224 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` - [#451](https://github.com/nf-core/ampliseq/pull/451) - Pairwise statistics will be now performed on a subset of metadata columns specified with `--metadata_category` instead of ignoring that setting. +- [#451](https://github.com/nf-core/ampliseq/pull/451) - Replace the biocontainers base image with the Ubuntu base image for GCP support. ### `Dependencies`