diff --git a/CHANGELOG.md b/CHANGELOG.md index 291d30ef..1c0bc3c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` - [#751](https://github.com/nf-core/ampliseq/pull/751) - Added version R08-RS214 of curated GTDB 16S taxonomy: `sbdi-gtdb=R08-RS214-1` or `sbdi-gtdb` as parameter to `--dada_ref_taxonomy` +- [#752](https://github.com/nf-core/ampliseq/pull/752) - Added version R09-RS220 of GTDB 16S taxonomy: `gtdb=R09-RS220` or `gtdb` as parameter to `--dada_ref_taxonomy` ### `Changed` diff --git a/bin/taxref_reformat_gtdb.sh b/bin/taxref_reformat_gtdb.sh index f2e65b56..500a008c 100755 --- a/bin/taxref_reformat_gtdb.sh +++ b/bin/taxref_reformat_gtdb.sh @@ -1,10 +1,15 @@ #!/bin/sh -# Reads the ar* and bac* SSU fasta files from GTDB (after first untarring) +# Reads the ar* and bac* SSU fasta files from GTDB (after first untarring/unzipping) # and outputs two new fasta files, one suitable for DADA2's assignTaxonomy() # and addSpecies() functions. 
-# Untar any tar file in the working directory +# Unzip any .fna.gz file in the working directory - versions 220 and newer; the -e guard skips the literal, unexpanded pattern when no such file exists (tar-based releases) +for f in *.fna.gz; do + if [ -e "$f" ]; then gunzip -c "$f" > "$(basename "$f" .gz)"; fi +done + +# Untar any tar file in the working directory - versions 214.1 and older for f in *.tar.gz; do tar xzf $f done diff --git a/conf/ref_databases.config b/conf/ref_databases.config index 9c024e85..c2db0ed2 100644 --- a/conf/ref_databases.config +++ b/conf/ref_databases.config @@ -26,11 +26,18 @@ params { dbversion = "COIDB 221216 (https://doi.org/10.17044/scilifelab.20514192.v2)" } 'gtdb' { - title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" - file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/bac120_ssu_reps_r214.tar.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1/genomic_files_reps/ar53_ssu_reps_r214.tar.gz" ] + title = "GTDB - Genome Taxonomy Database - Release R09-RS220" + file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/bac120_ssu_reps_r220.fna.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/ar53_ssu_reps_r220.fna.gz" ] citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503."
fmtscript = "taxref_reformat_gtdb.sh" - dbversion = "GTDB R08-RS214.1 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release214/214.1)" + dbversion = "GTDB R09-RS220 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0)" + } + 'gtdb=R09-RS220' { + title = "GTDB - Genome Taxonomy Database - Release R09-RS220" + file = [ "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/bac120_ssu_reps_r220.fna.gz", "https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0/genomic_files_reps/ar53_ssu_reps_r220.fna.gz" ] + citation = "Parks DH, Chuvochina M, Waite DW, Rinke C, Skarshewski A, Chaumeil PA, Hugenholtz P. A standardized bacterial taxonomy based on genome phylogeny substantially revises the tree of life. Nat Biotechnol. 2018 Nov;36(10):996-1004. doi: 10.1038/nbt.4229. Epub 2018 Aug 27. PMID: 30148503." + fmtscript = "taxref_reformat_gtdb.sh" + dbversion = "GTDB R09-RS220 (https://data.ace.uq.edu.au/public/gtdb/data/releases/release220/220.0)" } 'gtdb=R08-RS214' { title = "GTDB - Genome Taxonomy Database - Release R08-RS214.1" diff --git a/nextflow_schema.json b/nextflow_schema.json index 60f639e8..a8a1e397 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -360,6 +360,7 @@ "gtdb=R06-RS202", "gtdb=R07-RS207", "gtdb=R08-RS214", + "gtdb=R09-RS220", "midori2-co1", "midori2-co1=gb250", "pr2", diff --git a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf index 16b0c2ac..321463ae 100644 --- a/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_ampliseq_pipeline/main.nf @@ -237,7 +237,7 @@ def validateInputParameters() { error("Incompatible parameters: `--filter_ssu` cannot be used with `--skip_barrnap` because filtering for SSU's depends on barrnap.") } - String[] sbdi_compatible_databases = 
["coidb","coidb=221216","gtdb","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] + String[] sbdi_compatible_databases = ["coidb","coidb=221216","gtdb","gtdb=R09-RS220","gtdb=R08-RS214","gtdb=R07-RS207","gtdb=R06-RS202","gtdb=R05-RS95","midori2-co1","midori2-co1=gb250","pr2","pr2=5.0.0","pr2=4.14.0","pr2=4.13.0","rdp","rdp=18","sbdi-gtdb","sbdi-gtdb=R08-RS214-1","sbdi-gtdb=R07-RS207-1","silva","silva=138","silva=132","unite-fungi","unite-fungi=9.0","unite-fungi=8.3","unite-fungi=8.2","unite-alleuk","unite-alleuk=9.0","unite-alleuk=8.3","unite-alleuk=8.2"] if (params.sbdiexport){ if (params.sintax_ref_taxonomy ) { if (!Arrays.stream(sbdi_compatible_databases).anyMatch(entry -> params.sintax_ref_taxonomy.toString().equals(entry)) ) {