Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MRG: add utilities for using ictv taxonomic ranks with sourmash tax #2608

Merged
merged 26 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
b0f231b
add LineageInfo for ictv viral taxonomy
bluegenes May 4, 2023
42c1b3e
use ictv
bluegenes May 5, 2023
a54c750
get working
bluegenes May 23, 2023
2078fbe
allow empty
bluegenes Jun 12, 2023
0221cc6
actually allow empty
bluegenes Jun 12, 2023
35f0e7f
Merge branch 'latest' into ictv-tax
bluegenes Sep 27, 2023
02edfaf
tsv for ictv for now
bluegenes Oct 2, 2023
796cc69
Merge branch 'latest' into ictv-tax
bluegenes Feb 1, 2024
9ee017d
back to csv for ictv tax
bluegenes Feb 1, 2024
a767e32
init testing
bluegenes Feb 3, 2024
a3db361
Merge branch 'latest' into ictv-tax
bluegenes Feb 9, 2024
38d8214
upd ictv test lineages
bluegenes Feb 9, 2024
caedfd4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 9, 2024
d458d2f
upd fake tax
bluegenes Feb 9, 2024
8d1976e
tests for genome, metagenome, annotate
bluegenes Feb 9, 2024
099fd1c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 9, 2024
870f3c3
add basic ictv test for summarize
bluegenes Feb 9, 2024
1e73d68
Merge branch 'ictv-tax' of github.com:sourmash-bio/sourmash into ictv…
bluegenes Feb 9, 2024
8a3ecdd
re-remove force option
bluegenes Feb 9, 2024
91daf96
more tests
bluegenes Feb 9, 2024
a3a0a22
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 9, 2024
f43e5c2
Merge branch 'latest' into ictv-tax
bluegenes Feb 9, 2024
0ac3a34
re-enable ranks so self.replace() works
bluegenes Feb 10, 2024
57cc461
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 10, 2024
bfb92a4
add ictv note to docs
bluegenes Feb 10, 2024
b58217b
Merge branch 'latest' into ictv-tax
bluegenes Feb 10, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions doc/command-line.md
Original file line number Diff line number Diff line change
Expand Up @@ -551,8 +551,8 @@ The sourmash `tax` or `taxonomy` commands integrate taxonomic
`gather` command (we cannot combine separate `gather` runs for the
same query). For supported databases (e.g. GTDB, NCBI), we provide
taxonomy csv files, but they can also be generated for user-generated
databases. As of v4.8, some sourmash taxonomy commands can also use `LIN`
lineage information. For more information, see [databases](databases.md).
databases. Some sourmash taxonomy commands can also use `LIN` (as of v4.8)
or `ICTV` (as of v4.8.6) lineage information.

`tax` commands rely upon the fact that `gather` provides both the total
fraction of the query matched to each database matched, as well as a
Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ def subparser(subparsers):
default=False,
help="use LIN taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain LIN lineage information.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)


def main(args):
Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/genome.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@ def subparser(subparsers):
default=None,
help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will restrict classification to these groups.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)
add_tax_threshold_arg(subparser, 0.1)
add_rank_arg(subparser)

Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/metagenome.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ def subparser(subparsers):
default=None,
help="CSV containing 'name', 'lin' columns, where 'lin' is the lingroup prefix. Will produce a 'lingroup' report containing taxonomic summarization for each group.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)
add_rank_arg(subparser)


Expand Down
7 changes: 7 additions & 0 deletions src/sourmash/cli/tax/summarize.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,13 @@ def subparser(subparsers):
default=False,
help="use LIN taxonomy in place of standard taxonomic ranks.",
)
subparser.add_argument(
"--ictv",
"--ictv-taxonomy",
action="store_true",
default=False,
help="use ICTV taxonomy in place of standard taxonomic ranks. Note that the taxonomy CSV must contain ICTV ranks.",
)


def main(args):
Expand Down
10 changes: 10 additions & 0 deletions src/sourmash/tax/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
RankLineageInfo,
LINLineageInfo,
AnnotateTaxResult,
ICTVRankLineageInfo,
)

usage = """
Expand Down Expand Up @@ -82,6 +83,7 @@ def metagenome(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)
available_ranks = tax_assign.available_ranks
except ValueError as exc:
Expand Down Expand Up @@ -113,6 +115,7 @@ def metagenome(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)
except ValueError as exc:
error(f"ERROR: {str(exc)}")
Expand Down Expand Up @@ -258,6 +261,7 @@ def genome(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)
available_ranks = tax_assign.available_ranks

Expand Down Expand Up @@ -297,6 +301,7 @@ def genome(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)

except ValueError as exc:
Expand Down Expand Up @@ -402,6 +407,7 @@ def annotate(args):
keep_identifier_versions=args.keep_identifier_versions,
force=args.force,
lins=args.lins,
ictv=args.ictv,
)

except ValueError as exc:
Expand Down Expand Up @@ -466,6 +472,7 @@ def annotate(args):
raw=row,
id_col=id_col,
lins=args.lins,
ictv=args.ictv,
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
)
Expand Down Expand Up @@ -591,6 +598,7 @@ def summarize(args):
keep_full_identifiers=args.keep_full_identifiers,
keep_identifier_versions=args.keep_identifier_versions,
lins=args.lins,
ictv=args.ictv,
)
except ValueError as exc:
error("ERROR while loading taxonomies!")
Expand Down Expand Up @@ -637,6 +645,8 @@ def summarize(args):
rank = lineage[-1].rank
if args.lins:
inf = LINLineageInfo(lineage=lineage)
elif args.ictv:
inf = ICTVRankLineageInfo(lineage=lineage)
else:
inf = RankLineageInfo(lineage=lineage)
lin = inf.display_lineage()
Expand Down