From 6b8929652f6b4180f711f4016b2b19170e56eab3 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 29 Aug 2023 11:15:50 -0700 Subject: [PATCH 1/4] Remove unnecessary whitespace Co-authored-by: Richard Neher --- augur/clades.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/augur/clades.py b/augur/clades.py index ab8614d09..618d396fb 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -1,7 +1,7 @@ """ Assign clades to nodes in a tree based on amino-acid or nucleotide signatures. -Nodes which are members of a clade are stored via +Nodes which are members of a clade are stored via <OUTPUT_NODE_DATA> → nodes → <node_name> → clade_membership and if this file is used in `augur export v2` these will automatically become a coloring. @@ -181,7 +181,7 @@ def ensure_no_multiple_mutations(all_muts): aa_positions = [int(mut[1:-1])-1 for mut in node['aa_muts'][gene]] if len(set(aa_positions))!=len(aa_positions): multiples.append(f"Node {name} ({gene})") - + if multiples: raise AugurError(f"Multiple mutations at the same position on a single branch were found: {', '.join(multiples)}") @@ -310,7 +310,7 @@ def get_reference_sequence_from_root_node(all_muts, root_name): except KeyError: missing.append(gene) - if missing: + if missing: print(f"WARNING in augur.clades: sequences at the root node have not been specified for {{{', '.join(missing)}}}, \ even though mutations were observed. 
Clades which are annotated using bases/codons present at the root \ of the tree may not be correctly inferred.") @@ -358,7 +358,6 @@ def run(args): ref = get_reference_sequence_from_root_node(all_muts, tree.root.name) clade_designations = read_in_clade_definitions(args.clades) - membership, labels = assign_clades(clade_designations, all_muts, tree, ref) warn_if_clades_not_found(membership, clade_designations) From c9e71fd49e8e69b5ced11af6b97b4721ea48e137 Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 29 Aug 2023 11:16:29 -0700 Subject: [PATCH 2/4] fix: disable non-a-number filtering in clade file parsing Co-authored-by: Richard Neher --- augur/clades.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/augur/clades.py b/augur/clades.py index 618d396fb..14f148269 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -62,7 +62,8 @@ def read_in_clade_definitions(clade_file): df = pd.read_csv( clade_file, sep='\t' if clade_file.endswith('.tsv') else ',', - comment='#' + comment='#', + na_filter=False, ) clade_inheritance_rows = df[df['gene'] == 'clade'] From f5f959083c63a4d49ed00acca03818ae393f9a7d Mon Sep 17 00:00:00 2001 From: Richard Neher Date: Fri, 25 Aug 2023 21:56:50 +0200 Subject: [PATCH 3/4] fix: filter ghost clades that result from empty lines in clade tsv file --- augur/clades.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/augur/clades.py b/augur/clades.py index 14f148269..39053aa31 100644 --- a/augur/clades.py +++ b/augur/clades.py @@ -84,9 +84,13 @@ def read_in_clade_definitions(clade_file): # Use integer 0 as root so as not to conflict with any string clade names # String '0' can still be used this way root = 0 + + # Skip rows that are missing a clade name. 
+ defined_clades = (clade for clade in df.clade.unique() if clade != '') + # For every clade, add edge from root as default # This way all clades can be reached by traversal - for clade in df.clade.unique(): + for clade in defined_clades: G.add_edge(root, clade) # Build inheritance graph From bf1493399e6bbda822c98c498bba747dfee7739c Mon Sep 17 00:00:00 2001 From: Victor Lin <13424970+victorlin@users.noreply.github.com> Date: Tue, 29 Aug 2023 11:20:54 -0700 Subject: [PATCH 4/4] Update changelog --- CHANGES.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 075e8486b..58f4245a9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,12 +8,14 @@ ### Bug fixes +* clades: Fix outputs for genes named `NA` (previously the value was replaced by `nan`). [#1293][] (@rneher) * distance: Improve documentation by describing how gaps get treated as indels and how users can ignore specific characters in distance calculations. [#1285][] (@huddlej) * Fix help output compatibility with non-Unicode streams. [#1290][] (@victorlin) [#1284]: https://github.com/nextstrain/augur/pull/1284 [#1285]: https://github.com/nextstrain/augur/pull/1285 [#1290]: https://github.com/nextstrain/augur/pull/1290 +[#1293]: https://github.com/nextstrain/augur/pull/1293 ## 22.3.0 (14 August 2023)