Skip to content
This repository was archived by the owner on Jul 1, 2024. It is now read-only.

Commit e42dc62

Browse files
committed
Allow quoted tree taxa
1 parent e799fba commit e42dc62

File tree

6 files changed

+69
-14
lines changed

6 files changed

+69
-14
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ snazzy_clades.*
2121

2222
TEST
2323

24+
time.html
25+
2426
# rspec failure tracking
2527
.rspec_status
2628
*.lock

exe/snazzy_clades_key_cols

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,42 @@ opts = Trollop.options do
1919

2020
banner <<-EOS
2121
22-
Note that if a clade's parent would be the root of the tree, no
23-
columns will be subtracted when removing the parent columns as it
24-
would be the entire alignment.
22+
23+
Checking IDs
24+
------------
25+
26+
IDs for the sequences must match between the three input files.
27+
28+
The tree file is allowed to have quoted taxa names, but the mapping
29+
file and alignment file are not.
30+
31+
If your alignment file has spaces in the name, the ID part of the
32+
header (i.e., the part up until the space) must match with the
33+
sequence IDs in the tree and the mapping file.
34+
35+
Example: This would be okay.
36+
37+
tree file:
38+
('genome_A', 'genome_B');
39+
40+
aln file:
41+
>genome_A apple pie
42+
AAAAA
43+
>genome_B brown sugar
44+
AATTA
45+
46+
mapping file:
47+
name coolness
48+
genome_A cool
49+
genome_B notcool
50+
51+
52+
Subtracting parent nodes
53+
------------------------
54+
55+
If a clade's parent would be the root of the tree, no columns will
56+
be subtracted when removing the parent columns as it would be the
57+
entire alignment.
2558
2659
Options:
2760
EOS

lib/tree_clusters.rb

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ def clade_nodes
2222
end
2323
return clades
2424
end
25+
26+
# Returns the names from `taxa` with every single-quote and
# double-quote character removed.
#
# @note All quote characters are dropped, not only an enclosing
#   pair, so a quote embedded in the middle of a name is removed
#   as well.
#
# @return [Array<String>] one unquoted name per entry of `taxa`
def unquoted_taxa
  taxa.map { |name| name.delete(%q{"'}) }
end
2529
end
2630

2731
# Top level namespace of the Gem.
@@ -82,8 +86,10 @@ def low_ent_cols leaves, leaf2attrs, entropy_cutoff
8286
Set.new low_ent_cols
8387
end
8488

89+
# @note If there are quoted names in the tree file, they are
90+
# unquoted first.
8591
def check_ids tree, mapping, aln
86-
tree_ids = Set.new(NewickTree.fromFile(tree).taxa)
92+
tree_ids = Set.new(NewickTree.fromFile(tree).unquoted_taxa)
8793

8894
mapping_ids = Set.new
8995
File.open(mapping, "rt").each_line.with_index do |line, idx|
@@ -143,8 +149,11 @@ def snazzy_clades tree, metadata
143149
metadata.each do |md_cat, leaf2mdtag|
144150
already_checked = Set.new
145151
single_tag_clades = {}
152+
p [md_cat, leaf2mdtag]
146153

147154
clades.each do |clade|
155+
p [clade.name, clade.all_leaves]
156+
148157
assert clade.all_leaves.count > 1,
149158
"A clade cannot also be a leaf"
150159

@@ -173,7 +182,7 @@ def snazzy_clades tree, metadata
173182
end
174183

175184
single_tag_clades.each do |clade, md_tag|
176-
non_clade_leaves = tree.taxa - clade.all_leaves
185+
non_clade_leaves = tree.unquoted_taxa - clade.all_leaves
177186

178187
non_clade_leaves_with_this_md_tag = non_clade_leaves.map do |leaf|
179188
[leaf, leaf2mdtag[leaf]]
@@ -288,10 +297,15 @@ class Clade
288297
:single_tag_info,
289298
:all_tags
290299

300+
# @note If a node name is quoted, then those quotes are removed
301+
# first.
302+
#
291303
# @param node [NewickNode] a NewickNode from a NewickTree
292304
# @param tree [NewickTree] a NewickTree
293305
def initialize node, tree, metadata=nil
294-
@name = node.name
306+
tree_taxa = tree.unquoted_taxa
307+
308+
@name = unquote node.name
295309
@all_leaves = descendant_leaves node
296310

297311
if (children = node.children).count == 2
@@ -317,10 +331,10 @@ def initialize node, tree, metadata=nil
317331
@parent_leaves = descendant_leaves parent
318332

319333
@other_leaves =
320-
Object::Set.new(tree.taxa) - Object::Set.new(all_leaves)
334+
Object::Set.new(tree_taxa) - Object::Set.new(all_leaves)
321335

322336
@non_parent_leaves =
323-
Object::Set.new(tree.taxa) - Object::Set.new(parent_leaves)
337+
Object::Set.new(tree_taxa) - Object::Set.new(parent_leaves)
324338

325339
if metadata
326340
@metadata = metadata
@@ -345,7 +359,8 @@ def == clade
345359
self.each_sibling_leaf_set == clade.each_sibling_leaf_set &&
346360
self.parent_leaves == clade.parent_leaves &&
347361
self.other_leaves == clade.other_leaves &&
348-
self.single_tag_info == clade.single_tag_info
362+
self.single_tag_info == clade.single_tag_info &&
363+
self.all_tags == clade.all_tags
349364
)
350365
end
351366

@@ -379,14 +394,19 @@ def get_all_tags
379394

380395
def descendant_leaves node
381396
if node.leaf?
382-
[node.name]
397+
[unquote(node.name)]
383398
else
384399
node.
385400
descendants.
386401
flatten.
387402
uniq.
388-
select { |node| node.leaf? }.map(&:name)
403+
select { |node| node.leaf? }.
404+
map { |node| unquote(node.name) }
389405
end
390406
end
407+
408+
# Removes every single-quote and double-quote character from a
# node name.
#
# @note All quote characters are dropped, not only an enclosing
#   pair.
#
# @param str [String, nil] a node name, possibly quoted
#
# @return [String, nil] the name without quote characters, or nil
#   when +str+ is nil
def unquote str
  # NOTE(review): an unlabeled node may report a nil name -- confirm
  # against NewickNode. Passing nil through keeps such nodes from
  # raising NoMethodError here.
  return str if str.nil?

  str.delete(%q{"'})
end
391411
end
392412
end

lib/tree_clusters/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
module TreeClusters
2-
VERSION = "0.5.1"
2+
VERSION = "0.5.2"
33
end

test_files/small.tre

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
((a-1, a-2)cluster_A, ((b-1, b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;
1+
(('a-1', 'a-2')cluster_A, (("b-1", b-2)cluster_B1, (bb-1, (bbb-1, bbb-2)cluster_B3)cluster_B2)cluster_B)cluster_C;

test_files/test.tre

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
((((((g1:1, g2:1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, g6:2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;
1+
((((((g1:1, 'g2':1)cluster1:1, g3:2)cluster4:1, (g4a:1, g4b:1)cluster7:2)cluster6:1, ((g5:2, "g6":2)cluster11:1, g7:3)cluster14:1)cluster10:1, g8:5)cluster16:1, ((g9:1, g10:1)cluster19:1, g11:2)cluster22:4)cluster18:1;

0 commit comments

Comments
 (0)