-
Notifications
You must be signed in to change notification settings - Fork 18
/
stree.ml
150 lines (128 loc) · 3.84 KB
/
stree.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
(* Our basic tree data structure without information.
*
* *)
open Ppatteries
type stree = Node of int * stree list | Leaf of int
type t = stree
let node i tL = Node(i, tL)
let leaf i = Leaf i
let of_id i = function
| Node(_, tL) -> Node(i, tL)
| Leaf(_) -> Leaf(i)
let rec n_taxa = function
| Node(_,tL) -> List.fold_left ( + ) 0 (List.map n_taxa tL)
| Leaf(_) -> 1
(* Number of non-root edges. *)
let n_edges stree =
let rec aux = function
| Node(_,tL) -> List.fold_left ( + ) 1 (List.map aux tL)
| Leaf(_) -> 1
in
(aux stree) - 1
let rec node_ids_aux = function
| Node(i,tL) -> i :: (List.flatten (List.map node_ids_aux tL))
| Leaf(i) -> [i]
let node_ids stree = List.sort compare (node_ids_aux stree)
let nonroot_node_ids stree =
try List.sort compare (List.tl (node_ids_aux stree)) with
| Failure "tl" -> invalid_arg "nonroot_node_ids"
let rec leaf_ids = function
| Node(_,tL) -> List.flatten (List.map leaf_ids tL)
| Leaf(i) -> [i]
let top_id = function
| Node(i, _) -> i
| Leaf(i) -> i
let rec max_id = function
| Node(i, tL) -> List.fold_left max i (List.map max_id tL)
| Leaf(i) -> i
let multifurcating_at_root = function
| Node(_, tL) -> List.length tL > 2
| Leaf(_) -> false
let rec plain_to_newick = function
| Node(i, tL) ->
"("^(String.concat "," (List.map plain_to_newick tL))^")"^(string_of_int i)
| Leaf i -> string_of_int i
let rec ppr ff = function
| Node(i, tL) ->
Format.fprintf ff "@[(%a)@]%d"
(Ppr.ppr_gen_list_inners "," ppr) tL i
| Leaf i -> Format.pp_print_int ff i
(* the maximal outdegree of nodes in the tree *)
let rec outdegree = function
| Node(_,tL) ->
List.fold_left max (List.length tL) (List.map outdegree tL)
| Leaf _ -> 0
(* increase all of the indices of the tree by "by" *)
let rec boost by = function
| Node(i,tL) -> Node(i+by, List.map (boost by) tL)
| Leaf(i) -> Leaf(i+by)
let recur f_node f_leaf tree =
let rec aux = function
| Node(id, tL) -> f_node id (List.map aux tL)
| Leaf id -> f_leaf id
in
aux tree
(* for functions that don't treat leaves differently than a node with empty
* leaves. *)
let recur_listly f = recur f (fun id -> f id [])
let find target tree =
let rec aux = function
| (Leaf i as x) :: _
| (Node (i, _) as x) :: _ when i = target -> x
| Leaf _ :: rest -> aux rest
| Node (_, subtrees) :: rest -> List.append subtrees rest |> aux
| [] -> raise Not_found
in
aux [tree]
let parent_map t =
let maybe_add accum i = function
| Some p -> IntMap.add i p accum
| None -> accum
in
let rec aux accum = function
| (parent, Leaf i) :: rest ->
aux
(maybe_add accum i parent)
rest
| (parent, Node (i, tL)) :: rest ->
aux
(maybe_add accum i parent)
(List.fold_left
(fun accum t -> (Some i, t) :: accum)
rest
tL)
| [] -> accum
in
aux IntMap.empty [None, t]
let reroot tree root =
if top_id tree = root then tree else
let rec aux = function
| [] -> failwith "root not found (tried to reroot at leaf?)"
| (Node (i, subtrees), path) :: _ when i = root ->
(i, subtrees) :: path
| (Leaf _, _) :: rest -> aux rest
| (Node (i, subtrees), path) :: rest ->
List.map
(fun subtree ->
let path' = (i, List.remove subtrees subtree) :: path in
subtree, path')
subtrees
|> List.append rest
|> aux
in
aux [tree, []]
|> List.rev
|> List.reduce (fun (bi, btl) (ai, atl) -> ai, node bi btl :: atl)
|> uncurry node
let rec nodes_containing nodes = function
| Leaf _ -> nodes
| Node (i, subtrees) ->
let nodes' = List.fold_left
(nodes_containing nodes |- IntSet.union |> flip)
IntSet.empty
subtrees
in
if List.for_all (top_id |- flip IntSet.mem nodes') subtrees then
IntSet.add i nodes'
else
nodes'