Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Merge branch 'dev' into 215-biom-support

  • Loading branch information...
commit 63c06c62947d95d21c156c240853fd7ad0be81e5 2 parents 52029c1 + 76cbd02
@habnabit habnabit authored
View
5 CHANGELOG
@@ -4,11 +4,16 @@
* Closed GH-181: Added `guppy indep_c`; added `--leaf-values` to `guppy mft`.
* Closed GH-256: Added `--mmap-file` to pplacer; changed `--pretend` in
pplacer to show the amount of memory which would be allocated.
+ * Closed GH-260: Added `rooted_pd` to `guppy fpd`; Added a `--variance` flag
+ to `guppy rarefact`.
* Closed GH-261: Added `rppr convex_taxids`.
* Closed GH-263: Fixed a bug that could sometimes cause failures in `rppr
min_adcl` with the PAM algorithm.
* Closed GH-265: Fixed a bug that could cometimes cause `rppr reroot` to
return a suboptimal rooting.
+ * Closed GH-266: Adding `map_ratio` and `map_overlap` columns to `guppy
+ to_csv`.
+ * Closed GH-267: Added a `leaf_count` column to `rppr convex_taxids`.
1.1.alpha13
View
47 common_src/ppatteries.ml
@@ -103,16 +103,16 @@ let median l =
aux (l, l)
let verbosity = ref 1
-let dprintf ?(l = 1) ?(flush = true) fmt =
+let dfprintf ?(l = 1) ?(flush = true) ch fmt =
if !verbosity >= l then begin
- finally (if flush then flush_all else identity) (Printf.printf fmt)
+ finally (if flush then flush_all else identity) (Printf.fprintf ch fmt)
end else
Printf.ifprintf IO.stdnull fmt
-let dprint ?(l = 1) ?(flush = true) s =
- if !verbosity >= l then begin
- print_string s;
- if flush then flush_all ();
- end
+let dprintf ?l ?flush = dfprintf ?l ?flush stdout
+let deprintf ?l ?flush = dfprintf ?l ?flush stderr
+let dfprint ?l ?flush ch s = dfprintf ?l ?flush ch "%s" s
+let dprint ?l ?flush = dfprint ?l ?flush stdout
+let deprint ?l ?flush = dfprint ?l ?flush stderr
let align_with_space =
List.map (Tuple3.map3 ((^) " ")) |- Arg.align
@@ -686,3 +686,36 @@ let () =
fn
op)
| _ -> None)
+
+let memory_stats_ch =
+ match begin
+ try Some (Sys.getenv "PPLACER_MEMORY_STATS")
+ with Not_found -> None
+ end with
+ | None -> None
+ | Some statsfile ->
+ let ch = Legacy.open_out_gen
+ [Open_append; Open_creat; Open_trunc]
+ 0o600
+ statsfile
+ in
+ let write = Csv.to_channel ch |> Csv.output_record in
+ let word_in_bytes = Sys.word_size / 8 in
+ let start = Unix.gettimeofday () in
+ let last_top = ref None in
+ write ["time"; "pid"; "top_heap_bytes"];
+ let check_stats () =
+ let stats = Gc.quick_stat () in
+ match !last_top with
+ | Some top when stats.Gc.top_heap_words <= top -> ()
+ | _ ->
+ write
+ [Printf.sprintf "%f" (Unix.gettimeofday () -. start);
+ string_of_int (Unix.getpid ());
+ string_of_int (stats.Gc.top_heap_words * word_in_bytes)];
+ Legacy.flush ch;
+ last_top := Some stats.Gc.top_heap_words
+ in
+ let _ = Gc.create_alarm check_stats in
+ at_exit check_stats;
+ Some ch
View
4 docs/details/guppy_fpd.rst
@@ -3,7 +3,9 @@
By default, ``guppy fpd`` outputs a matrix containing in each row: * the
placefile name, the phylogenetic entropy (``phylo_entropy``, `Allen 2009`_),
the quadratic entropy (``quadratic``, `Rao 1982`_, `Warwick and Clark 1995`_)
-phylogenetic diversity (``pd``, `Faith 1992`_), and a new diversity metric
+phylogenetic diversity (``unrooted_pd``, `Faith 1992`_), phylogenetic diversity
+which only requires distal mass (``rooted_pd``, this is as oppposed to ``pd``
+requiring both distal and proximal mass), and a new diversity metric
generalizing PD to incorporate abundance: abundance-weighted phylogenetic
diversity (``awpd``).
View
10 docs/details/guppy_rarefact.rst
@@ -1,3 +1,13 @@
Calculate phylogenetic rarefaction curves.
+For every :math:`k \in [2, n]`, where n is the number of pqueries in a
+placefile, subsample the given placefile to contain every combination of
+:math:`k` pqueries and calculate the mean and variance of phylogenetic divesity
+for all of these subsampled placefiles.
+
+The ``rooted_mean`` and ``rooted_variance`` columns are respectively the mean
+and variance of the phylogenetic diversity only requiring mass on the distal
+side of the edge (which normally requires mass proximal and distal to the
+edge).
+
*Experimental.*
View
4 pplacer_src/convex.ml
@@ -579,8 +579,8 @@ let rank_tax_map_of_refpkg rp =
Array.iteri
(fun rank rankname ->
if not (IntMap.mem rank m) then
- dprintf "warning: rank %s not represented in the lineage of any \
- sequence in reference package %s.\n"
+ deprintf "warning: rank %s not represented in the lineage of any \
+ sequence in reference package %s.\n"
rankname
(Refpkg.get_name rp))
td.Tax_taxonomy.rank_names)
View
15 pplacer_src/guppy_fpd.ml
@@ -47,14 +47,17 @@ let total_along_mass ?(include_pendant = false) criterion pr cb =
* we're either before or after the induced tree and the multiplier should
* be 0. *)
let bump_function r =
- if r = 0. || approx_equal ~epsilon r 1. then 0. else 1.
+ if r =~ 0. || approx_equal ~epsilon r 1. then 0. else 1.
-let pd_of_placerun ?include_pendant criterion pr =
+let bump_with_root r =
+ if r =~ 0. then 0. else 1.
+
+let pd_of_placerun ?include_pendant ?(bump = bump_function) criterion pr =
total_along_mass
?include_pendant
criterion
pr
- (fun r bl -> bump_function r *. bl)
+ (fun r bl -> bump r *. bl)
let reflect x =
if approx_equal ~epsilon x 1. then 0.
@@ -113,17 +116,19 @@ object (self)
and include_pendant = fv include_pendant in
let awpd = awpd_of_placerun ~include_pendant criterion
and pd = pd_of_placerun ~include_pendant criterion
+ and rpd = pd_of_placerun ~include_pendant ~bump:bump_with_root criterion
and entropy = entropy_of_placerun ~include_pendant criterion in
prl
|> List.map
(fun pr ->
let pe, qe = entropy pr in
- [pe; qe; pd pr; awpd 1. pr]
+ [pe; qe; pd pr; rpd pr; awpd 1. pr]
|> (flip List.append (List.map (flip awpd pr) exponents))
|> List.map (Printf.sprintf "%g")
|> List.cons (Placerun.get_name pr))
|> List.cons
- (["placerun"; "phylo_entropy"; "quadratic"; "pd"; "awpd"]
+ (["placerun"; "phylo_entropy"; "quadratic"; "unrooted_pd";
+ "rooted_pd"; "awpd"]
@ List.map (Printf.sprintf "awpd_%g") exponents)
|> self#write_ll_tab
View
39 pplacer_src/guppy_rarefact.ml
@@ -9,9 +9,13 @@ object (self)
inherit placefile_cmd () as super_placefile
inherit tabular_cmd () as super_tabular
+ val variance = flag "--variance"
+ (Plain (false, "Calculate variance of phylogenetic entropy."))
+
method specl =
super_mass#specl
@ super_tabular#specl
+ @ [toggle_flag variance]
method desc = "calculates phylogenetic rarefaction curves"
method usage = "usage: rarefact [options] placefile"
@@ -19,11 +23,38 @@ object (self)
method private placefile_action = function
| [pr] ->
let criterion = self#criterion in
+ let is_uniform_mass =
+ Placerun.get_pqueries pr
+ |> List.map (Pquery.namlom |- List.map snd)
+ |> List.flatten
+ |> List.sort_unique (<~>)
+ |> List.length
+ |> (=) 1
+ and fmt = Printf.sprintf "%g" in
+ if not is_uniform_mass then begin
+ if fv variance then
+ failwith "not all sequences have uniform weight; variance can't be \
+ calculated";
+ deprint "warning: not all sequences have uniform weight; expectation \
+ of quadratic entropy can't be calculated\n"
+ end;
Rarefaction.of_placerun criterion pr
- |> Enum.map (fun (a, b) -> [string_of_int a; Printf.sprintf "%g" b])
- |> List.of_enum
- |> List.cons ["k"; "r"]
- |> self#write_ll_tab
+ |> Enum.map
+ (fun (k, um, rm, qm) ->
+ [string_of_int k; fmt um; fmt rm]
+ @ (if is_uniform_mass then [fmt qm] else []))
+ |> begin
+ if fv variance then
+ curry Enum.combine (Rarefaction.variance_of_placerun criterion pr)
+ |- Enum.map (fun ((_, uv, rv), sl) -> sl @ [fmt uv; fmt rv])
+ else identity
+ end
+ |> List.of_enum
+ |> List.cons
+ (["k"; "unrooted_mean"; "rooted_mean"]
+ @ (if is_uniform_mass then ["quadratic_mean"] else [])
+ @ (if fv variance then ["unrooted_variance"; "rooted_variance"] else []))
+ |> self#write_ll_tab
| l ->
List.length l
View
2  pplacer_src/guppy_to_csv.ml
@@ -35,7 +35,7 @@ object (self)
|> List.cons
["origin"; "name"; "multiplicity"; "edge_num"; "like_weight_ratio";
"post_prob"; "likelihood"; "marginal_like"; "distal_length";
- "pendant_length"; "classification"]
+ "pendant_length"; "classification"; "map_ratio"; "map_overlap"]
|> self#write_ll_tab
end
View
9 pplacer_src/multiprocessing.ml
@@ -116,13 +116,18 @@ class virtual ['a] process child_func =
(* Only these descriptors are used in the child. The parent has no use for
* them, so it closes them. The child has no use for anything but them, so
* it closes everything but them. *)
- let child_only = [child_rd; parent_wr; progress_wr]
- in
+ let child_only = [child_rd; parent_wr; progress_wr] in
+ let () = flush_all () in
let pid = match Unix.fork () with
| 0 ->
(* Do the actual closing of the irrelevant descriptors. *)
begin
let ignored = List.map fd_of_file_descr child_only in
+ let ignored = match Ppatteries.memory_stats_ch with
+ | None -> ignored
+ | Some ch ->
+ fd_of_file_descr (Unix.descr_of_out_channel ch) :: ignored
+ in
List.iter
(fun fd -> if not (List.mem fd ignored) then quiet_close fd)
(range 256)
View
31 pplacer_src/placement.ml
@@ -134,31 +134,33 @@ let placement_of_str str =
(* *** WRITING *** *)
(* usual output *)
-let opt_to_str f = function
- | Some x -> f x
- | None -> "-"
-
let string_of_8gfloat = Printf.sprintf "%8g"
let string_of_gfloat = Printf.sprintf "%g"
-let to_strl_gen fint ffloat ffloato ftaxido place =
+let to_strl_gen fint ffloat ftaxid default place =
+ let map_ratio, map_overlap =
+ Option.map_default (Tuple2.mapn some some) (None, None) place.map_identity
+ (* eta expansion !! *)
+ and fopt f xo = Option.map_default f default xo in
[
fint place.location;
ffloat place.ml_ratio;
- ffloato place.post_prob;
+ fopt ffloat place.post_prob;
ffloat place.log_like;
- ffloato place.marginal_prob;
+ fopt ffloat place.marginal_prob;
ffloat place.distal_bl;
ffloat place.pendant_bl;
- ftaxido place.classif;
+ fopt ftaxid place.classif;
+ fopt ffloat map_ratio;
+ fopt fint map_overlap;
]
let to_strl =
to_strl_gen
string_of_int
string_of_8gfloat
- (opt_to_str string_of_8gfloat)
- (opt_to_str Tax_id.to_string)
+ Tax_id.to_string
+ "-"
let to_str place = String.concat "\t" (to_strl place)
@@ -255,14 +257,9 @@ let of_json fields a =
}
(* CSV *)
-let opt_to_csv_str f = function
- | Some x -> f x
- | None -> "NA"
-
let to_csv_strl =
to_strl_gen
string_of_int
string_of_gfloat
- (opt_to_csv_str string_of_gfloat)
- (opt_to_csv_str Tax_id.to_string)
-
+ Tax_id.to_string
+ "NA"
View
210 pplacer_src/rarefaction.ml
@@ -24,28 +24,16 @@ let count_along_mass gt mass cb =
(fun () -> ref 0)
(Gtree.get_stree gt)
-(* Compute the rarefaction curve of a placerun, given a placement criterion and
- * optionally the highest X value for the curve. *)
-let of_placerun:
- (Placement.t -> float) -> ?k_max:int -> Newick_bark.t Placerun.t -> (int * float) Enum.t
-= fun criterion ?k_max pr ->
- let gt = Placerun.get_ref_tree pr |> Newick_gtree.add_zero_root_bl
- and mass = I.of_placerun
- Mass_map.Point
- criterion
- pr
- in
+let k_maps_of_placerun: int -> Newick_bark.t Placerun.t -> float IntMap.t IntMap.t
+= fun k_max pr ->
let n = Placerun.get_pqueries pr |> List.length in
let n' = float_of_int n
- and k_max = match k_max with
- | Some k when k < n -> k
- | _ -> n
and base_k_map = 0 -- n
|> Enum.map (identity &&& const 1.)
|> IntMap.of_enum
|> IntMap.singleton 0
in
- let k_maps = Enum.fold
+ Enum.fold
(fun k_maps k ->
let prev_map = IntMap.find k k_maps
and diff = n' -. float_of_int k in
@@ -56,16 +44,198 @@ let of_placerun:
|> flip (IntMap.add (k + 1)) k_maps)
base_k_map
(0 --^ k_max)
+
+(* Compute the rarefaction curve of a placerun, given a placement criterion and
+ * optionally the highest X value for the curve. *)
+let of_placerun:
+ (Placement.t -> float) -> ?k_max:int -> Newick_bark.t Placerun.t
+ -> (int * float * float * float) Enum.t
+= fun criterion ?k_max pr ->
+ let gt = Placerun.get_ref_tree pr |> Newick_gtree.add_zero_root_bl
+ and mass = I.of_placerun
+ Mass_map.Point
+ criterion
+ pr
in
+ let n = Placerun.get_pqueries pr |> List.length in
+ let n' = float_of_int n in
+ let k_max = match k_max with
+ | Some k when k < n -> k
+ | _ -> n
+ in
+ let k_maps = k_maps_of_placerun k_max pr in
let count k =
let q_k = IntMap.find k k_maps |> flip IntMap.find in
- count_along_mass
- gt
- mass
+ let count_fn = count_along_mass gt mass in
+ count_fn
(fun d bl ->
let d = !d in
let p = n - d in
- (1. -. (q_k d) -. (q_k p)) *. bl)
+ (1. -. (q_k d) -. (q_k p)) *. bl),
+ count_fn (fun d bl -> (1. -. (q_k !d)) *. bl),
+ count_fn
+ (fun d bl ->
+ let d' = float_of_int !d in
+ bl *. d' *. (n' -. d'))
+ in
+ 2 -- k_max
+ |> Enum.map (fun k ->
+ let u, r, q = count k in
+ let k' = float_of_int k in
+ let q' = q *. (k' -. 1.) /. (k' *. n' *. (n' -. 1.)) in
+ k, u, r, q')
+
+let mass_induced_tree gt mass =
+ let edge = ref 0
+ and bark_map = ref IntMap.empty
+ and mass_counts = ref IntMap.empty in
+ let next_edge mass_count bl =
+ let x = !edge in
+ incr edge;
+ bark_map := Newick_bark.map_set_bl x bl !bark_map;
+ if mass_count > 0 then
+ mass_counts := IntMap.add x mass_count !mass_counts;
+ x
+ in
+ let open Stree in
+ let rec aux t =
+ let i, below = match t with
+ | Leaf i -> i, None
+ | Node (i, subtrees) -> i, Some (List.map aux subtrees)
+ in
+ let ml = IntMap.get i [] mass
+ |> List.map (fun {I.distal_bl} -> distal_bl)
+ |> List.cons 0.
+ |> List.group approx_compare
+ |> List.map
+ (function
+ | hd :: tl when hd =~ 0. -> hd, List.length tl
+ | hd :: tl -> hd, List.length tl + 1
+ | [] -> invalid_arg "ml")
+ and bl = Gtree.get_bl gt i in
+ let rec snips tree pl =
+ let j, tl = match pl with
+ | (p1, c1) :: ((p2, _) :: _ as tl) -> next_edge c1 (p2 -. p1), tl
+ | [p, c] -> next_edge c (bl -. p), []
+ | [] -> invalid_arg "snips"
+ in
+ let tree' = Some
+ [match tree with
+ | None -> leaf j
+ | Some subtrees -> node j subtrees]
+ in
+ match tl with
+ | [] -> tree'
+ | tl -> snips tree' tl
+ in
+ snips below ml
+ |> Option.get
+ |> List.hd
+ in
+ let st' = aux (Gtree.get_stree gt) in
+ !mass_counts, Gtree.gtree st' !bark_map
+
+let distal_proximal_maps st marks_map =
+ let open Stree in
+ let rec aux = function
+ | Leaf i -> IntMap.singleton i (IntMap.get i 0 marks_map)
+ | Node (i, subtrees) ->
+ let distal_marks = List.map aux subtrees |> List.reduce IntMap.union in
+ let marks = List.enum subtrees
+ |> Enum.map (top_id |- flip IntMap.find distal_marks)
+ |> Enum.sum
+ |> (+) (IntMap.get i 0 marks_map)
+ in
+ IntMap.add i marks distal_marks
+ in
+ let distal_map = aux st in
+ let total_marks = IntMap.values marks_map |> Enum.sum in
+ let proximal_map = IntMap.map ((-) total_marks) distal_map in
+ distal_map, proximal_map
+
+let distal_edges_map st =
+ let open Stree in
+ let rec aux = function
+ | Leaf i -> IntMap.singleton i IntSet.empty
+ | Node (i, subtrees) ->
+ let distal_edges = List.map aux subtrees |> List.reduce IntMap.union in
+ let subtree_edges = List.map top_id subtrees in
+ let edges = List.enum subtree_edges
+ |> Enum.map (flip IntMap.find distal_edges)
+ |> Enum.reduce IntSet.union
+ |> IntSet.union (IntSet.of_list subtree_edges)
+ in
+ IntMap.add i edges distal_edges
+ in
+ aux st
+
+exception What of (int * int * int * int)
+
+let auto_cache ?(count = 1024 * 1024) f =
+ curry (Cache.make_ht ~gen:(uncurry f) count).Cache.get
+
+let variance_of_placerun criterion pr =
+ let gt = Placerun.get_ref_tree pr |> Newick_gtree.add_zero_root_bl
+ and mass = I.of_placerun
+ Mass_map.Point
+ criterion
+ pr
+ in
+ let n = Placerun.get_pqueries pr |> List.length in
+ let k_max = n in
+ let k_maps = k_maps_of_placerun k_max pr in
+ let marks_map, gt' = mass_induced_tree gt mass in
+ let bl = Gtree.get_bl gt' in
+ let st' = Gtree.get_stree gt' in
+ let distal_marks, proximal_marks = distal_proximal_maps st' marks_map in
+ let distal_edges = distal_edges_map st' in
+ (* is i proximal to j? *)
+ let _is_proximal i j = IntSet.mem j (IntMap.find i distal_edges) in
+ let is_proximal = auto_cache _is_proximal in
+ let d i = IntMap.find i distal_marks
+ and c i = IntMap.find i proximal_marks in
+ let _o i j =
+ if is_proximal i j then c i else d i
+ and _s i j =
+ if is_proximal i j then d i else c i
+ and _q k x = IntMap.find k k_maps |> IntMap.find x in
+ let o = auto_cache _o and s = auto_cache _s and q = auto_cache _q in
+ let cov_without_root k i j =
+ let q_k = q k in
+ if i = j then
+ let cov = 1. -. q_k (d i) -. q_k (c i) in
+ cov -. cov ** 2.
+ else
+ q_k (o i j + o j i) -. q_k (o i j) *. q_k (o j i)
+ +. (1. -. q_k (o i j)) *. q_k (s j i)
+ +. (1. -. q_k (o j i)) *. q_k (s i j)
+ -. q_k (s i j) *. q_k (s j i)
+ and cov_with_root k i j =
+ let q_k = q k in
+ if i = j then
+ let cov = 1. -. q_k (d i) in
+ cov -. cov ** 2.
+ else
+ let union =
+ if is_proximal i j then d i
+ else if is_proximal j i then d j
+ else d i + d j
+ in
+ q_k union -. q_k (d i) *. q_k (d j)
+ in
+ let n_edges = Stree.top_id st' in
+ let var cov_fn k =
+ let cov_times_bl k i j =
+ cov_fn k i j *. bl i *. bl j
+ in
+ let diag = 0 --^ n_edges
+ |> Enum.map (fun i -> cov_times_bl k i i)
+ |> Enum.fold (+.) 0.
+ in
+ Uptri.init n_edges (cov_times_bl k)
+ |> Uptri.fold_left (+.) 0.
+ |> ( *.) 2.
+ |> (+.) diag
in
2 -- k_max
- |> Enum.map (identity &&& count)
+ |> Enum.map (fun k -> k, var cov_without_root k, var cov_with_root k)
View
10 pplacer_src/rppr_convex_taxids.ml
@@ -13,6 +13,7 @@ let of_refpkg rp =
|> Enum.map
(fun (rank, taxmap) ->
let sizemim, cutsetim = build_sizemim_and_cutsetim (taxmap, st) in
+ let top_sizem = IntMap.find top_id sizemim in
let cutsetim = IntMap.add (Stree.top_id st) ColorSet.empty cutsetim in
let unconvex_colors = IntMap.fold
(fun _ colors unconvex ->
@@ -20,13 +21,11 @@ let of_refpkg rp =
ColorSet.union unconvex colors)
cutsetim
ColorSet.empty
- and all_colors = IntMap.find top_id sizemim
- |> ColorMap.keys
- |> ColorSet.of_enum
+ and all_colors = ColorMap.keys top_sizem |> ColorSet.of_enum
and rank_name = Tax_taxonomy.get_rank_name td rank in
ColorSet.diff all_colors unconvex_colors
|> ColorSet.enum
- |> Enum.map (fun c -> [rank_name; Tax_id.to_string c]))
+ |> Enum.map (fun c -> rank_name, c, ColorMap.find c top_sizem))
|> Enum.flatten
@@ -45,8 +44,9 @@ object (self)
method action _ =
of_refpkg self#get_rp
+ |> Enum.map (fun (a, b, c) -> [a; Tax_id.to_string b; string_of_int c])
|> List.of_enum
- |> List.cons ["rank"; "tax_id"]
+ |> List.cons ["rank"; "tax_id"; "leaf_count"]
|> self#write_ll_tab
end
View
61 pplacer_src/rppr_reroot.ml
@@ -29,7 +29,7 @@ let find_root rp gt =
(Tax_taxonomy.list_mrca td |- some)
in
let rec aux: ?top_mrca:Tax_id.t -> stree -> unit = fun ?top_mrca -> function
- | Leaf _ as n -> raise (Found_root n)
+ | Leaf _ -> failwith "tried to reroot at a leaf"
| Node (_, subtrees) as n ->
(* walk the tree, and at each node, find the MRCA of all of the leaves
* below that node, then the rank of each MRCA. if we're not at the root
@@ -50,9 +50,9 @@ let find_root rp gt =
raise (Found_root n);
(* otherwise, descend toward the node with the highest MRCA rank. this
* will never ascend the tree, as the entry representing "the tree above
- * us" is None. *)
+ * us" is None. if the highest MRCA rank is a leaf, stop here. *)
match subrks with
- | (_, Some node) :: _ ->
+ | (_, Some (Node _ as node)) :: _ ->
(* find the MRCA of everything above the selected node in the entire
* tree by taking the MRCA of the previous MRCA and the other
* subtrees (which will then be above us at the next node). *)
@@ -83,31 +83,40 @@ object (self)
method desc = "reroots a given reference package in place"
method usage = "usage: reroot -c my.refpkg"
+ method private rerooted_tree lbl =
+ let rp = self#get_rp in
+ let gt = Refpkg.get_ref_tree rp in
+ let st = gt.Gtree.stree
+ and td = Refpkg.get_taxonomy rp in
+ (* first, determine the highest rank that has more than one tax_id. *)
+ let rank_tax_map = Convex.rank_tax_map_of_refpkg rp in
+ let rank, taxmap =
+ try
+ Enum.find
+ (snd |- IntMap.values |- TaxIdSet.of_enum |- TaxIdSet.cardinal |- (<) 1)
+ (IntMap.enum rank_tax_map)
+ with Not_found ->
+ dprint "ref tree has <= 1 taxon; writing unmodified tree\n";
+ Return.return lbl gt
+ in
+ Tax_taxonomy.get_rank_name td rank
+ |> dprintf "rerooting at %s\n";
+ (* next, find the convex subset of leaves at that rank. *)
+ let phi, _ = Convex.solve (taxmap, st) in
+ let not_cut = Convex.nodeset_of_phi_and_tree phi st in
+ (* reroot after pruning the tree down to that convex subset of leaves. *)
+ Gtree.get_bark_map gt
+ |> IntMap.filteri (flip IntSet.mem not_cut |> const |> flip)
+ |> Gtree.set_bark_map gt
+ |> find_root rp
+ |> tap (dprintf "root found at node %d\n")
+ |> Gtree.reroot gt
+
method action = function
| [] ->
- let rp = self#get_rp in
- let gt = Refpkg.get_ref_tree rp in
- let st = gt.Gtree.stree
- and td = Refpkg.get_taxonomy rp in
- (* first, determine the highest rank that has more than one tax_id. *)
- let rank_tax_map = Convex.rank_tax_map_of_refpkg rp in
- let rank, taxmap = Enum.find
- (snd |- IntMap.values |- TaxIdSet.of_enum |- TaxIdSet.cardinal |- (<) 1)
- (IntMap.enum rank_tax_map)
- in
- Tax_taxonomy.get_rank_name td rank
- |> dprintf "rerooting at %s\n";
- (* next, find the convex subset of leaves at that rank. *)
- let phi, _ = Convex.solve (taxmap, st) in
- let not_cut = Convex.nodeset_of_phi_and_tree phi st in
- (* reroot after pruning the tree down to that convex subset of leaves. *)
- Gtree.get_bark_map gt
- |> IntMap.filteri (flip IntSet.mem not_cut |> const |> flip)
- |> Gtree.set_bark_map gt
- |> find_root rp
- |> tap (dprintf "root found at node %d\n")
- |> Gtree.reroot gt
- |> flip Newick_gtree.to_file (self#single_file ())
+ Newick_gtree.to_file
+ (Return.label self#rerooted_tree)
+ (self#single_file ())
| _ -> failwith "reroot doesn't take any positional arguments"
View
2  pplacer_src/stree.ml
@@ -117,7 +117,7 @@ let parent_map t =
let reroot tree root =
if top_id tree = root then tree else
let rec aux = function
- | [] -> failwith "root not found"
+ | [] -> failwith "root not found (tried to reroot at leaf?)"
| (Node (i, subtrees), path) :: _ when i = root ->
(i, subtrees) :: path
| (Leaf _, _) :: rest -> aux rest
View
16 tests/data/misc/test_rarefaction.jplace
@@ -0,0 +1,16 @@
+{"tree": "((A:2,B:3):1,C:4);", "placements":
+[
+{"p": [[0, -1309.830000, 1.000000, 1.000000, 10.000000]], "n": ["a"]},
+{"p": [[1, -1309.830000, 1.000000, 1.000000, 10.000000]], "n": ["b"]},
+{"p": [[3, -1309.830000, 1.000000, 1.000000, 10.000000]], "n": ["c"]}
+], "metadata":
+{"invocation": "by hand"
+}, "version":
+3, "fields":
+["edge_num",
+"likelihood",
+"like_weight_ratio",
+"distal_length",
+"pendant_length"
+]
+}
View
46 tests/guppy/test_rarefact.ml
@@ -2,25 +2,57 @@ open Ppatteries
open OUnit
open Test_util
+let check_triples =
+ Enum.iter2
+ (fun (a1, b1, c1) (a2, b2, c2) ->
+ (Printf.sprintf "unequal (%d(%g, %g) and %d(%g, %g))"
+ a1 b1 c1 a2 b2 c2)
+ @? (a1 = a2 && b1 =~ b2 && c1 =~ c2))
+
+let check_quipples =
+ Enum.iter2
+ (fun (a1, b1, c1, d1) (a2, b2, c2, d2) ->
+ (Printf.sprintf "unequal (%d(%g, %g, %g) and %d(%g, %g, %g))"
+ a1 b1 c1 d1 a2 b2 c2 d2)
+ @? (a1 = a2 && b1 =~ b2 && c1 =~ c2 && d1 =~ d2))
+
let suite = [
"test_simple" >:: begin fun () ->
placerun_of_dir "simple" "test1"
|> Rarefaction.of_placerun Placement.ml_ratio
- |> check_map_approx_equal
- "unequal (%d(%g) and %d(%g))"
+ |> check_quipples
(List.enum [
- 2, 8.;
+ 2, 8., 15., 2.;
])
end;
"test_multi" >:: begin fun () ->
placerun_of_dir "multi" "test1and3"
|> Rarefaction.of_placerun Placement.ml_ratio
- |> check_map_approx_equal
- "unequal (%d(%g) and %d(%g))"
+ |> check_quipples
+ (List.enum [
+ 2, 6.66667, 12.33333, 1.66667;
+ 3, 10., 15., 2.22222;
+ ])
+ end;
+
+ "test_hand_mean" >:: begin fun () ->
+ placerun_of_dir "misc" "test_rarefaction"
+ |> Rarefaction.of_placerun Placement.ml_ratio
+ |> check_quipples
+ (List.enum [
+ 2, 4.66667, 5., 1.16667;
+ 3, 7., 7., 1.55556;
+ ])
+ end;
+
+ "test_hand_variance" >:: begin fun () ->
+ placerun_of_dir "misc" "test_rarefaction"
+ |> Rarefaction.variance_of_placerun Placement.ml_ratio
+ |> check_triples
(List.enum [
- 2, 6.66667;
- 3, 10.;
+ 2, 1.55556, 0.66667;
+ 3, 0., 0.;
])
end;
View
37 tests/rppr/test_convex_taxids.ml
@@ -4,14 +4,17 @@ open Ppatteries
let printer sll =
let ch = IO.output_string () in
- Printf.fprintf ch "%a" (List.print (List.print String.print)) sll;
+ Printf.fprintf ch "%a"
+ (List.print (Tuple3.printn String.print String.print Int.print))
+ sll;
IO.close_out ch
let make_test tree expected () =
simple_refpkg tree
|> Rppr_convex_taxids.of_refpkg
+ |> Enum.map (Tuple3.map2 Tax_id.to_string)
|> List.of_enum
- |> List.sort (List.make_compare String.compare)
+ |> List.sort (comparing Tuple3.first)
|> assert_equal ~printer expected
@@ -19,36 +22,36 @@ let suite = [
"convex_tree" >:: make_test
"((C_1,D_1),(F_1,G_1));"
[
- ["family"; "A"];
- ["genus"; "B"];
- ["genus"; "E"];
- ["species"; "C"];
- ["species"; "D"];
- ["species"; "F"];
- ["species"; "G"];
+ "family", "A", 4;
+ "genus", "B", 2;
+ "genus", "E", 2;
+ "species", "C", 1;
+ "species", "D", 1;
+ "species", "F", 1;
+ "species", "G", 1;
];
"nonconvex_species_tree" >:: make_test
"((C_1,D_1),(C_2,D_2));"
[
- ["family"; "A"];
- ["genus"; "B"];
+ "family", "A", 4;
+ "genus", "B", 4;
];
"one_convex_species_tree" >:: make_test
"((C_1,(F_1,D_1)),(C_2,D_2));"
[
- ["family"; "A"];
- ["genus"; "B"];
- ["genus"; "E"];
- ["species"; "F"];
+ "family", "A", 5;
+ "genus", "B", 4;
+ "genus", "E", 1;
+ "species", "F", 1;
];
"one_convex_species_with_more_discordance_tree" >:: make_test
"((C_1,(F_1,G_1)),(C_2,G_2));"
[
- ["family"; "A"];
- ["species"; "F"];
+ "family", "A", 5;
+ "species", "F", 1;
];
]
Please sign in to comment.
Something went wrong with that request. Please try again.