Permalink
Browse files

Merge branch 'dev' into 215-biom-support

  • Loading branch information...
2 parents 52029c1 + 76cbd02 commit 63c06c62947d95d21c156c240853fd7ad0be81e5 @habnabit habnabit committed Aug 17, 2012
View
@@ -4,11 +4,16 @@
* Closed GH-181: Added `guppy indep_c`; added `--leaf-values` to `guppy mft`.
* Closed GH-256: Added `--mmap-file` to pplacer; changed `--pretend` in
pplacer to show the amount of memory which would be allocated.
+ * Closed GH-260: Added `rooted_pd` to `guppy fpd`; Added a `--variance` flag
+ to `guppy rarefact`.
* Closed GH-261: Added `rppr convex_taxids`.
* Closed GH-263: Fixed a bug that could sometimes cause failures in `rppr
min_adcl` with the PAM algorithm.
* Closed GH-265: Fixed a bug that could cometimes cause `rppr reroot` to
return a suboptimal rooting.
+ * Closed GH-266: Adding `map_ratio` and `map_overlap` columns to `guppy
+ to_csv`.
+ * Closed GH-267: Added a `leaf_count` column to `rppr convex_taxids`.
1.1.alpha13
View
@@ -103,16 +103,16 @@ let median l =
aux (l, l)
let verbosity = ref 1
-let dprintf ?(l = 1) ?(flush = true) fmt =
+let dfprintf ?(l = 1) ?(flush = true) ch fmt =
if !verbosity >= l then begin
- finally (if flush then flush_all else identity) (Printf.printf fmt)
+ finally (if flush then flush_all else identity) (Printf.fprintf ch fmt)
end else
Printf.ifprintf IO.stdnull fmt
-let dprint ?(l = 1) ?(flush = true) s =
- if !verbosity >= l then begin
- print_string s;
- if flush then flush_all ();
- end
+let dprintf ?l ?flush = dfprintf ?l ?flush stdout
+let deprintf ?l ?flush = dfprintf ?l ?flush stderr
+let dfprint ?l ?flush ch s = dfprintf ?l ?flush ch "%s" s
+let dprint ?l ?flush = dfprint ?l ?flush stdout
+let deprint ?l ?flush = dfprint ?l ?flush stderr
let align_with_space =
List.map (Tuple3.map3 ((^) " ")) |- Arg.align
@@ -686,3 +686,36 @@ let () =
fn
op)
| _ -> None)
+
+let memory_stats_ch =
+ match begin
+ try Some (Sys.getenv "PPLACER_MEMORY_STATS")
+ with Not_found -> None
+ end with
+ | None -> None
+ | Some statsfile ->
+ let ch = Legacy.open_out_gen
+ [Open_append; Open_creat; Open_trunc]
+ 0o600
+ statsfile
+ in
+ let write = Csv.to_channel ch |> Csv.output_record in
+ let word_in_bytes = Sys.word_size / 8 in
+ let start = Unix.gettimeofday () in
+ let last_top = ref None in
+ write ["time"; "pid"; "top_heap_bytes"];
+ let check_stats () =
+ let stats = Gc.quick_stat () in
+ match !last_top with
+ | Some top when stats.Gc.top_heap_words <= top -> ()
+ | _ ->
+ write
+ [Printf.sprintf "%f" (Unix.gettimeofday () -. start);
+ string_of_int (Unix.getpid ());
+ string_of_int (stats.Gc.top_heap_words * word_in_bytes)];
+ Legacy.flush ch;
+ last_top := Some stats.Gc.top_heap_words
+ in
+ let _ = Gc.create_alarm check_stats in
+ at_exit check_stats;
+ Some ch
@@ -3,7 +3,9 @@
By default, ``guppy fpd`` outputs a matrix containing in each row: * the
placefile name, the phylogenetic entropy (``phylo_entropy``, `Allen 2009`_),
the quadratic entropy (``quadratic``, `Rao 1982`_, `Warwick and Clark 1995`_)
-phylogenetic diversity (``pd``, `Faith 1992`_), and a new diversity metric
+phylogenetic diversity (``unrooted_pd``, `Faith 1992`_), phylogenetic diversity
+which only requires distal mass (``rooted_pd``, this is as oppposed to ``pd``
+requiring both distal and proximal mass), and a new diversity metric
generalizing PD to incorporate abundance: abundance-weighted phylogenetic
diversity (``awpd``).
@@ -1,3 +1,13 @@
Calculate phylogenetic rarefaction curves.
+For every :math:`k \in [2, n]`, where n is the number of pqueries in a
+placefile, subsample the given placefile to contain every combination of
+:math:`k` pqueries and calculate the mean and variance of phylogenetic divesity
+for all of these subsampled placefiles.
+
+The ``rooted_mean`` and ``rooted_variance`` columns are respectively the mean
+and variance of the phylogenetic diversity only requiring mass on the distal
+side of the edge (which normally requires mass proximal and distal to the
+edge).
+
*Experimental.*
View
@@ -579,8 +579,8 @@ let rank_tax_map_of_refpkg rp =
Array.iteri
(fun rank rankname ->
if not (IntMap.mem rank m) then
- dprintf "warning: rank %s not represented in the lineage of any \
- sequence in reference package %s.\n"
+ deprintf "warning: rank %s not represented in the lineage of any \
+ sequence in reference package %s.\n"
rankname
(Refpkg.get_name rp))
td.Tax_taxonomy.rank_names)
View
@@ -47,14 +47,17 @@ let total_along_mass ?(include_pendant = false) criterion pr cb =
* we're either before or after the induced tree and the multiplier should
* be 0. *)
let bump_function r =
- if r = 0. || approx_equal ~epsilon r 1. then 0. else 1.
+ if r =~ 0. || approx_equal ~epsilon r 1. then 0. else 1.
-let pd_of_placerun ?include_pendant criterion pr =
+let bump_with_root r =
+ if r =~ 0. then 0. else 1.
+
+let pd_of_placerun ?include_pendant ?(bump = bump_function) criterion pr =
total_along_mass
?include_pendant
criterion
pr
- (fun r bl -> bump_function r *. bl)
+ (fun r bl -> bump r *. bl)
let reflect x =
if approx_equal ~epsilon x 1. then 0.
@@ -113,17 +116,19 @@ object (self)
and include_pendant = fv include_pendant in
let awpd = awpd_of_placerun ~include_pendant criterion
and pd = pd_of_placerun ~include_pendant criterion
+ and rpd = pd_of_placerun ~include_pendant ~bump:bump_with_root criterion
and entropy = entropy_of_placerun ~include_pendant criterion in
prl
|> List.map
(fun pr ->
let pe, qe = entropy pr in
- [pe; qe; pd pr; awpd 1. pr]
+ [pe; qe; pd pr; rpd pr; awpd 1. pr]
|> (flip List.append (List.map (flip awpd pr) exponents))
|> List.map (Printf.sprintf "%g")
|> List.cons (Placerun.get_name pr))
|> List.cons
- (["placerun"; "phylo_entropy"; "quadratic"; "pd"; "awpd"]
+ (["placerun"; "phylo_entropy"; "quadratic"; "unrooted_pd";
+ "rooted_pd"; "awpd"]
@ List.map (Printf.sprintf "awpd_%g") exponents)
|> self#write_ll_tab
@@ -9,21 +9,52 @@ object (self)
inherit placefile_cmd () as super_placefile
inherit tabular_cmd () as super_tabular
+ val variance = flag "--variance"
+ (Plain (false, "Calculate variance of phylogenetic entropy."))
+
method specl =
super_mass#specl
@ super_tabular#specl
+ @ [toggle_flag variance]
method desc = "calculates phylogenetic rarefaction curves"
method usage = "usage: rarefact [options] placefile"
method private placefile_action = function
| [pr] ->
let criterion = self#criterion in
+ let is_uniform_mass =
+ Placerun.get_pqueries pr
+ |> List.map (Pquery.namlom |- List.map snd)
+ |> List.flatten
+ |> List.sort_unique (<~>)
+ |> List.length
+ |> (=) 1
+ and fmt = Printf.sprintf "%g" in
+ if not is_uniform_mass then begin
+ if fv variance then
+ failwith "not all sequences have uniform weight; variance can't be \
+ calculated";
+ deprint "warning: not all sequences have uniform weight; expectation \
+ of quadratic entropy can't be calculated\n"
+ end;
Rarefaction.of_placerun criterion pr
- |> Enum.map (fun (a, b) -> [string_of_int a; Printf.sprintf "%g" b])
- |> List.of_enum
- |> List.cons ["k"; "r"]
- |> self#write_ll_tab
+ |> Enum.map
+ (fun (k, um, rm, qm) ->
+ [string_of_int k; fmt um; fmt rm]
+ @ (if is_uniform_mass then [fmt qm] else []))
+ |> begin
+ if fv variance then
+ curry Enum.combine (Rarefaction.variance_of_placerun criterion pr)
+ |- Enum.map (fun ((_, uv, rv), sl) -> sl @ [fmt uv; fmt rv])
+ else identity
+ end
+ |> List.of_enum
+ |> List.cons
+ (["k"; "unrooted_mean"; "rooted_mean"]
+ @ (if is_uniform_mass then ["quadratic_mean"] else [])
+ @ (if fv variance then ["unrooted_variance"; "rooted_variance"] else []))
+ |> self#write_ll_tab
| l ->
List.length l
@@ -35,7 +35,7 @@ object (self)
|> List.cons
["origin"; "name"; "multiplicity"; "edge_num"; "like_weight_ratio";
"post_prob"; "likelihood"; "marginal_like"; "distal_length";
- "pendant_length"; "classification"]
+ "pendant_length"; "classification"; "map_ratio"; "map_overlap"]
|> self#write_ll_tab
end
@@ -116,13 +116,18 @@ class virtual ['a] process child_func =
(* Only these descriptors are used in the child. The parent has no use for
* them, so it closes them. The child has no use for anything but them, so
* it closes everything but them. *)
- let child_only = [child_rd; parent_wr; progress_wr]
- in
+ let child_only = [child_rd; parent_wr; progress_wr] in
+ let () = flush_all () in
let pid = match Unix.fork () with
| 0 ->
(* Do the actual closing of the irrelevant descriptors. *)
begin
let ignored = List.map fd_of_file_descr child_only in
+ let ignored = match Ppatteries.memory_stats_ch with
+ | None -> ignored
+ | Some ch ->
+ fd_of_file_descr (Unix.descr_of_out_channel ch) :: ignored
+ in
List.iter
(fun fd -> if not (List.mem fd ignored) then quiet_close fd)
(range 256)
View
@@ -134,31 +134,33 @@ let placement_of_str str =
(* *** WRITING *** *)
(* usual output *)
-let opt_to_str f = function
- | Some x -> f x
- | None -> "-"
-
let string_of_8gfloat = Printf.sprintf "%8g"
let string_of_gfloat = Printf.sprintf "%g"
-let to_strl_gen fint ffloat ffloato ftaxido place =
+let to_strl_gen fint ffloat ftaxid default place =
+ let map_ratio, map_overlap =
+ Option.map_default (Tuple2.mapn some some) (None, None) place.map_identity
+ (* eta expansion !! *)
+ and fopt f xo = Option.map_default f default xo in
[
fint place.location;
ffloat place.ml_ratio;
- ffloato place.post_prob;
+ fopt ffloat place.post_prob;
ffloat place.log_like;
- ffloato place.marginal_prob;
+ fopt ffloat place.marginal_prob;
ffloat place.distal_bl;
ffloat place.pendant_bl;
- ftaxido place.classif;
+ fopt ftaxid place.classif;
+ fopt ffloat map_ratio;
+ fopt fint map_overlap;
]
let to_strl =
to_strl_gen
string_of_int
string_of_8gfloat
- (opt_to_str string_of_8gfloat)
- (opt_to_str Tax_id.to_string)
+ Tax_id.to_string
+ "-"
let to_str place = String.concat "\t" (to_strl place)
@@ -255,14 +257,9 @@ let of_json fields a =
}
(* CSV *)
-let opt_to_csv_str f = function
- | Some x -> f x
- | None -> "NA"
-
let to_csv_strl =
to_strl_gen
string_of_int
string_of_gfloat
- (opt_to_csv_str string_of_gfloat)
- (opt_to_csv_str Tax_id.to_string)
-
+ Tax_id.to_string
+ "NA"
Oops, something went wrong.

0 comments on commit 63c06c6

Please sign in to comment.