Permalink
Browse files

rename CamlPaml.Infer -> CamlPaml.PhyloLik

  • Loading branch information...
1 parent b12e634 commit 6bb5edaa2158394897c3bac5cca7007650ee62d7 @mlin committed Sep 6, 2010
@@ -4,10 +4,10 @@ NewickLexer
T
P
Q
-Infer
Expr
Fit
PhyloModel
+PhyloLik
PhyloEM
Code
Parsimony
@@ -2,10 +2,10 @@ Code
Newick
T
Parsimony
-Expr
+PhyloModel
P
Q
-PhyloModel
+Expr
+PhyloLik
PhyloEM
-Infer
Fit
View
@@ -11,12 +11,12 @@ let new_sufficient_statistics m =
let t0 = PM.tree m
Array.init (T.size t0 - 1) (fun _ -> Array.make_matrix n n 0.)
-let collect_sufficient_statistics m leaves ss =
- let inter = PM.infer m leaves
- let lik = Infer.likelihood inter
+let collect_sufficient_statistics ?workspace m leaves ss =
+ let inter = PM.prepare_lik ?workspace m leaves
+ let lik = PhyloLik.likelihood inter
let t = PM.tree m
for i = 0 to T.root t - 1 do
- Infer.add_branch_posteriors inter i ss.(i)
+ PhyloLik.add_branch_posteriors inter i ss.(i)
lik
let clean_sufficient_statistics ?(tol=1e-6) ss =
View
@@ -8,8 +8,8 @@ type sufficient_statistics = float array array array
val new_sufficient_statistics : PhyloModel.t -> sufficient_statistics
-(** compute the E-step statistics for one site, given the leaves (as would be given to {{:PhyloModel}PhyloModel.infer}). Update the given [sufficient_statistics] and return the probability of the leaves under the model. This should be called for each site to collect the statistics for the whole alignment. *)
-val collect_sufficient_statistics : PhyloModel.t -> Infer.leaf array -> sufficient_statistics -> float
+(** compute the E-step statistics for one site, given the leaves (as would be given to {{:PhyloModel}PhyloModel.prepare_lik}). Update the given [sufficient_statistics] and return the probability of the leaves under the model. This should be called for each site to collect the statistics for the whole alignment. *)
+val collect_sufficient_statistics : ?workspace:PhyloLik.workspace -> PhyloModel.t -> PhyloLik.leaf array -> sufficient_statistics -> float
(** any entry in the sufficient statistics less than [tol] is set to zero. useful if e.g. planning to compress the sufficient statistics for transport between compute nodes *)
val clean_sufficient_statistics : ?tol:float -> sufficient_statistics -> unit
@@ -18,21 +18,29 @@ let get_leaf k leaf = match leaf with
| `Distribution ar -> Gsl_vector.of_array ar
| `Marginalize -> hashcons_raw_marg k
+type workspace = {
+ mutable generation : int;
+ data : Gsl_matrix.matrix
+}
+
type intermediate = {
tree : T.t;
pms : Gsl_matrix.matrix array;
leaves : leaf array;
- alpha : Gsl_matrix.matrix;
+
+ workspace : workspace;
+ my_generation : int;
+
+ alpha : Gsl_matrix.matrix; (** actually a sub-matrix of workspace.data *)
mutable z : float;
mutable have_alpha : bool;
- beta : Gsl_matrix.matrix;
- mutable have_beta : bool
+ beta : Gsl_matrix.matrix; (** actually a sub-matrix of workspace.data *)
+ mutable have_beta : bool;
}
-type workspace = Gsl_matrix.matrix
let new_workspace tree dim =
let rows = 2 * (T.size tree) - (T.leaves tree)
- Gsl_matrix.create rows dim
+ { generation = min_int; data = Gsl_matrix.create rows dim }
let empty = [||]
let prepare ?workspace tree pms prior leaves =
@@ -44,14 +52,17 @@ let prepare ?workspace tree pms prior leaves =
if Array.length pms < n-1 then invalid_arg "CamlPaml.Infer.prepare: not enough P matrices"
let workspace = match workspace with Some x -> x | None -> new_workspace tree k
- let rows,cols = Gsl_matrix.dims workspace
+ workspace.generation <- (if workspace.generation = max_int then min_int else workspace.generation+1)
+
+ let rows,cols = Gsl_matrix.dims workspace.data
if rows < (2*n-nl) || cols <> k then invalid_arg "CamlPaml.Infer.prepare: inappropriate workspace dimensions"
- let alpha = Bigarray.Array2.sub_left workspace 0 (n-nl)
- let beta = Bigarray.Array2.sub_left workspace (n-nl) n
+ let alpha = Bigarray.Array2.sub_left workspace.data 0 (n-nl)
+ let beta = Bigarray.Array2.sub_left workspace.data (n-nl) n
for a = 0 to k-1 do beta.{n-1,a} <- prior.(a)
- { tree = tree; pms = pms; leaves = leaves; alpha = alpha; z = nan; have_alpha = false; beta = beta; have_beta = false }
+ { tree = tree; pms = pms; leaves = leaves; workspace = workspace; my_generation = workspace.generation;
+ alpha = alpha; z = nan; have_alpha = false; beta = beta; have_beta = false }
(* Inside algo (aka Felsenstein algo.) *)
let alpha_get x br =
@@ -106,10 +117,12 @@ let ensure_beta x =
x.have_beta <- true
let likelihood x =
+ if x.workspace.generation <> x.my_generation then failwith "CamlPaml.PhyloLik.likelihood: invalidated workspace"
ensure_alpha x
x.z
let node_posterior inferred i =
+ if inferred.workspace.generation <> inferred.my_generation then failwith "CamlPaml.PhyloLik.node_posterior: invalidated workspace"
ensure_beta inferred
let k = snd (Gsl_matrix.dims inferred.alpha)
if inferred.z = 0. then
@@ -123,6 +136,7 @@ let node_posterior inferred i =
Array.init k (fun x -> (alpha_get inferred i).{x} *. inferred.beta.{i,x} /. inferred.z)
let add_branch_posteriors ?(weight=1.0) inferred branch ecounts =
+ if inferred.workspace.generation <> inferred.my_generation then failwith "CamlPaml.PhyloLik.add_branch_posterior: invalidated workspace"
ensure_beta inferred
let k = snd (Gsl_matrix.dims inferred.alpha)
@@ -1,29 +1,31 @@
-(** Probabilistic inference on phylogenetic trees given the substitution (P) matrices on each branch. These are low-level routines that should usually be used through the higher-level {{:PhyloModel}[PhyloModel]} abstractions. *)
+(** core phylogenetic likelihood calculations
-type intermediate
+These should usually be accessed through the higher-level {{:PhyloModel}[PhyloModel]} abstractions. *)
(** Specifying a leaf (extant) character. Usually the extant character is known with certainty; in this case, use [`Certain] with the index of the character, e.g. [`Certain (Code.Codon61.index ('A','T','G'))]. Alternatively, you can specify an arbitrary probability distribution over extant characters. Lastly, you can specify to marginalize a leaf out of the likelihood calculations entirely. *)
type leaf = [`Certain of int | `Distribution of float array | `Marginalize]
type workspace
-(** The calculations use a workspace of [((2 * T.size tree - T.leaves tree) * k)] [float]s where [k] is the alphabet size. As a performance optimization, you can create a workspace with [new_workspace tree k] and use it across multiple calls to [prepare]; otherwise it will allocate by itself. *)
+(** The calculations use a workspace of [((2 * T.size tree - T.leaves tree) * k)] [float]s where [k] is the alphabet size. As a performance optimization, you can create a workspace with [new_workspace tree k] and use it across multiple calls to [prepare]; otherwise, it will allocate automatically. *)
val new_workspace : T.t -> int -> workspace
-(** [prepare tree p_matrices root_prior leaves] infers ancestral states, given:
+type intermediate
+
+(** [prepare tree p_matrices root_prior leaves] initializes the likelihood calculations, given:
- [tree] the phylogenetic tree
-- [p_matrices.(i)] is the substitution matrix for the branch leading TO node [i] FROM its parent.
+- [p_matrices.(i)] is the substitution matrix for the branch leading {e to} node [i] {e from} its parent.
- [root_prior] the prior probability distribution over characters at the root.
- [leaves] is an array of [leaf]s (see above), the appropriate number for the tree
-- [workspace] is an appropriately sized workspace; it will be allocated if not given.
-@return an abstract value from which various information about ancestral states can be extracted (see below). If using a shared workspace, be sure to get all the results you need before the next call to [prepare].
+- [workspace] is an appropriately sized workspace; one will be allocated if not given.
+@return an abstract value from which various information about ancestral states can be extracted (see below). The time-consuming calculations are not actually performed until needed. If using a shared workspace, be sure to get all the results you need before the next call to [prepare].
*)
val prepare : ?workspace:workspace -> T.t -> P.matrix array -> float array -> leaf array -> intermediate
(** calculate the probability of the leaves under the substitution model *)
val likelihood : intermediate -> float
-(** compute the posterior probability distribution over characters at the specified node *)
+(** compute the posterior probability distribution over characters at the specified node. *)
val node_posterior : intermediate -> int -> float array
(** [branch_posteriors intermediate k] computes the posterior probability of each possible substitution on the specified branch [k]. That is, entry [(i,j)] of the returned matrix is [P(parent(k) = i && k = j | Leaves)] *)
@@ -29,7 +29,7 @@ let q { qms = qms } br = qms.(br)
let p { pms = pms } br = pms.(br) (* Array.map Array.copy pms.(br) *)
let prior { prior = rp } = Array.copy rp
-let infer ?workspace m leaves = Infer.prepare ?workspace m.tree m.pms m.prior leaves
+let prepare_lik ?workspace m leaves = PhyloLik.prepare ?workspace m.tree m.pms m.prior leaves
let checksum = 1., 1e-6
@@ -25,13 +25,13 @@ val prior : t -> float array
*)
val simulate : t -> (?a:(int array) -> unit -> int array)
-(** Probabilistically infer ancestral characters based on the given configuration of extant characters (leaves).
+(** Prepare likelihood calculations for the given configuration of extant characters (leaves).
-@param workspace (optional) a preallocated workspace for [Infer.prepare]
+@param workspace (optional) a preallocated workspace for {{:PhyloLik}PhyloLik.prepare}
-@return {{:Infer}intermediate values}, from which additional information can be extracted.
+@return {{:PhyloLik}PhyloLik.intermediate} values, from which additional information can be extracted.
*)
-val infer : ?workspace:Infer.workspace -> t -> Infer.leaf array -> Infer.intermediate
+val prepare_lik : ?workspace:PhyloLik.workspace -> t -> PhyloLik.leaf array -> PhyloLik.intermediate
(** {1 Symbolic parameterizations}
View
@@ -52,11 +52,11 @@ let lpr_leaves inst leaves t =
let ts = PM.P14n.tree_settings inst
ts.(0) <- t
let inst = PM.P14n.update ~tree_settings:ts inst
- let workspace = Infer.new_workspace (PM.tree (PM.P14n.model inst)) Codon.dim
+ let workspace = PhyloLik.new_workspace (PM.tree (PM.P14n.model inst)) Codon.dim
let lik = ref 0.
leaves |> Array.iter
fun lvs ->
- let lvslik = Infer.likelihood (PM.infer ~workspace:workspace (PM.P14n.model inst) lvs)
+ let lvslik = PhyloLik.likelihood (PM.prepare_lik ~workspace:workspace (PM.P14n.model inst) lvs)
lik := !lik +. log lvslik
!lik, inst

0 comments on commit 6bb5eda

Please sign in to comment.