From 2b02c62717c32ec63909852955a28036d800a076 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Tue, 26 Oct 2010 18:28:53 -0400 Subject: [PATCH] CamlPaml.PhyloEM: a bit of cleanup --- lib/CamlPaml/PhyloEM.ml | 17 ++--------------- lib/CamlPaml/PhyloEM.mli | 31 ++++++++++++++++++------------- 2 files changed, 20 insertions(+), 28 deletions(-) diff --git a/lib/CamlPaml/PhyloEM.ml b/lib/CamlPaml/PhyloEM.ml index 08421ec..8125735 100644 --- a/lib/CamlPaml/PhyloEM.ml +++ b/lib/CamlPaml/PhyloEM.ml @@ -134,21 +134,8 @@ let d_ell_dQ_dxi inst ss i = tot := !tot +. ssbrab *. dPt_dxi_a.(b) /. pa.{b} !tot -let d_ell_dQ_dx inst ss = Array.init (Array.length (PM.P14n.p14n inst).PM.P14n.q_domains) (d_ell_dQ_dxi inst ss) - -(* numerical estimate - for debugging *) -let nd_ell_dQ_dx inst ss = - let q_settings = PM.P14n.q_settings inst - let np = Array.length (PM.P14n.p14n inst).PM.P14n.q_domains - Array.init np - fun i -> - let f x = - let settings = Array.copy q_settings - settings.(i) <- x - let xinst = PM.P14n.update ~q_settings:q_settings inst - ell (PM.P14n.model xinst) ss - let { Gsl_fun.res = res } = Gsl_diff.central f q_settings.(i) - res +let d_ell_dQ_dx inst ss = + Array.init (Array.length (PM.P14n.p14n inst).PM.P14n.q_domains) (d_ell_dQ_dxi inst ss) let d_ell_dbranch inst br ss = let m = PM.P14n.model inst diff --git a/lib/CamlPaml/PhyloEM.mli b/lib/CamlPaml/PhyloEM.mli index 29280a1..f343d76 100644 --- a/lib/CamlPaml/PhyloEM.mli +++ b/lib/CamlPaml/PhyloEM.mli @@ -1,23 +1,29 @@ -(** supporting functions for estimating the parameters of phylogenetic models by expectation-maximization (EM) *) +(** supporting functions for estimating the parameters of phylogenetic models by +expectation-maximization (EM) *) (** {1 E-step} *) -(** sufficient statistics for the E-step, composed of estimates of how many times each character-character substitution occurred on each branch of the tree (summed over all sites) -*) +(** sufficient statistics for the E-step, composed of estimates of how many times each +character-character substitution occurred on each branch of the tree (summed over all sites) *) type sufficient_statistics = float array array array val new_sufficient_statistics : PhyloModel.t -> sufficient_statistics -(** compute the E-step statistics for one site, given the leaves (as would be given to {{:PhyloModel}PhyloModel.prepare_lik}). Update the given [sufficient_statistics] and return the probability of the leaves under the model. This should be called for each site to collect the statistics for the whole alignment. *) +(** compute the E-step statistics for one site, given the leaves (as would be given to +{{:PhyloModel}PhyloModel.prepare_lik}). Update the given [sufficient_statistics] and return the +probability of the leaves under the model. This should be called for each site to collect the +statistics for the whole alignment. *) val collect_sufficient_statistics : ?workspace:PhyloLik.workspace -> PhyloModel.t -> PhyloLik.leaf array -> sufficient_statistics -> float -(** any entry in the sufficient statistics less than [tol] is set to zero. useful if e.g. planning to compress the sufficient statistics for transport between compute nodes *) +(** any entry in the sufficient statistics less than [tol] is set to zero. useful if e.g. planning +to compress the sufficient statistics for transport between compute nodes *) val clean_sufficient_statistics : ?tol:float -> sufficient_statistics -> unit (** {1 M-step} *) -(** expected log-likelihood of the model, where the expectation is over the soft assignment of the ancestral characters represented by the sufficient statistics of the E-step. (objective function of the M-step, sometimes called the Q function) -*) +(** expected log-likelihood of the model, where the expectation is over the soft assignment of the +ancestral characters represented by the sufficient statistics of the E-step. (objective function of +the M-step, sometimes called the Q function) *) val ell : PhyloModel.t -> sufficient_statistics -> float (** compute the portion of the ell arising from the prior over the ancestral sequence *) @@ -29,16 +35,15 @@ val branches_ell : PhyloModel.t -> sufficient_statistics -> float (** compute the portion of the ell arising from the substitutions on a specific branch*) val branch_ell : PhyloModel.t -> sufficient_statistics -> int -> float -(** compute the gradient of the ell with respect to the variables in the model's rate matrix p14n, based on their symbolic expressions *) +(** compute the gradient of the ell with respect to the variables in the model's rate matrix p14n, +based on their symbolic expressions *) val d_ell_dQ_dx : PhyloModel.P14n.instance -> sufficient_statistics -> float array -(** numerically estimate the same *) -val nd_ell_dQ_dx : PhyloModel.P14n.instance -> sufficient_statistics -> float array - -(** compute the derivative with respect to one specific rate matrix variable only *) +(** compute the partial derivative with respect to one specific rate matrix variable only *) val d_ell_dQ_dxi : PhyloModel.P14n.instance -> sufficient_statistics -> int -> float -(** compute the gradient with respect to the variables in the model's p14n for branch lengths, based on their symbolic expressions*) +(** compute the gradient with respect to the variables in the model's p14n for branch lengths, based +on their symbolic expressions*) val d_ell_dtree : PhyloModel.P14n.instance -> sufficient_statistics -> float array (** compute the derivative with respect to one specific branch length variable only *)