From 3b5bd0e0ee3442c79001d3fa6852c9b66a08a724 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Mon, 14 Apr 2014 20:49:53 -0700 Subject: [PATCH] CamlPaml.PhyloLik.node_posterior: avoid unnecessary computation of all betas when computing posterior distribution for the root. This commit approximately doubles the speed of PhyloCSF, since it was calling node_posterior on the root to compute the ancestral composition score. --- lib/CamlPaml/PhyloLik.ml | 20 +++++++++++--------- src/test.ml | 6 ++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/lib/CamlPaml/PhyloLik.ml b/lib/CamlPaml/PhyloLik.ml index 6157f59..b1c12e1 100644 --- a/lib/CamlPaml/PhyloLik.ml +++ b/lib/CamlPaml/PhyloLik.ml @@ -112,7 +112,10 @@ let ensure_beta x = inter.{a} <- bp.{a} *. (Gsl.Blas.dot (Gsl.Matrix.row ss a) xas) for b = 0 to k-1 do - for a = 0 to k-1 do ps_colb.{a} <- ps.{a,b} + for a = 0 to k-1 do + (* idea: instead of this loop it'd probably be a little faster + to transpose ps *) + Bigarray.Array1.unsafe_set ps_colb a (Bigarray.Array2.unsafe_get ps a b) x.beta.{i,b} <- Gsl.Blas.dot inter ps_colb x.have_beta <- true @@ -123,17 +126,16 @@ let likelihood x = let node_posterior inferred i = if inferred.workspace.generation <> inferred.my_generation then failwith "CamlPaml.PhyloLik.node_posterior: invalidated workspace" - ensure_beta inferred + ensure_alpha inferred let k = snd (Gsl.Matrix.dims inferred.alpha) - if inferred.z = 0. then + if inferred.z = 0. then (* the data were impossible by the model *) Array.make k 0. + else if i < T.leaves inferred.tree then (* leaf: usually certain *) + Gsl.Vector.to_array (alpha_get inferred i) else - let nleaves = T.leaves inferred.tree - - if i < nleaves then - Gsl.Vector.to_array (alpha_get inferred i) - else - Array.init k (fun x -> (alpha_get inferred i).{x} *. inferred.beta.{i,x} /. inferred.z) + (* calculate the betas (except for the root, whose betas get prefilled in prepare) *) + if i <> T.root inferred.tree then ensure_beta inferred + Array.init k (fun x -> (alpha_get inferred i).{x} *. inferred.beta.{i,x} /. inferred.z) let add_branch_posteriors ?(weight=1.0) inferred branch ecounts = if inferred.workspace.generation <> inferred.my_generation then failwith "CamlPaml.PhyloLik.add_branch_posterior: invalidated workspace" diff --git a/src/test.ml b/src/test.ml index a5d511d..d5b744b 100644 --- a/src/test.ml +++ b/src/test.ml @@ -25,16 +25,18 @@ let run_PhyloCSF species params = | _ -> raise Exit let talAA () = - let ans = run_PhyloCSF "12flies" "../PhyloCSF_Examples/tal-AA.fa" + let ans = run_PhyloCSF "12flies" "../PhyloCSF_Examples/tal-AA.fa --ancComp" print_endline (String.join "\t" ans) List.nth ans 1 $hould # equal "score(decibans)" float_of_string (List.nth ans 2) $hould # be # within (297.62,297.63) + float_of_string (List.nth ans 3) $hould # be # within (48.25,48.26) let aldh2_ex5_out () = - let ans = run_PhyloCSF "29mammals" "../PhyloCSF_Examples/ALDH2.exon5.fa" + let ans = run_PhyloCSF "29mammals" "../PhyloCSF_Examples/ALDH2.exon5.fa --ancComp" print_endline (String.join "\t" ans) List.nth ans 1 $hould # equal "score(decibans)" float_of_string (List.nth ans 2) $hould # be # within (-178.93,-178.92) + float_of_string (List.nth ans 3) $hould # be # within (-38.29,-38.28) let aldh2_ex5_in () = let abbreviate = (try ignore (Sys.getenv "SKIP_SLOW"); true with Not_found -> false)