From cacb792b0e4d0cffa1ba0ce6aecd891b4c746c9e Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Tue, 3 Jan 2012 21:48:35 -0500 Subject: [PATCH 1/9] FitECM: initial skeleton import --- src/FitECM/ApproxQ.ml | 5 ++ src/FitECM/Makefile | 9 +++ src/FitECM/TestApproxQ.ml | 7 ++ src/FitECM/_tags | 3 + src/FitECM/all.itarget | 1 + src/FitECM/myocamlbuild.ml | 185 +++++++++++++++++++++++++++++++++++++++++++++ src/FitECM/test.itarget | 1 + 7 files changed, 211 insertions(+) create mode 100644 src/FitECM/ApproxQ.ml create mode 100644 src/FitECM/Makefile create mode 100644 src/FitECM/TestApproxQ.ml create mode 100644 src/FitECM/_tags create mode 100644 src/FitECM/all.itarget create mode 100644 src/FitECM/myocamlbuild.ml create mode 100644 src/FitECM/test.itarget diff --git a/src/FitECM/ApproxQ.ml b/src/FitECM/ApproxQ.ml new file mode 100644 index 0000000..562b321 --- /dev/null +++ b/src/FitECM/ApproxQ.ml @@ -0,0 +1,5 @@ +open CamlPaml +open Batteries_uni +open Printf + +printf "Hello, world!\n" diff --git a/src/FitECM/Makefile b/src/FitECM/Makefile new file mode 100644 index 0000000..dcab06d --- /dev/null +++ b/src/FitECM/Makefile @@ -0,0 +1,9 @@ +all: + ocamlbuild all.otarget + +test: + ocamlbuild test.otarget + +clean: + rm -f *~ + ocamlbuild -clean diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml new file mode 100644 index 0000000..fc056fe --- /dev/null +++ b/src/FitECM/TestApproxQ.ml @@ -0,0 +1,7 @@ +open OUnit + +let test1 () = todo "test1 todo" + +let all_tests = "test1" >:: test1 + +run_test_tt_main all_tests diff --git a/src/FitECM/_tags b/src/FitECM/_tags new file mode 100644 index 0000000..7c14582 --- /dev/null +++ b/src/FitECM/_tags @@ -0,0 +1,3 @@ +true: debug, pkg_batteries, pkg_CamlPaml +<**/*.ml> or <**/*.mli>: pp(ocaml+twt) +: pkg_oUnit diff --git a/src/FitECM/all.itarget b/src/FitECM/all.itarget new file mode 100644 index 0000000..443edcc --- /dev/null +++ b/src/FitECM/all.itarget @@ -0,0 +1 @@ +ApproxQ.native diff --git a/src/FitECM/myocamlbuild.ml b/src/FitECM/myocamlbuild.ml new file mode 100644 index 0000000..5ffd91e --- /dev/null +++ b/src/FitECM/myocamlbuild.ml @@ -0,0 +1,185 @@ +open Ocamlbuild_plugin +open Command (* no longer needed for OCaml >= 3.10.2 *) + +(** + Overview of tags: + - [pkg_batteries] to use Batteries as a library, without syntax extensions + - [use_batteries] and [use_batteries_r] to use both Batteries and all the non-destructive syntax extensions + - [pkg_sexplib.syntax] with [syntax_camlp4o] or [syntax_camlp4r] for sexplib +*) + + +(** + {1 OCamlFind} +*) + +let run_and_read = Ocamlbuild_pack.My_unix.run_and_read + +let blank_sep_strings = Ocamlbuild_pack.Lexers.blank_sep_strings + +module OCamlFind = +struct + (* this lists all supported packages *) + let find_packages () = + blank_sep_strings & + Lexing.from_string & + run_and_read "ocamlfind list | cut -d' ' -f1" + + (* this is supposed to list available syntaxes, but I don't know how to do it. *) + let find_syntaxes () = ["camlp4o"; "camlp4r"] + + (* ocamlfind command *) + let ocamlfind x = S[A"ocamlfind"; x] + + let before_options () = + (* by using Before_options one let command line options have an higher priority *) + (* on the contrary using After_options will guarantee to have the higher priority *) + + (* override default commands by ocamlfind ones *) + Options.ocamlc := ocamlfind & A"ocamlc"; + Options.ocamlopt := ocamlfind & A"ocamlopt"; + Options.ocamldep := ocamlfind & A"ocamldep"; + Options.ocamldoc := ocamlfind & A"ocamldoc"; + Options.ocamlmktop := ocamlfind & A"ocamlmktop" + + let get_ocamldoc_directory () = + let ocamldoc_directory = run_and_read "ocamlfind ocamldoc -customdir" in + let length = String.length ocamldoc_directory in + assert (length != 0); + let char = ocamldoc_directory.[length - 1] in + if (char = '\n') || (char = '\r') then String.sub ocamldoc_directory 0 (length - 1) + else ocamldoc_directory + + let after_rules () = + (* When one link an OCaml library/binary/package, one should use -linkpkg *) + flag ["ocaml"; "byte"; "link"; "program"] & A"-linkpkg"; + flag ["ocaml"; "native"; "link"; "program"] & A"-linkpkg"; + + + (* For each ocamlfind package one inject the -package option when + * compiling, computing dependencies, generating documentation and + * linking. *) + List.iter begin fun pkg -> + flag ["ocaml"; "compile"; "pkg_"^pkg] & S[A"-package"; A pkg]; + flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S[A"-package"; A pkg]; + flag ["ocaml"; "doc"; "pkg_"^pkg] & S[A"-package"; A pkg]; + flag ["ocaml"; "link"; "pkg_"^pkg] & S[A"-package"; A pkg]; + end (find_packages ()); + + (* Like -package but for extensions syntax. Morover -syntax is useless + * when linking. *) + List.iter begin fun syntax -> + flag ["ocaml"; "compile"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; + flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; + flag ["ocaml"; "doc"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; + end (find_syntaxes ()); + + (* The default "thread" tag is not compatible with ocamlfind. + Indeed, the default rules add the "threads.cma" or "threads.cmxa" + options when using this tag. When using the "-linkpkg" option with + ocamlfind, this module will then be added twice on the command line. + + To solve this, one approach is to add the "-thread" option when using + the "threads" package using the previous plugin. + *) + flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]); + flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]); +end + +(** + {1 OCaml Batteries Included} +*) + +module Batteries = +struct + let before_options () = () + + let after_rules () = + flag ["ocaml"; "link"; "byte"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"; A "odoc_info.cma"]); + flag ["ocaml"; "link"; "native"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"(*; A "odoc_info.cmxa"*)]); + flag ["ocaml"; "docfile"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); + flag ["ocaml"; "docdir"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); + flag ["ocaml"; "doc"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); + + (*The command-line for [use_batteries] and [use_batteries_r]*) + + let cl_use_boilerplate = [A"-package"; A "batteries.pa_type_conv.syntax,batteries,sexplib.syntax"] + and cl_use_batteries = [A"-package"; A "batteries.pa_openin.syntax,batteries.pa_where.syntax,batteries.pa_batteries.syntax"; A "-package"; A "batteries"] + and cl_use_batteries_o = [] + (*[cl_use_batteries_o]: extensions which only make sense in original syntax*) + and cl_camlp4o = [A"-syntax"; A "camlp4o"] + and cl_camlp4r = [A"-syntax"; A "camlp4r"] in + + let cl_boilerplate_original = cl_use_boilerplate @ cl_camlp4o + and cl_boilerplate_revised = cl_use_boilerplate @ cl_camlp4r + and cl_batteries_original = cl_use_batteries @ cl_use_batteries_o @ cl_camlp4o + and cl_batteries_revised = cl_use_batteries @ cl_camlp4r in + + (** Tag [use_boilerplate] provides boilerplate syntax extensions, + in original syntax*) + + flag ["ocaml"; "compile"; "use_boilerplate"] & S cl_boilerplate_original ; + flag ["ocaml"; "ocamldep"; "use_boilerplate"] & S cl_boilerplate_original ; + flag ["ocaml"; "doc"; "use_boilerplate"] & S cl_boilerplate_original ; + flag ["ocaml"; "link"; "use_boilerplate"] & S cl_boilerplate_original ; + + (** Tag [use_boilerplate_r] provides boilerplate syntax extensions, + in original syntax*) + + flag ["ocaml"; "compile"; "use_boilerplate_r"] & S cl_boilerplate_revised ; + flag ["ocaml"; "ocamldep"; "use_boilerplate_r"] & S cl_boilerplate_revised ; + flag ["ocaml"; "doc"; "use_boilerplate_r"] & S cl_boilerplate_revised ; + flag ["ocaml"; "link"; "use_boilerplate_r"] & S cl_boilerplate_revised ; + + (** Tag [use_batteries] provides both package [batteries] + and all syntax extensions, in original syntax. *) + + flag ["ocaml"; "compile"; "use_batteries"] & S cl_batteries_original ; + flag ["ocaml"; "ocamldep"; "use_batteries"] & S cl_batteries_original ; + flag ["ocaml"; "doc"; "use_batteries"] & S cl_batteries_original ; + flag ["ocaml"; "link"; "use_batteries"] & S cl_batteries_original ; + + (** Tag [use_batteries_r] provides both package [batteries] + and all syntax extensions, in revised syntax. *) + + flag ["ocaml"; "compile"; "use_batteries_r"] & S cl_batteries_revised; + flag ["ocaml"; "ocamldep"; "use_batteries_r"] & S cl_batteries_revised; + flag ["ocaml"; "doc"; "use_batteries_r"] & S cl_batteries_revised; + flag ["ocaml"; "link"; "use_batteries_r"] & S cl_batteries_revised + + +(* flag ["ocaml"; "compile"; "use_batteries"] & S[A "-verbose"; + A"-package"; A "batteries.syntax.full"; + A"-syntax"; A "batteries.syntax.full"]; + flag ["ocaml"; "ocamldep"; "use_batteries"] & S[A "-verbose"; + A"-package"; A "batteries.syntax.full"; + A"-syntax"; A "batteries.syntax.full"]; + flag ["ocaml"; "doc"; "use_batteries"] & S[A "-verbose"; + A"-package"; A "batteries.syntax.full"; + A"-syntax"; A "batteries.syntax.full"]; + flag ["ocaml"; "link"; "use_batteries"] & S[A "-verbose"; + A"-package"; A "batteries.syntax.full"; + A"-syntax"; A "batteries.syntax.full"];*) + + +end + +let _ = dispatch begin function + | Before_options -> + OCamlFind.before_options (); + Batteries.before_options () + | After_rules -> + OCamlFind.after_rules (); + Batteries.after_rules () + + + | _ -> () +end + + + +(** + which ocamlrun -> header + + print_backtrace -> ajouter "-b" après le header +**) diff --git a/src/FitECM/test.itarget b/src/FitECM/test.itarget new file mode 100644 index 0000000..cefb85f --- /dev/null +++ b/src/FitECM/test.itarget @@ -0,0 +1 @@ +TestApproxQ.native From 14af3facbcd7032c02123b3c98ae0589b9f6d658 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Tue, 3 Jan 2012 23:26:56 -0500 Subject: [PATCH 2/9] ArvestadBruno1997.symmetrizeP --- src/FitECM/ArvestadBruno1997.ml | 41 +++++++++++++++++++++++++++++++++ src/FitECM/TestApproxQ.ml | 51 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 src/FitECM/ArvestadBruno1997.ml diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml new file mode 100644 index 0000000..5bc15da --- /dev/null +++ b/src/FitECM/ArvestadBruno1997.ml @@ -0,0 +1,41 @@ +(* + +Subroutines for the Q matrix estimation strategy described in: + +Arvestad L, Bruno WJ (1997). Estimation of Reversible Substitution Matrices from Multiple Pairs of +Sequences. J Mol Evol 45:696-703 + +*) + +open Batteries_uni + +let diag v = + let n = Gsl_vector.length v + let m = Gsl_matrix.create ~init:0. n n + for i = 0 to n-1 do m.{i,i} <- v.{i} + m + +(* symmetrize the P matrix according to eq. 12 *) +let symmetrizeP pi p = + let (m,n) = Gsl_matrix.dims p + assert (m=n) + assert (n=Gsl_vector.length pi) + + let pi_sqrt = Gsl_vector.copy pi + for i = 0 to n-1 do pi_sqrt.{i} <- sqrt pi.{i} + + let pi_invsqrt = Gsl_vector.copy pi_sqrt + for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} + + let mm a b = + let c = Gsl_matrix.create ~init:0.0 n n + Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) + c + let x = mm (diag pi_sqrt) (mm p (diag pi_invsqrt)) + + let xT = Gsl_matrix.create n n + Gsl_matrix.transpose xT x + + Gsl_matrix.add x xT + Gsl_matrix.scale x 0.5 + x diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index fc056fe..115618f 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -1,7 +1,54 @@ +open Batteries_uni open OUnit +open CamlPaml -let test1 () = todo "test1 todo" +let tol = 1e-6 -let all_tests = "test1" >:: test1 +exception False +let is_symmetric mat = + let open Gsl_matrix + let (m,n) = dims mat + if m <> n then false + else + try + for i = 0 to m-1 do + for j = i+1 to m-1 do + if abs_float (mat.{i,j} -. mat.{j,i}) > tol then raise False + true + with False -> false + +let test_symmetrizeP n () = + let p = Gsl_matrix.create n n + let rowsum i = + let tot = ref 0. + for j = 0 to n-1 do tot := !tot +. p.{i,j} + !tot + for i = 0 to n-1 do + for j = 0 to n-1 do p.{i,j} <- float (Random.int 10000 + if i=j then 10000 else 1) + let z = rowsum i + for j = 0 to n-1 do p.{i,j} <- p.{i,j} /. z + + assert_bool "synthesized P matrix is valid" (try P.validate ~tol p; true with Invalid_argument _ -> false) + assert_bool "synthesized P matrix is asymmetric" (not (is_symmetric p)) + + let pi = Gsl_vector.create n + for j = 0 to n-1 do pi.{j} <- float (1 + Random.int 10000) + let z = pi |> Gsl_vector.to_array |> Array.enum |> fold (+.) 0. + for j = 0 to n-1 do pi.{j} <- pi.{j} /. z + + let sp = ArvestadBruno1997.symmetrizeP pi p + assert_bool "symmetrizeP outputs symmetric matrix" (is_symmetric sp) + + for i = 0 to n-1 do + for j = 0 to n-1 do + assert_bool "symmetrizeP outputs positive matrix" (sp.{i,j} > tol) + +let symmetrizeP_tests = [ + "symmetrizeP 4x4" >:: (fun () -> Random.init 0; test_symmetrizeP 4 ()); + "symmetrizeP 4x4 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_symmetrizeP 4 ())); + "symmetrizeP 64x64 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_symmetrizeP 64 ())) +] + +let all_tests = "symmetrizeP" >::: symmetrizeP_tests run_test_tt_main all_tests From 03a6940424b9f867289aa36f9c319ed43684f107 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sat, 5 May 2012 23:08:27 -0700 Subject: [PATCH 3/9] ArvestadBruno1997.est_eigenvectors --- src/FitECM/ArvestadBruno1997.ml | 40 +++++++++++++++++-- src/FitECM/TestApproxQ.ml | 85 +++++++++++++++++++++++++++++++++++++++-- src/FitECM/_tags | 4 +- 3 files changed, 119 insertions(+), 10 deletions(-) diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml index 5bc15da..f49346d 100644 --- a/src/FitECM/ArvestadBruno1997.ml +++ b/src/FitECM/ArvestadBruno1997.ml @@ -15,6 +15,14 @@ let diag v = for i = 0 to n-1 do m.{i,i} <- v.{i} m +let mm a b = + let (x,y) = Gsl_matrix.dims a + let (y',z) = Gsl_matrix.dims b + if (y <> y') then invalid_arg "mm: matrix dimensions mismatched" + let c = Gsl_matrix.create ~init:0.0 x z + Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) + c + (* symmetrize the P matrix according to eq. 12 *) let symmetrizeP pi p = let (m,n) = Gsl_matrix.dims p @@ -23,14 +31,13 @@ let symmetrizeP pi p = let pi_sqrt = Gsl_vector.copy pi for i = 0 to n-1 do pi_sqrt.{i} <- sqrt pi.{i} + (* pi_sqrt is pi^(1/2) as in eq. 10 *) let pi_invsqrt = Gsl_vector.copy pi_sqrt for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} + (* pi_invsqrt is pi^(-1/2) as in eq. 10 *) - let mm a b = - let c = Gsl_matrix.create ~init:0.0 n n - Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) - c + (* now compute eq. 12 *) let x = mm (diag pi_sqrt) (mm p (diag pi_invsqrt)) let xT = Gsl_matrix.create n n @@ -39,3 +46,28 @@ let symmetrizeP pi p = Gsl_matrix.add x xT Gsl_matrix.scale x 0.5 x + +(* compute the eigenvectors of the symmetrized P matrix, + and use them to estimate the eigenvectors of P and Q *) +let est_eigenvectors pi symP = + let (m,n) = Gsl_matrix.dims symP + assert (m=n) + assert (n=Gsl_vector.length pi) + + let (_,u) = Gsl_eigen.symmv ~protect:true (`M symP) + (* columns of u are the eigenvectors of symP *) + + let pi_sqrt = Gsl_vector.copy pi + for i = 0 to n-1 do pi_sqrt.{i} <- sqrt pi.{i} + + let pi_invsqrt = Gsl_vector.copy pi_sqrt + for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} + + let rv = mm (diag pi_invsqrt) u + (* columns of rv are the estimated right eigenvectors of P and Q (eq. 14) *) + + Gsl_matrix.transpose_in_place u + let lv = mm u (diag pi_sqrt) + (* rows of lv are estimated left eigenvectors of P and Q (eq. 13) *) + + lv, rv \ No newline at end of file diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index 115618f..2b1b8b1 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -1,9 +1,13 @@ open Batteries_uni open OUnit open CamlPaml +open Printf +Random.init 12345 let tol = 1e-6 + + exception False let is_symmetric mat = let open Gsl_matrix @@ -17,7 +21,7 @@ let is_symmetric mat = true with False -> false -let test_symmetrizeP n () = +let random_P n () = let p = Gsl_matrix.create n n let rowsum i = let tot = ref 0. @@ -30,12 +34,16 @@ let test_symmetrizeP n () = assert_bool "synthesized P matrix is valid" (try P.validate ~tol p; true with Invalid_argument _ -> false) assert_bool "synthesized P matrix is asymmetric" (not (is_symmetric p)) - + let pi = Gsl_vector.create n for j = 0 to n-1 do pi.{j} <- float (1 + Random.int 10000) let z = pi |> Gsl_vector.to_array |> Array.enum |> fold (+.) 0. for j = 0 to n-1 do pi.{j} <- pi.{j} /. z - + + p, pi + +let test_symmetrizeP n () = + let p, pi = random_P n () let sp = ArvestadBruno1997.symmetrizeP pi p assert_bool "symmetrizeP outputs symmetric matrix" (is_symmetric sp) @@ -49,6 +57,75 @@ let symmetrizeP_tests = [ "symmetrizeP 64x64 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_symmetrizeP 64 ())) ] -let all_tests = "symmetrizeP" >::: symmetrizeP_tests + + +let mm a b = + let (x,y) = Gsl_matrix.dims a + let (y',z) = Gsl_matrix.dims b + if (y <> y') then invalid_arg "mm: matrix dimensions mismatched" + let c = Gsl_matrix.create ~init:0.0 x z + Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) + c + +let d v1 v2 = + let maxd = ref 0. + for i = 0 to (Array.length v1) - 1 do + maxd := max !maxd (abs_float (log (abs_float v1.(i)) -. log (abs_float v2.(i)))) + !maxd + +let ppm m = + let (r,c) = Gsl_matrix.dims m + for i = 0 to r-1 do + for j = 0 to c-1 do + printf "\t%.4f" m.{i,j} + printf "\n" + +let test_est_eigenvectors () = + (* based on example worked out in Mathematica *) + let pi = Gsl_vector.of_array [| 0.21; 0.31; 0.29; 0.19 |] + let p = + Gsl_matrix.of_arrays + [| + [| 2.2248 ; 0.422137; 0.212191; 0.140869 |]; + [| 0.285964; 2.37529 ; 0.20583 ; 0.132912 |]; + [| 0.153656; 0.220025; 2.40466 ; 0.221659 |]; + [| 0.155697; 0.216856; 0.338322; 2.28912 |] + |] + + let lv = + [| + [| -0.21 ; -0.31 ; -0.29 ; -0.19 |]; + [| -0.191046 ; -0.308325 ; 0.300389 ; 0.198981 |]; + [| 0.00767626; -0.00545097; -0.340088 ; 0.337863 |]; + [| 0.359642 ; -0.344683 ; -0.00250571; -0.0124537 |] + |] + + let rv = + [| + [| -1. ; -1. ; -1. ; -1. |]; + [| -0.909744 ; -0.994596 ; 1.03583 ; 1.04727 |]; + [| 0.0365536; -0.0175838; -1.17272 ; 1.77823 |]; + [| 1.71258 ; -1.11188 ; -0.00864038; -0.0655459 |] + |] + + let symP = ArvestadBruno1997.symmetrizeP pi p + let lv', rv' = ArvestadBruno1997.est_eigenvectors pi symP + + let lv' = Gsl_matrix.to_arrays lv' + for i = 0 to 3 do + let mind = ref infinity + for j = 0 to 3 do + mind := min !mind (d lv.(i) lv'.(j)) + assert (!mind < 0.001) + + Gsl_matrix.transpose_in_place rv' + let rv' = Gsl_matrix.to_arrays rv' + for i = 0 to 3 do + let mind = ref infinity + for j = 0 to 3 do + mind := min !mind (d rv.(i) rv'.(j)) + assert (!mind < 0.001) + +let all_tests = "FitECM tests" >::: ["symmetrizeP" >::: symmetrizeP_tests; "est_eigenvectors" >:: test_est_eigenvectors] run_test_tt_main all_tests diff --git a/src/FitECM/_tags b/src/FitECM/_tags index 7c14582..5f5e7fe 100644 --- a/src/FitECM/_tags +++ b/src/FitECM/_tags @@ -1,3 +1,3 @@ -true: debug, pkg_batteries, pkg_CamlPaml -<**/*.ml> or <**/*.mli>: pp(ocaml+twt) +true: debug, pkg_batteries, pkg_CamlPaml, pkg_gsl : pkg_oUnit +<**/*.ml> or <**/*.mli>: pp(ocaml+twt) From 42ad62794a4efecffd5e68d89b03aeeca9d9fd21 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sat, 11 Aug 2012 22:40:55 -0700 Subject: [PATCH 4/9] ArvestadBruno1997.est_eigenvectors --- src/FitECM/ArvestadBruno1997.ml | 40 +++++++++- src/FitECM/Makefile | 1 + src/FitECM/TestApproxQ.ml | 170 ++++++++++++++++++++++++++++++++-------- 3 files changed, 174 insertions(+), 37 deletions(-) diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml index f49346d..743e1f1 100644 --- a/src/FitECM/ArvestadBruno1997.ml +++ b/src/FitECM/ArvestadBruno1997.ml @@ -23,7 +23,7 @@ let mm a b = Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) c -(* symmetrize the P matrix according to eq. 12 *) +(* symmetrize the ~P matrix according to eq. 12 *) let symmetrizeP pi p = let (m,n) = Gsl_matrix.dims p assert (m=n) @@ -37,7 +37,7 @@ let symmetrizeP pi p = for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} (* pi_invsqrt is pi^(-1/2) as in eq. 10 *) - (* now compute eq. 12 *) + (* now compute eq. 12. TODO this more efficiently than actually constructing & multiplying the diagonal matrices *) let x = mm (diag pi_sqrt) (mm p (diag pi_invsqrt)) let xT = Gsl_matrix.create n n @@ -64,10 +64,42 @@ let est_eigenvectors pi symP = for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} let rv = mm (diag pi_invsqrt) u - (* columns of rv are the estimated right eigenvectors of P and Q (eq. 14) *) + Gsl_matrix.transpose_in_place rv + (* rows of rv are the estimated right eigenvectors of P and Q (eq. 14) *) Gsl_matrix.transpose_in_place u let lv = mm u (diag pi_sqrt) (* rows of lv are estimated left eigenvectors of P and Q (eq. 13) *) - lv, rv \ No newline at end of file + lv, rv + +(* estimate the eigenvalues of Q based on the observations and estimated eigenvectors *) +let est_eigenvalues sms lv rv = + let (m,n) = Gsl_matrix.dims lv + assert (m=n) + assert ((m,n) = Gsl_matrix.dims rv) + let k = Array.length sms + assert (k>0) + sms |> Array.iter (fun sm -> assert ((m,n) = Gsl_matrix.dims sm)) + + (* compute distance estimate for each pair of sequences (sm) and nucleotide (eq. 17) *) + let distances = + sms |> Array.map + fun sm -> + let y = Gsl_vector.create ~init:0. m + Array.init m + fun i -> + Gsl_blas.(gemv NoTrans ~alpha:1.0 ~a:sm ~x:(Gsl_matrix.row rv i) ~beta:0.0 ~y) + log (Gsl_blas.(dot (Gsl_matrix.row lv i) y)) + + (* least-squares estimates of the eigenvalues (eq. 18), arbitrarily assigning the last eigenvalue to 1. + TODO make sure that cannot be the zero eigenvalue. *) + let denom = distances |> Array.map (fun d -> d.(m-1) *. d.(m-1)) |> Array.fold_left (+.) 0.0 + assert (denom > 10. *. epsilon_float) + Gsl_vector.of_array + Array.init m + fun i -> + if i < m-1 then + distances |> Array.map (fun d -> d.(i) *. d.(m-1)) |> Array.fold_left (+.) 0.0 |> ( *. ) (1.0 /. denom) + else 1.0 + diff --git a/src/FitECM/Makefile b/src/FitECM/Makefile index dcab06d..12deecd 100644 --- a/src/FitECM/Makefile +++ b/src/FitECM/Makefile @@ -3,6 +3,7 @@ all: test: ocamlbuild test.otarget + _build/TestApproxQ.native -verbose clean: rm -f *~ diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index 2b1b8b1..9edffa4 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -67,11 +67,28 @@ let mm a b = Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) c -let d v1 v2 = - let maxd = ref 0. - for i = 0 to (Array.length v1) - 1 do - maxd := max !maxd (abs_float (log (abs_float v1.(i)) -. log (abs_float v2.(i)))) - !maxd +let veq ?(tol=0.001) v1 v2 = + let n = Gsl_vector.length v1 + if Gsl_vector.length v2 <> n then false + else + let ans = ref true + for i = 0 to n-1 do + let d = abs_float (v1.{i} -. v2.{i}) + let mag = min (abs_float v1.{i}) (abs_float v2.{i}) + assert (match classify_float d with FP_infinite | FP_nan -> false | _ -> true) + assert (match classify_float mag with FP_infinite | FP_nan -> false | _ -> true) + if (mag > 0. && d /. mag > tol) || (mag = 0. && d > tol) then ans := false + !ans + +let meq ?tol m1 m2 = + let (r,c) = Gsl_matrix.dims m1 + let (r',c') = Gsl_matrix.dims m2 + if (r <> r' || c <> c') then false + else + let ans = ref true + for i = 0 to r-1 do + if not (veq ?tol (Gsl_matrix.row m1 i) (Gsl_matrix.row m2 i)) then ans := false + !ans let ppm m = let (r,c) = Gsl_matrix.dims m @@ -80,18 +97,75 @@ let ppm m = printf "\t%.4f" m.{i,j} printf "\n" -let test_est_eigenvectors () = - (* based on example worked out in Mathematica *) - let pi = Gsl_vector.of_array [| 0.21; 0.31; 0.29; 0.19 |] - let p = - Gsl_matrix.of_arrays +module Example = struct + (* Test that the code can step-by-step reproduce a 4x4 example I previously worked out in Mathematica *) + + (* symmetric exchangeabilities *) + let s = + [| + [| 0.0 ; 1.1 ; 0.51; 0.52 |]; + [| 1.1 ; 0.0 ; 0.49; 0.48 |]; + [| 0.51; 0.49; 0.0 ; 0.90 |]; + [| 0.52; 0.48; 0.90; 0.0 |] + |] + + (* "nucleotide" frequencies *) + let pi = [| 0.21; 0.31; 0.29; 0.19 |] + + (* reversible rate matrix q_ij = pi_j s_ij *) + let q = + [| + [| -1.19777 ; 0.694982; 0.301431; 0.201361 |]; + [| 0.470794; -0.946276; 0.28961 ; 0.185872 |]; + [| 0.218277; 0.309583; -0.876371; 0.34851 |]; + [| 0.222557; 0.303265; 0.531937; -1.05776 |] + |] + + (* pairwise substitution matrices we supposedly observe (generated as MatrixExp[0.1*q], MatrixExp[0.25*q], + and MatrixExp[0.5*q]). Our code should recover q from these observations (+pi). *) + let sms = + [| [| - [| 2.2248 ; 0.422137; 0.212191; 0.140869 |]; - [| 0.285964; 2.37529 ; 0.20583 ; 0.132912 |]; - [| 0.153656; 0.220025; 2.40466 ; 0.221659 |]; - [| 0.155697; 0.216856; 0.338322; 2.28912 |] + [| 0.889106 ; 0.063203 ; 0.0286138; 0.019077 |]; + [| 0.042815 ; 0.911884 ; 0.0274714; 0.0177294 |]; + [| 0.0207203; 0.0294729; 0.917676 ; 0.0321309 |]; + [| 0.0210851; 0.0289269; 0.0490419; 0.900946 |] + |]; + + [| + [| 0.752013 ; 0.137632 ; 0.0662809; 0.0440743 |]; + [| 0.0932345; 0.801305 ; 0.0641217; 0.0413385 |]; + [| 0.0479964; 0.0685439; 0.812112 ; 0.0713473 |]; + [| 0.0487137; 0.067447 ; 0.108899 ; 0.774941 |] + |]; + + [| + [| 0.583684 ; 0.221302; 0.117296; 0.0777178 |]; + [| 0.149914 ; 0.662105; 0.114137; 0.0738438 |]; + [| 0.0849388; 0.122008; 0.674872; 0.118181 |]; + [| 0.0858987; 0.120482; 0.180381; 0.613238 |] |] + |] + + (* intermediate result: sum of substitution matrices (~P) *) + let p = + [| + [| 2.2248 ; 0.422137; 0.212191; 0.140869 |]; + [| 0.285964; 2.37529 ; 0.20583 ; 0.132912 |]; + [| 0.153656; 0.220025; 2.40466 ; 0.221659 |]; + [| 0.155697; 0.216856; 0.338322; 2.28912 |] + |] + + (* intermediate result: symmetrized sum of substitution matrices (eq. 12) *) + let symP = + [| + [| 2.2248 ; 0.347442; 0.180567; 0.148098 |]; + [| 0.347442; 2.37529 ; 0.212809; 0.169772 |]; + [| 0.180567; 0.212809; 2.40466 ; 0.273847 |]; + [| 0.148098; 0.169772; 0.273847; 2.28912 |] + |] + (* intermediate result: left and right eigenvectors of q (both row-oriented) *) let lv = [| [| -0.21 ; -0.31 ; -0.29 ; -0.19 |]; @@ -108,24 +182,54 @@ let test_est_eigenvectors () = [| 1.71258 ; -1.11188 ; -0.00864038; -0.0655459 |] |] - let symP = ArvestadBruno1997.symmetrizeP pi p - let lv', rv' = ArvestadBruno1997.est_eigenvectors pi symP - - let lv' = Gsl_matrix.to_arrays lv' - for i = 0 to 3 do - let mind = ref infinity - for j = 0 to 3 do - mind := min !mind (d lv.(i) lv'.(j)) - assert (!mind < 0.001) - - Gsl_matrix.transpose_in_place rv' - let rv' = Gsl_matrix.to_arrays rv' - for i = 0 to 3 do - let mind = ref infinity - for j = 0 to 3 do - mind := min !mind (d rv.(i) rv'.(j)) - assert (!mind < 0.001) - -let all_tests = "FitECM tests" >::: ["symmetrizeP" >::: symmetrizeP_tests; "est_eigenvectors" >:: test_est_eigenvectors] + (* intermediate result: eigenvalues of q *) + let ev = [| 0.; 0.610885; 0.848496; 1.0 |] + + let test_symP () = + let sms' = Array.map Gsl_matrix.of_arrays sms + Gsl_matrix.add sms'.(0) sms'.(1) + Gsl_matrix.add sms'.(0) sms'.(2) + assert (meq (Gsl_matrix.of_arrays symP) (ArvestadBruno1997.symmetrizeP (Gsl_vector.of_array pi) sms'.(0))) + + let test_est_eigenvectors () = + let lv', rv' = ArvestadBruno1997.est_eigenvectors (Gsl_vector.of_array pi) (Gsl_matrix.of_arrays symP) + let lv'' = Gsl_matrix.to_arrays lv' + + let d v1 v2 = + let maxd = ref 0. + for i = 0 to (Array.length v1) - 1 do + maxd := max !maxd (abs_float (log (abs_float v1.(i)) -. log (abs_float v2.(i)))) + !maxd + + for i = 0 to 3 do + let mind = ref infinity + for j = 0 to 3 do + (* since the eigenvectors may be returned in arbitrary order, + find the smallest distance between lv.(i) and any row of lv'' *) + mind := min !mind (d lv.(i) lv''.(j)) + assert (!mind < 0.001) + + let rv'' = Gsl_matrix.to_arrays rv' + for i = 0 to 3 do + let mind = ref infinity + for j = 0 to 3 do + mind := min !mind (d rv.(i) rv''.(j)) + assert (!mind < 0.001) + + let test_est_eigenvalues () = + let lv' = Gsl_matrix.of_arrays lv + let rv' = Gsl_matrix.of_arrays rv + let ev' = Gsl_vector.to_array (ArvestadBruno1997.est_eigenvalues (Array.map Gsl_matrix.of_arrays sms) lv' rv') + Array.sort compare ev' + + assert (veq (Gsl_vector.of_array ev) (Gsl_vector.of_array ev')) + + let tests = ["symP" >:: test_symP; "est_eigenvectors" >:: test_est_eigenvectors; "est_eigenvalues" >:: test_est_eigenvalues] + +let all_tests = ("FitECM tests" >::: + [ + "symmetrizeP" >::: symmetrizeP_tests; + "previously worked-out 4x4 example" >::: Example.tests + ]) run_test_tt_main all_tests From ede9a8181acc843b5db346e2c6383337528db691 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sun, 12 Aug 2012 16:11:28 -0700 Subject: [PATCH 5/9] ArvestadBruno1997.est_Q --- src/FitECM/ArvestadBruno1997.ml | 29 +++++++++++++++++++++++++++++ src/FitECM/TestApproxQ.ml | 6 +++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml index 743e1f1..2490915 100644 --- a/src/FitECM/ArvestadBruno1997.ml +++ b/src/FitECM/ArvestadBruno1997.ml @@ -103,3 +103,32 @@ let est_eigenvalues sms lv rv = distances |> Array.map (fun d -> d.(i) *. d.(m-1)) |> Array.fold_left (+.) 0.0 |> ( *. ) (1.0 /. denom) else 1.0 +(* Normalize rate matrix to unity mean rate of replacement at equilibrium *) +let normalize_Q pi q = + let (m,n) = Gsl_matrix.dims q + assert (m=n) + let qdiag = Gsl_vector.of_array (Array.init m (fun i -> q.{i,i})) + + let z = 1.0 /. (Gsl_blas.dot pi qdiag) + (* 'sign' of eigenvectors is arbitrary *) + Gsl_matrix.scale q (if z>0. then 0. -. z else z) + +(* putting it all together *) +let est_Q pi sms = + let n = Gsl_vector.length pi + sms |> Array.iter (fun sm -> assert (Gsl_matrix.dims sm = (n,n))) + + let p = Gsl_matrix.create ~init:0. n n + sms |> Array.iter (fun sm -> Gsl_matrix.add p sm) + + let symP = symmetrizeP pi p + + let lv, rv = est_eigenvectors pi symP + let ev = est_eigenvalues sms lv rv + + (* reconstruct Q according to the spectral thm Q = rv'*diag(ev)*lv + http://www.maths.lancs.ac.uk/~gilbert/m306c/node4.html *) + Gsl_matrix.transpose_in_place rv + let q = mm rv (mm (diag ev) lv) + normalize_Q pi q + q diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index 9edffa4..7aefadc 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -224,7 +224,11 @@ module Example = struct assert (veq (Gsl_vector.of_array ev) (Gsl_vector.of_array ev')) - let tests = ["symP" >:: test_symP; "est_eigenvectors" >:: test_est_eigenvectors; "est_eigenvalues" >:: test_est_eigenvalues] + let test_est_Q () = + let q' = ArvestadBruno1997.est_Q (Gsl_vector.of_array pi) (Array.map Gsl_matrix.of_arrays sms) + assert (meq (Gsl_matrix.of_arrays q) q') + + let tests = ["symP" >:: test_symP; "est_eigenvectors" >:: test_est_eigenvectors; "est_eigenvalues" >:: test_est_eigenvalues; "est_Q" >:: test_est_Q] let all_tests = ("FitECM tests" >::: [ From cb9181c39122c34274fe4aacac5116c5264c9f48 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Sat, 25 Aug 2012 01:01:19 -0700 Subject: [PATCH 6/9] ArvestadBruno1997 tests on random data --- src/FitECM/TestApproxQ.ml | 58 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index 7aefadc..5b646c7 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -230,10 +230,66 @@ module Example = struct let tests = ["symP" >:: test_symP; "est_eigenvectors" >:: test_est_eigenvectors; "est_eigenvalues" >:: test_est_eigenvalues; "est_Q" >:: test_est_Q] +module RandomData = struct + (* Test that the code accurately recovers random rate matrices *) + + open CamlPaml + + (* generate random exchangeabilities *) + let random_s n = + let s = Array.make_matrix n n 0. + for i = 0 to n-1 do + for j = i+1 to n-1 do + s.(i).(j) <- Random.float 1.0 + s.(j).(i) <- s.(i).(j) + s + + (* generate random frequencies *) + let random_pi n = + let wts = Array.init n (fun _ -> Random.float 1.0) + let z = Array.fold_left (+.) 0. wts + Array.map (( *. ) (1.0 /. z)) wts + + (* generate a random n-by-n rate matrix *) + let random_Q n = + let m = random_s n + let pi = random_pi n + for i = 0 to n-1 do + for j = 0 to n-1 do + m.(i).(j) <- pi.(j) *. m.(i).(j) + m.(i).(i) <- 0. -. (Array.fold_left (+.) 0. m.(i)) + Q.validate m + Q.Diag.of_Q m + + (* generate k substitution matrices from q, at random distances between 0.2 and 2.0 *) + let random_sms k q = Array.init k (fun _ -> Q.Diag.to_Pt q (0.2 +. Random.float 1.8)) + + let test_est_Q n k = + let q = random_Q n + let qm = Q.Diag.to_Q q + let pi = Q.Diag.equilibrium q + let z = Array.fold_left (+.) 0. (Array.init n (fun i -> 0. -. pi.(i) *. qm.(i).(i))) + let q = Q.Diag.scale q (1.0 /. z) + let qm = Q.Diag.to_Q q + + let sms = random_sms k q + let q' = ArvestadBruno1997.est_Q (Gsl_vector.of_array pi) sms + + assert (meq (Gsl_matrix.of_arrays qm) q') + + let tests = [ + "4x4/1 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 4 1)); + "4x4/8 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 4 8)); + "61x61/1 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 61 1)); + "61x61/8 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 61 8)); + "64x64/32 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 64 32)) + ] + let all_tests = ("FitECM tests" >::: [ "symmetrizeP" >::: symmetrizeP_tests; - "previously worked-out 4x4 example" >::: Example.tests + "previously worked-out 4x4 example" >::: Example.tests; + "random data" >::: RandomData.tests ]) run_test_tt_main all_tests From 1b0b61585b94ba2ec2b56556ee22c3584ece3cdb Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Fri, 14 Sep 2012 23:05:27 -0700 Subject: [PATCH 7/9] Test degenerate inputs to ApproxQ --- src/FitECM/ArvestadBruno1997.ml | 63 +++++++++++++++++++++++++------ src/FitECM/TestApproxQ.ml | 83 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 131 insertions(+), 15 deletions(-) diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml index 2490915..c2bee16 100644 --- a/src/FitECM/ArvestadBruno1997.ml +++ b/src/FitECM/ArvestadBruno1997.ml @@ -8,6 +8,9 @@ Sequences. J Mol Evol 45:696-703 *) open Batteries_uni +open Printf + +exception Numerical_error of string let diag v = let n = Gsl_vector.length v @@ -84,25 +87,49 @@ let est_eigenvalues sms lv rv = (* compute distance estimate for each pair of sequences (sm) and nucleotide (eq. 17) *) let distances = - sms |> Array.map - fun sm -> - let y = Gsl_vector.create ~init:0. m - Array.init m + sms |> Array.mapi + fun which_pair sm -> + let y = Gsl_vector.create ~init:0. n + Array.init n fun i -> Gsl_blas.(gemv NoTrans ~alpha:1.0 ~a:sm ~x:(Gsl_matrix.row rv i) ~beta:0.0 ~y) - log (Gsl_blas.(dot (Gsl_matrix.row lv i) y)) + let exp_d_i = (Gsl_blas.(dot (Gsl_matrix.row lv i) y)) + if (exp_d_i <= 0.) then raise (Numerical_error (sprintf "Bad distance estimate for species pair %d, residue %d: log(%e)" which_pair i exp_d_i)) + (* TODO: test cases to provoke negative exp_d_i + http://www2.gsu.edu/~mkteer/npdmatri.html + *) + log exp_d_i (* least-squares estimates of the eigenvalues (eq. 18), arbitrarily assigning the last eigenvalue to 1. TODO make sure that cannot be the zero eigenvalue. *) - let denom = distances |> Array.map (fun d -> d.(m-1) *. d.(m-1)) |> Array.fold_left (+.) 0.0 - assert (denom > 10. *. epsilon_float) - Gsl_vector.of_array - Array.init m + let denom = distances |> Array.map (fun d -> d.(n-1) *. d.(n-1)) |> Array.fold_left (+.) 0.0 + let denomfpc = classify_float denom + if (denomfpc = FP_nan || denomfpc = FP_infinite || denom < 1e-6) then + raise (Numerical_error (sprintf "Bad denominator for eigenvalue estimates: %e" denom)) + let ans = + Array.init n fun i -> - if i < m-1 then - distances |> Array.map (fun d -> d.(i) *. d.(m-1)) |> Array.fold_left (+.) 0.0 |> ( *. ) (1.0 /. denom) + if i < n-1 then + distances |> Array.map (fun d -> d.(i) *. d.(n-1)) |> Array.fold_left (+.) 0.0 |> ( *. ) (1.0 /. denom) else 1.0 + (* check all eigenvalues are nonnegative reals *) + let ans_tot = Array.fold_left (+.) 0. ans + + match classify_float ans_tot with + | FP_infinite | FP_nan -> raise (Numerical_error "Infinite/undefined eigenvalue estimate") + | _ -> () + + (* round slightly negative eigenvalue estimates up to zero *) + ans |> Array.iteri + fun i ans_i -> + if ans_i < 0. then + if (abs_float ans_i >= 1e-3 *. (float n) *. ans_tot) then + raise (Numerical_error (sprintf "Eigenvalue estimate %d is too negative: %e" i ans_i)) + ans.(i) <- 0. + + Gsl_vector.of_array ans + (* Normalize rate matrix to unity mean rate of replacement at equilibrium *) let normalize_Q pi q = let (m,n) = Gsl_matrix.dims q @@ -113,10 +140,22 @@ let normalize_Q pi q = (* 'sign' of eigenvectors is arbitrary *) Gsl_matrix.scale q (if z>0. then 0. -. z else z) + (* round slightly negative off-diagonal entries to zero *) + for i = 0 to n-1 do + let row_tot = ref 0. + for j = 0 to n-1 do if i <> j then row_tot := !row_tot +. q.{i,j} + + for j = 0 to n-1 do + if i <> j && q.{i,j} < 0. then + if abs_float q.{i,j} >= 1e-3 *. (float n) *. !row_tot then + raise (Numerical_error (sprintf "Rate estimate Q[%d,%d] is too negative: %e" i j q.{i,j})) + q.{i,j} <- 0. + (* putting it all together *) let est_Q pi sms = let n = Gsl_vector.length pi - sms |> Array.iter (fun sm -> assert (Gsl_matrix.dims sm = (n,n))) + if (n < 2 || Array.length sms = 0) then invalid_arg "ArvestadBruno1997.est_Q" + sms |> Array.iter (fun sm -> if (Gsl_matrix.dims sm <> (n,n)) then invalid_arg "ArvestadBruno1997.est_Q") let p = Gsl_matrix.create ~init:0. n n sms |> Array.iter (fun sm -> Gsl_matrix.add p sm) diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index 5b646c7..b6d90e1 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -267,6 +267,7 @@ module RandomData = struct let test_est_Q n k = let q = random_Q n let qm = Q.Diag.to_Q q + assert (Q.Diag.reversible q) let pi = Q.Diag.equilibrium q let z = Array.fold_left (+.) 0. (Array.init n (fun i -> 0. -. pi.(i) *. qm.(i).(i))) let q = Q.Diag.scale q (1.0 /. z) @@ -285,11 +286,87 @@ module RandomData = struct "64x64/32 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 64 32)) ] +module Degenerate = struct + (* Test that the code has acceptable behavior given degenerate inputs *) + + (* identity substitution matrix *) + let ident n = + let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl_matrix.create ~init:0. n n |] + Gsl_matrix.add_diagonal sms.(0) 1.0 + (pi,sms) + + (* near-identity substitution matrix (one slightly nonzero off-diagonal entry in each row) *) + let sparse ?(delta=1e-6) n = + let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl_matrix.create ~init:0. n n |] + Gsl_matrix.add_diagonal sms.(0) (1. -. delta) + for i = 0 to n-1 do + sms.(0).{i,(if i < n/2 then i+1 else i-1)} <- delta + (pi,sms) + + (* random near-identity substitution matrix (all offdiagonal entries are very small random values) *) + let random_sparse ?(delta=1e-6) n = + Random.init 0 + let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl_matrix.create ~init:0. n n |] + for i = 0 to n-1 do + let noise = Array.init n (fun j -> if (i <> j) then Random.float delta else 0.) + let z = Array.fold_left (+.) 0. noise + for j = 0 to n-1 do + if i <> j then + sms.(0).{i,j} <- noise.(j) + else + sms.(0).{i,j} <- 1.0 -. z + pi,sms + + (* half of off-diagonal entries are zero *) + let checkerboard ?(delta=1e-6) n = + Random.init 0 + let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl_matrix.create ~init:0. n n |] + for i = 0 to n-1 do + for j = 0 to n-1 do + if i <> j then + sms.(0).{i,j} <- if (j mod 2 = i mod 2) then delta else 0. + else + sms.(0).{i,j} <- 1.0 -. (delta *. float (n/2 - 1)) + pi,sms + + (* assert we either raise Numerical_error or return a valid rate matrix *) + let assert_acceptable (pi,sms) () = + try + let q = ArvestadBruno1997.est_Q pi sms + try + CamlPaml.Q.validate (Gsl_matrix.to_arrays q) + with + | (Invalid_argument _) as exn -> + ppm q + raise exn + with + | ArvestadBruno1997.Numerical_error _ -> () + + let tests = [ + "identity 2x2" >:: (assert_acceptable (ident 2)); + "identity 4x4" >:: (assert_acceptable (ident 4)); + "identity 64x64" >:: (assert_acceptable (ident 64)); + "sparse 2x2" >:: (assert_acceptable (sparse 2)); + "sparse 4x4" >:: (assert_acceptable (sparse 4)); + "sparse 64x64" >:: (assert_acceptable (sparse 64)); + "random sparse 2x2" >:: (assert_acceptable (random_sparse 2)); + "random sparse 4x4" >:: (assert_acceptable (random_sparse 4)); + "random sparse 64x64" >:: (assert_acceptable (random_sparse 64)); + "checkerboard 4x4" >:: (assert_acceptable (checkerboard 4)); + "checkerboard 64x64" >:: (assert_acceptable (checkerboard 64)); + "dark checkerboard 64x64" >:: (assert_acceptable (checkerboard ~delta:5e-3 64)) + ] + let all_tests = ("FitECM tests" >::: [ - "symmetrizeP" >::: symmetrizeP_tests; - "previously worked-out 4x4 example" >::: Example.tests; - "random data" >::: RandomData.tests + "symmetrizeP unit tests" >::: symmetrizeP_tests; + "reproduce a previously worked-out 4x4 example" >::: Example.tests; + "acceptable behavior on degenerate inputs" >::: Degenerate.tests; + "accurately analyze randomly generated inputs" >::: RandomData.tests ]) run_test_tt_main all_tests From 07a167316669cdda4a869eb0950ff02c0b04c3b2 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Fri, 14 Sep 2012 23:46:40 -0700 Subject: [PATCH 8/9] ApproxQ I/O harness --- src/FitECM/ApproxQ.ml | 125 +++++++++++++++++++++++++++++++++++++++++++++++++- src/FitECM/_tags | 2 +- 2 files changed, 125 insertions(+), 2 deletions(-) diff --git a/src/FitECM/ApproxQ.ml b/src/FitECM/ApproxQ.ml index 562b321..8f2a56a 100644 --- a/src/FitECM/ApproxQ.ml +++ b/src/FitECM/ApproxQ.ml @@ -1,5 +1,128 @@ open CamlPaml open Batteries_uni open Printf +module JSON = Yojson.Basic -printf "Hello, world!\n" +let tol = 1e-6 +let smooth = 1e-6 + +(* JSON helpers *) +let vector_of_json = function + | `List v -> + Array.of_list + v |> List.map + function + | `Float x -> x + | `Int x -> float x + | _ -> invalid_arg "expected numeric vector" + | _ -> invalid_arg "expected vector" + +let matrix_of_json = function + | `List rows -> + try + let m = rows |> List.map vector_of_json |> Array.of_list + if Array.length m > 0 then + let nc = Array.length m.(0) + if not (Array.for_all (fun ar -> Array.length ar = nc) m) then invalid_arg "" + m + with + | Invalid_argument _ -> failwith "expected rectangular numeric matrix" + | _ -> invalid_arg "expected matrix" + +let (@) json key = match json with + | `Assoc lst -> List.assoc key lst + | _ -> invalid_arg "expected JSON hash" + + + +(* parse input JSON *) +if Array.length Sys.argv < 2 then + eprintf "Usage: ApproxECM \n" + exit (-1) + +let fn_input = Sys.argv.(1) +let input = JSON.from_file fn_input + + + +(* load and validate input *) +let pi = + try + vector_of_json (input@"CodonFrequencies") + with + | Not_found + | Invalid_argument _ -> failwith ("expected CodonFrequencies to be a numeric vector") + +let n = Array.length pi +if n < 2 then failwith "CodonFrequencies vector is too short" + +let pisum = + pi |> Array.fold_left + fun sum x -> + if x < 0. then failwith "CodonFrequencies vector has a negative entry" + sum +. x + 0. +if abs_float (pisum -. 1.0) > tol then failwith "CodonFrequencies vector does not sum to 1" +let pi = Gsl_vector.of_array pi + +let sms = + try + let lst = match input@"SubstitutionMatrices" with + | `List lst -> lst + | _ -> failwith "expected SubstitutionMatrices to be a list" + Array.of_list + List.map + fun entry -> + try + let m = Gsl_matrix.of_arrays (matrix_of_json (entry@"Matrix")) + if Gsl_matrix.dims m <> (n,n) then + failwith (sprintf "Expected each substitution matrix to be %d-by-%d" n n) + + if smooth > 0. then + for i = 0 to n-1 do + for j = 0 to n-1 do + m.{i,j} <- (m.{i,j} +. smooth) /. (1.0 +. (float n) *. smooth) + + CamlPaml.P.validate m + let sps = `List [entry@"FirstSpecies"; entry@"SecondSpecies"] + eprintf "%s\n" (JSON.to_string sps) + m + with + | Not_found -> failwith "expected each member of SubstitutionMatrices to have a Matrix" + lst + with + | Not_found -> failwith "expected SubstitutionMatrices" + +(* run algorithm *) + +eprintf "Estimating %d-by-%d rate matrix based on %d pairwise substitution matrices...\n" n n (Array.length sms) +flush stderr + +let q = ArvestadBruno1997.est_Q pi sms + + + +(* output results *) + +for i = 1 to n-1 do + for j = 0 to i-1 do + if j > 0 then printf " " + printf "%f" (q.{i,j} /. pi.{j}) + printf "\n" + +pi |> Gsl_vector.to_array |> Array.to_list |> List.map (sprintf "%f") |> String.concat " " |> printf "\n%s\n\n\n" + +if n = 64 then + (* FIXME: residue vector should be included in input JSON *) + printf "AAA AAC AAG AAT ACA ACC ACG ACT AGA AGC AGG AGT ATA ATC ATG ATT CAA CAC CAG CAT \ +CCA CCC CCG CCT CGA CGC CGG CGT CTA CTC CTG CTT GAA GAC GAG GAT GCA GCC GCG GCT \ +GGA GGC GGG GGT GTA GTC GTG GTT TAA TAC TAG TAT TCA TCC TCG TCT TGA TGC TGG TGT \ +TTA TTC TTG TTT\n" + +(* +let pos = ref 0 +while !pos < n do + let row = min 20 (n - !pos) + Array.sub codons !pos row |> Array.to_list |> List.map (sprintf "%f") |> String.concat " " |> printf "%s\n" + pos := !pos + row +*) \ No newline at end of file diff --git a/src/FitECM/_tags b/src/FitECM/_tags index 5f5e7fe..f44b554 100644 --- a/src/FitECM/_tags +++ b/src/FitECM/_tags @@ -1,3 +1,3 @@ -true: debug, pkg_batteries, pkg_CamlPaml, pkg_gsl +true: debug, pkg_batteries, pkg_CamlPaml, pkg_gsl, pkg_yojson : pkg_oUnit <**/*.ml> or <**/*.mli>: pp(ocaml+twt) From 0b3e1398182e2126551ded02da922d8981667904 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Mon, 29 Jul 2013 22:38:07 -0700 Subject: [PATCH 9/9] bring up to modern gsl, batteries, and ocamlbuild --- src/FitECM/ApproxQ.ml | 10 +-- src/FitECM/ArvestadBruno1997.ml | 76 ++++++++--------- src/FitECM/Makefile | 4 +- src/FitECM/TestApproxQ.ml | 82 +++++++++--------- src/FitECM/_tags | 4 +- src/FitECM/myocamlbuild.ml | 185 ---------------------------------------- 6 files changed, 88 insertions(+), 273 deletions(-) delete mode 100644 src/FitECM/myocamlbuild.ml diff --git a/src/FitECM/ApproxQ.ml b/src/FitECM/ApproxQ.ml index 8f2a56a..072b28a 100644 --- a/src/FitECM/ApproxQ.ml +++ b/src/FitECM/ApproxQ.ml @@ -1,5 +1,5 @@ open CamlPaml -open Batteries_uni +open Batteries open Printf module JSON = Yojson.Basic @@ -63,7 +63,7 @@ let pisum = sum +. x 0. if abs_float (pisum -. 1.0) > tol then failwith "CodonFrequencies vector does not sum to 1" -let pi = Gsl_vector.of_array pi +let pi = Gsl.Vector.of_array pi let sms = try @@ -74,8 +74,8 @@ let sms = List.map fun entry -> try - let m = Gsl_matrix.of_arrays (matrix_of_json (entry@"Matrix")) - if Gsl_matrix.dims m <> (n,n) then + let m = Gsl.Matrix.of_arrays (matrix_of_json (entry@"Matrix")) + if Gsl.Matrix.dims m <> (n,n) then failwith (sprintf "Expected each substitution matrix to be %d-by-%d" n n) if smooth > 0. then @@ -110,7 +110,7 @@ for i = 1 to n-1 do printf "%f" (q.{i,j} /. pi.{j}) printf "\n" -pi |> Gsl_vector.to_array |> Array.to_list |> List.map (sprintf "%f") |> String.concat " " |> printf "\n%s\n\n\n" +pi |> Gsl.Vector.to_array |> Array.to_list |> List.map (sprintf "%f") |> String.concat " " |> printf "\n%s\n\n\n" if n = 64 then (* FIXME: residue vector should be included in input JSON *) diff --git a/src/FitECM/ArvestadBruno1997.ml b/src/FitECM/ArvestadBruno1997.ml index c2bee16..5d3055e 100644 --- a/src/FitECM/ArvestadBruno1997.ml +++ b/src/FitECM/ArvestadBruno1997.ml @@ -7,70 +7,70 @@ Sequences. J Mol Evol 45:696-703 *) -open Batteries_uni +open Batteries open Printf exception Numerical_error of string let diag v = - let n = Gsl_vector.length v - let m = Gsl_matrix.create ~init:0. n n + let n = Gsl.Vector.length v + let m = Gsl.Matrix.create ~init:0. n n for i = 0 to n-1 do m.{i,i} <- v.{i} m let mm a b = - let (x,y) = Gsl_matrix.dims a - let (y',z) = Gsl_matrix.dims b + let (x,y) = Gsl.Matrix.dims a + let (y',z) = Gsl.Matrix.dims b if (y <> y') then invalid_arg "mm: matrix dimensions mismatched" - let c = Gsl_matrix.create ~init:0.0 x z - Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) + let c = Gsl.Matrix.create ~init:0.0 x z + Gsl.Blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) c (* symmetrize the ~P matrix according to eq. 12 *) let symmetrizeP pi p = - let (m,n) = Gsl_matrix.dims p + let (m,n) = Gsl.Matrix.dims p assert (m=n) - assert (n=Gsl_vector.length pi) + assert (n=Gsl.Vector.length pi) - let pi_sqrt = Gsl_vector.copy pi + let pi_sqrt = Gsl.Vector.copy pi for i = 0 to n-1 do pi_sqrt.{i} <- sqrt pi.{i} (* pi_sqrt is pi^(1/2) as in eq. 10 *) - let pi_invsqrt = Gsl_vector.copy pi_sqrt + let pi_invsqrt = Gsl.Vector.copy pi_sqrt for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} (* pi_invsqrt is pi^(-1/2) as in eq. 10 *) (* now compute eq. 12. TODO this more efficiently than actually constructing & multiplying the diagonal matrices *) let x = mm (diag pi_sqrt) (mm p (diag pi_invsqrt)) - let xT = Gsl_matrix.create n n - Gsl_matrix.transpose xT x + let xT = Gsl.Matrix.create n n + Gsl.Matrix.transpose xT x - Gsl_matrix.add x xT - Gsl_matrix.scale x 0.5 + Gsl.Matrix.add x xT + Gsl.Matrix.scale x 0.5 x (* compute the eigenvectors of the symmetrized P matrix, and use them to estimate the eigenvectors of P and Q *) let est_eigenvectors pi symP = - let (m,n) = Gsl_matrix.dims symP + let (m,n) = Gsl.Matrix.dims symP assert (m=n) - assert (n=Gsl_vector.length pi) + assert (n=Gsl.Vector.length pi) - let (_,u) = Gsl_eigen.symmv ~protect:true (`M symP) + let (_,u) = Gsl.Eigen.symmv ~protect:true (`M symP) (* columns of u are the eigenvectors of symP *) - let pi_sqrt = Gsl_vector.copy pi + let pi_sqrt = Gsl.Vector.copy pi for i = 0 to n-1 do pi_sqrt.{i} <- sqrt pi.{i} - let pi_invsqrt = Gsl_vector.copy pi_sqrt + let pi_invsqrt = Gsl.Vector.copy pi_sqrt for i = 0 to n-1 do pi_invsqrt.{i} <- 1.0 /. pi_sqrt.{i} let rv = mm (diag pi_invsqrt) u - Gsl_matrix.transpose_in_place rv + Gsl.Matrix.transpose_in_place rv (* rows of rv are the estimated right eigenvectors of P and Q (eq. 14) *) - Gsl_matrix.transpose_in_place u + Gsl.Matrix.transpose_in_place u let lv = mm u (diag pi_sqrt) (* rows of lv are estimated left eigenvectors of P and Q (eq. 13) *) @@ -78,22 +78,22 @@ let est_eigenvectors pi symP = (* estimate the eigenvalues of Q based on the observations and estimated eigenvectors *) let est_eigenvalues sms lv rv = - let (m,n) = Gsl_matrix.dims lv + let (m,n) = Gsl.Matrix.dims lv assert (m=n) - assert ((m,n) = Gsl_matrix.dims rv) + assert ((m,n) = Gsl.Matrix.dims rv) let k = Array.length sms assert (k>0) - sms |> Array.iter (fun sm -> assert ((m,n) = Gsl_matrix.dims sm)) + sms |> Array.iter (fun sm -> assert ((m,n) = Gsl.Matrix.dims sm)) (* compute distance estimate for each pair of sequences (sm) and nucleotide (eq. 17) *) let distances = sms |> Array.mapi fun which_pair sm -> - let y = Gsl_vector.create ~init:0. n + let y = Gsl.Vector.create ~init:0. n Array.init n fun i -> - Gsl_blas.(gemv NoTrans ~alpha:1.0 ~a:sm ~x:(Gsl_matrix.row rv i) ~beta:0.0 ~y) - let exp_d_i = (Gsl_blas.(dot (Gsl_matrix.row lv i) y)) + Gsl.Blas.(gemv NoTrans ~alpha:1.0 ~a:sm ~x:(Gsl.Matrix.row rv i) ~beta:0.0 ~y) + let exp_d_i = (Gsl.Blas.(dot (Gsl.Matrix.row lv i) y)) if (exp_d_i <= 0.) then raise (Numerical_error (sprintf "Bad distance estimate for species pair %d, residue %d: log(%e)" which_pair i exp_d_i)) (* TODO: test cases to provoke negative exp_d_i http://www2.gsu.edu/~mkteer/npdmatri.html @@ -128,17 +128,17 @@ let est_eigenvalues sms lv rv = raise (Numerical_error (sprintf "Eigenvalue estimate %d is too negative: %e" i ans_i)) ans.(i) <- 0. - Gsl_vector.of_array ans + Gsl.Vector.of_array ans (* Normalize rate matrix to unity mean rate of replacement at equilibrium *) let normalize_Q pi q = - let (m,n) = Gsl_matrix.dims q + let (m,n) = Gsl.Matrix.dims q assert (m=n) - let qdiag = Gsl_vector.of_array (Array.init m (fun i -> q.{i,i})) + let qdiag = Gsl.Vector.of_array (Array.init m (fun i -> q.{i,i})) - let z = 1.0 /. (Gsl_blas.dot pi qdiag) + let z = 1.0 /. (Gsl.Blas.dot pi qdiag) (* 'sign' of eigenvectors is arbitrary *) - Gsl_matrix.scale q (if z>0. then 0. -. z else z) + Gsl.Matrix.scale q (if z>0. then 0. -. z else z) (* round slightly negative off-diagonal entries to zero *) for i = 0 to n-1 do @@ -153,12 +153,12 @@ let normalize_Q pi q = (* putting it all together *) let est_Q pi sms = - let n = Gsl_vector.length pi + let n = Gsl.Vector.length pi if (n < 2 || Array.length sms = 0) then invalid_arg "ArvestadBruno1997.est_Q" - sms |> Array.iter (fun sm -> if (Gsl_matrix.dims sm <> (n,n)) then invalid_arg "ArvestadBruno1997.est_Q") + sms |> Array.iter (fun sm -> if (Gsl.Matrix.dims sm <> (n,n)) then invalid_arg "ArvestadBruno1997.est_Q") - let p = Gsl_matrix.create ~init:0. n n - sms |> Array.iter (fun sm -> Gsl_matrix.add p sm) + let p = Gsl.Matrix.create ~init:0. n n + sms |> Array.iter (fun sm -> Gsl.Matrix.add p sm) let symP = symmetrizeP pi p @@ -167,7 +167,7 @@ let est_Q pi sms = (* reconstruct Q according to the spectral thm Q = rv'*diag(ev)*lv http://www.maths.lancs.ac.uk/~gilbert/m306c/node4.html *) - Gsl_matrix.transpose_in_place rv + Gsl.Matrix.transpose_in_place rv let q = mm rv (mm (diag ev) lv) normalize_Q pi q q diff --git a/src/FitECM/Makefile b/src/FitECM/Makefile index 12deecd..61a17ab 100644 --- a/src/FitECM/Makefile +++ b/src/FitECM/Makefile @@ -1,8 +1,8 @@ all: - ocamlbuild all.otarget + ocamlbuild -use-ocamlfind all.otarget test: - ocamlbuild test.otarget + ocamlbuild -use-ocamlfind test.otarget _build/TestApproxQ.native -verbose clean: diff --git a/src/FitECM/TestApproxQ.ml b/src/FitECM/TestApproxQ.ml index b6d90e1..f92c695 100644 --- a/src/FitECM/TestApproxQ.ml +++ b/src/FitECM/TestApproxQ.ml @@ -1,4 +1,4 @@ -open Batteries_uni +open Batteries open OUnit open CamlPaml open Printf @@ -10,7 +10,7 @@ let tol = 1e-6 exception False let is_symmetric mat = - let open Gsl_matrix + let open Gsl.Matrix let (m,n) = dims mat if m <> n then false else @@ -22,7 +22,7 @@ let is_symmetric mat = with False -> false let random_P n () = - let p = Gsl_matrix.create n n + let p = Gsl.Matrix.create n n let rowsum i = let tot = ref 0. for j = 0 to n-1 do tot := !tot +. p.{i,j} @@ -35,9 +35,9 @@ let random_P n () = assert_bool "synthesized P matrix is valid" (try P.validate ~tol p; true with Invalid_argument _ -> false) assert_bool "synthesized P matrix is asymmetric" (not (is_symmetric p)) - let pi = Gsl_vector.create n + let pi = Gsl.Vector.create n for j = 0 to n-1 do pi.{j} <- float (1 + Random.int 10000) - let z = pi |> Gsl_vector.to_array |> Array.enum |> fold (+.) 0. + let z = pi |> Gsl.Vector.to_array |> Array.enum |> fold (+.) 0. for j = 0 to n-1 do pi.{j} <- pi.{j} /. z p, pi @@ -60,16 +60,16 @@ let symmetrizeP_tests = [ let mm a b = - let (x,y) = Gsl_matrix.dims a - let (y',z) = Gsl_matrix.dims b + let (x,y) = Gsl.Matrix.dims a + let (y',z) = Gsl.Matrix.dims b if (y <> y') then invalid_arg "mm: matrix dimensions mismatched" - let c = Gsl_matrix.create ~init:0.0 x z - Gsl_blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) + let c = Gsl.Matrix.create ~init:0.0 x z + Gsl.Blas.(gemm ~ta:NoTrans ~tb:NoTrans ~alpha:1.0 ~a ~b ~beta:0.0 ~c) c let veq ?(tol=0.001) v1 v2 = - let n = Gsl_vector.length v1 - if Gsl_vector.length v2 <> n then false + let n = Gsl.Vector.length v1 + if Gsl.Vector.length v2 <> n then false else let ans = ref true for i = 0 to n-1 do @@ -81,17 +81,17 @@ let veq ?(tol=0.001) v1 v2 = !ans let meq ?tol m1 m2 = - let (r,c) = Gsl_matrix.dims m1 - let (r',c') = Gsl_matrix.dims m2 + let (r,c) = Gsl.Matrix.dims m1 + let (r',c') = Gsl.Matrix.dims m2 if (r <> r' || c <> c') then false else let ans = ref true for i = 0 to r-1 do - if not (veq ?tol (Gsl_matrix.row m1 i) (Gsl_matrix.row m2 i)) then ans := false + if not (veq ?tol (Gsl.Matrix.row m1 i) (Gsl.Matrix.row m2 i)) then ans := false !ans let ppm m = - let (r,c) = Gsl_matrix.dims m + let (r,c) = Gsl.Matrix.dims m for i = 0 to r-1 do for j = 0 to c-1 do printf "\t%.4f" m.{i,j} @@ -186,14 +186,14 @@ module Example = struct let ev = [| 0.; 0.610885; 0.848496; 1.0 |] let test_symP () = - let sms' = Array.map Gsl_matrix.of_arrays sms - Gsl_matrix.add sms'.(0) sms'.(1) - Gsl_matrix.add sms'.(0) sms'.(2) - assert (meq (Gsl_matrix.of_arrays symP) (ArvestadBruno1997.symmetrizeP (Gsl_vector.of_array pi) sms'.(0))) + let sms' = Array.map Gsl.Matrix.of_arrays sms + Gsl.Matrix.add sms'.(0) sms'.(1) + Gsl.Matrix.add sms'.(0) sms'.(2) + assert (meq (Gsl.Matrix.of_arrays symP) (ArvestadBruno1997.symmetrizeP (Gsl.Vector.of_array pi) sms'.(0))) let test_est_eigenvectors () = - let lv', rv' = ArvestadBruno1997.est_eigenvectors (Gsl_vector.of_array pi) (Gsl_matrix.of_arrays symP) - let lv'' = Gsl_matrix.to_arrays lv' + let lv', rv' = ArvestadBruno1997.est_eigenvectors (Gsl.Vector.of_array pi) (Gsl.Matrix.of_arrays symP) + let lv'' = Gsl.Matrix.to_arrays lv' let d v1 v2 = let maxd = ref 0. @@ -209,7 +209,7 @@ module Example = struct mind := min !mind (d lv.(i) lv''.(j)) assert (!mind < 0.001) - let rv'' = Gsl_matrix.to_arrays rv' + let rv'' = Gsl.Matrix.to_arrays rv' for i = 0 to 3 do let mind = ref infinity for j = 0 to 3 do @@ -217,16 +217,16 @@ module Example = struct assert (!mind < 0.001) let test_est_eigenvalues () = - let lv' = Gsl_matrix.of_arrays lv - let rv' = Gsl_matrix.of_arrays rv - let ev' = Gsl_vector.to_array (ArvestadBruno1997.est_eigenvalues (Array.map Gsl_matrix.of_arrays sms) lv' rv') + let lv' = Gsl.Matrix.of_arrays lv + let rv' = Gsl.Matrix.of_arrays rv + let ev' = Gsl.Vector.to_array (ArvestadBruno1997.est_eigenvalues (Array.map Gsl.Matrix.of_arrays sms) lv' rv') Array.sort compare ev' - assert (veq (Gsl_vector.of_array ev) (Gsl_vector.of_array ev')) + assert (veq (Gsl.Vector.of_array ev) (Gsl.Vector.of_array ev')) let test_est_Q () = - let q' = ArvestadBruno1997.est_Q (Gsl_vector.of_array pi) (Array.map Gsl_matrix.of_arrays sms) - assert (meq (Gsl_matrix.of_arrays q) q') + let q' = ArvestadBruno1997.est_Q (Gsl.Vector.of_array pi) (Array.map Gsl.Matrix.of_arrays sms) + assert (meq (Gsl.Matrix.of_arrays q) q') let tests = ["symP" >:: test_symP; "est_eigenvectors" >:: test_est_eigenvectors; "est_eigenvalues" >:: test_est_eigenvalues; "est_Q" >:: test_est_Q] @@ -274,9 +274,9 @@ module RandomData = struct let qm = Q.Diag.to_Q q let sms = random_sms k q - let q' = ArvestadBruno1997.est_Q (Gsl_vector.of_array pi) sms + let q' = ArvestadBruno1997.est_Q (Gsl.Vector.of_array pi) sms - assert (meq (Gsl_matrix.of_arrays qm) q') + assert (meq (Gsl.Matrix.of_arrays qm) q') let tests = [ "4x4/1 (100x)" >:: (fun () -> Random.init 0; foreach (1 -- 100) (fun _ -> test_est_Q 4 1)); @@ -291,16 +291,16 @@ module Degenerate = struct (* identity substitution matrix *) let ident n = - let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) - let sms = [| Gsl_matrix.create ~init:0. n n |] - Gsl_matrix.add_diagonal sms.(0) 1.0 + let pi = Gsl.Vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl.Matrix.create ~init:0. n n |] + Gsl.Matrix.add_diagonal sms.(0) 1.0 (pi,sms) (* near-identity substitution matrix (one slightly nonzero off-diagonal entry in each row) *) let sparse ?(delta=1e-6) n = - let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) - let sms = [| Gsl_matrix.create ~init:0. n n |] - Gsl_matrix.add_diagonal sms.(0) (1. -. delta) + let pi = Gsl.Vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl.Matrix.create ~init:0. n n |] + Gsl.Matrix.add_diagonal sms.(0) (1. -. delta) for i = 0 to n-1 do sms.(0).{i,(if i < n/2 then i+1 else i-1)} <- delta (pi,sms) @@ -308,8 +308,8 @@ module Degenerate = struct (* random near-identity substitution matrix (all offdiagonal entries are very small random values) *) let random_sparse ?(delta=1e-6) n = Random.init 0 - let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) - let sms = [| Gsl_matrix.create ~init:0. n n |] + let pi = Gsl.Vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl.Matrix.create ~init:0. n n |] for i = 0 to n-1 do let noise = Array.init n (fun j -> if (i <> j) then Random.float delta else 0.) let z = Array.fold_left (+.) 0. noise @@ -323,8 +323,8 @@ module Degenerate = struct (* half of off-diagonal entries are zero *) let checkerboard ?(delta=1e-6) n = Random.init 0 - let pi = Gsl_vector.of_array (Array.make n (1.0 /. float n)) - let sms = [| Gsl_matrix.create ~init:0. n n |] + let pi = Gsl.Vector.of_array (Array.make n (1.0 /. float n)) + let sms = [| Gsl.Matrix.create ~init:0. n n |] for i = 0 to n-1 do for j = 0 to n-1 do if i <> j then @@ -338,7 +338,7 @@ module Degenerate = struct try let q = ArvestadBruno1997.est_Q pi sms try - CamlPaml.Q.validate (Gsl_matrix.to_arrays q) + CamlPaml.Q.validate (Gsl.Matrix.to_arrays q) with | (Invalid_argument _) as exn -> ppm q diff --git a/src/FitECM/_tags b/src/FitECM/_tags index f44b554..4f468a7 100644 --- a/src/FitECM/_tags +++ b/src/FitECM/_tags @@ -1,3 +1,3 @@ -true: debug, pkg_batteries, pkg_CamlPaml, pkg_gsl, pkg_yojson -: pkg_oUnit +true: debug, package(batteries), package(CamlPaml), package(gsl), package(yojson) +: package(oUnit) <**/*.ml> or <**/*.mli>: pp(ocaml+twt) diff --git a/src/FitECM/myocamlbuild.ml b/src/FitECM/myocamlbuild.ml deleted file mode 100644 index 5ffd91e..0000000 --- a/src/FitECM/myocamlbuild.ml +++ /dev/null @@ -1,185 +0,0 @@ -open Ocamlbuild_plugin -open Command (* no longer needed for OCaml >= 3.10.2 *) - -(** - Overview of tags: - - [pkg_batteries] to use Batteries as a library, without syntax extensions - - [use_batteries] and [use_batteries_r] to use both Batteries and all the non-destructive syntax extensions - - [pkg_sexplib.syntax] with [syntax_camlp4o] or [syntax_camlp4r] for sexplib -*) - - -(** - {1 OCamlFind} -*) - -let run_and_read = Ocamlbuild_pack.My_unix.run_and_read - -let blank_sep_strings = Ocamlbuild_pack.Lexers.blank_sep_strings - -module OCamlFind = -struct - (* this lists all supported packages *) - let find_packages () = - blank_sep_strings & - Lexing.from_string & - run_and_read "ocamlfind list | cut -d' ' -f1" - - (* this is supposed to list available syntaxes, but I don't know how to do it. *) - let find_syntaxes () = ["camlp4o"; "camlp4r"] - - (* ocamlfind command *) - let ocamlfind x = S[A"ocamlfind"; x] - - let before_options () = - (* by using Before_options one let command line options have an higher priority *) - (* on the contrary using After_options will guarantee to have the higher priority *) - - (* override default commands by ocamlfind ones *) - Options.ocamlc := ocamlfind & A"ocamlc"; - Options.ocamlopt := ocamlfind & A"ocamlopt"; - Options.ocamldep := ocamlfind & A"ocamldep"; - Options.ocamldoc := ocamlfind & A"ocamldoc"; - Options.ocamlmktop := ocamlfind & A"ocamlmktop" - - let get_ocamldoc_directory () = - let ocamldoc_directory = run_and_read "ocamlfind ocamldoc -customdir" in - let length = String.length ocamldoc_directory in - assert (length != 0); - let char = ocamldoc_directory.[length - 1] in - if (char = '\n') || (char = '\r') then String.sub ocamldoc_directory 0 (length - 1) - else ocamldoc_directory - - let after_rules () = - (* When one link an OCaml library/binary/package, one should use -linkpkg *) - flag ["ocaml"; "byte"; "link"; "program"] & A"-linkpkg"; - flag ["ocaml"; "native"; "link"; "program"] & A"-linkpkg"; - - - (* For each ocamlfind package one inject the -package option when - * compiling, computing dependencies, generating documentation and - * linking. *) - List.iter begin fun pkg -> - flag ["ocaml"; "compile"; "pkg_"^pkg] & S[A"-package"; A pkg]; - flag ["ocaml"; "ocamldep"; "pkg_"^pkg] & S[A"-package"; A pkg]; - flag ["ocaml"; "doc"; "pkg_"^pkg] & S[A"-package"; A pkg]; - flag ["ocaml"; "link"; "pkg_"^pkg] & S[A"-package"; A pkg]; - end (find_packages ()); - - (* Like -package but for extensions syntax. Morover -syntax is useless - * when linking. *) - List.iter begin fun syntax -> - flag ["ocaml"; "compile"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; - flag ["ocaml"; "ocamldep"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; - flag ["ocaml"; "doc"; "syntax_"^syntax] & S[A"-syntax"; A syntax]; - end (find_syntaxes ()); - - (* The default "thread" tag is not compatible with ocamlfind. - Indeed, the default rules add the "threads.cma" or "threads.cmxa" - options when using this tag. When using the "-linkpkg" option with - ocamlfind, this module will then be added twice on the command line. - - To solve this, one approach is to add the "-thread" option when using - the "threads" package using the previous plugin. - *) - flag ["ocaml"; "pkg_threads"; "compile"] (S[A "-thread"]); - flag ["ocaml"; "pkg_threads"; "link"] (S[A "-thread"]); -end - -(** - {1 OCaml Batteries Included} -*) - -module Batteries = -struct - let before_options () = () - - let after_rules () = - flag ["ocaml"; "link"; "byte"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"; A "odoc_info.cma"]); - flag ["ocaml"; "link"; "native"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"(*; A "odoc_info.cmxa"*)]); - flag ["ocaml"; "docfile"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); - flag ["ocaml"; "docdir"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); - flag ["ocaml"; "doc"; "use_ocamldoc_info"] (S[A "-I"; A "+ocamldoc"]); - - (*The command-line for [use_batteries] and [use_batteries_r]*) - - let cl_use_boilerplate = [A"-package"; A "batteries.pa_type_conv.syntax,batteries,sexplib.syntax"] - and cl_use_batteries = [A"-package"; A "batteries.pa_openin.syntax,batteries.pa_where.syntax,batteries.pa_batteries.syntax"; A "-package"; A "batteries"] - and cl_use_batteries_o = [] - (*[cl_use_batteries_o]: extensions which only make sense in original syntax*) - and cl_camlp4o = [A"-syntax"; A "camlp4o"] - and cl_camlp4r = [A"-syntax"; A "camlp4r"] in - - let cl_boilerplate_original = cl_use_boilerplate @ cl_camlp4o - and cl_boilerplate_revised = cl_use_boilerplate @ cl_camlp4r - and cl_batteries_original = cl_use_batteries @ cl_use_batteries_o @ cl_camlp4o - and cl_batteries_revised = cl_use_batteries @ cl_camlp4r in - - (** Tag [use_boilerplate] provides boilerplate syntax extensions, - in original syntax*) - - flag ["ocaml"; "compile"; "use_boilerplate"] & S cl_boilerplate_original ; - flag ["ocaml"; "ocamldep"; "use_boilerplate"] & S cl_boilerplate_original ; - flag ["ocaml"; "doc"; "use_boilerplate"] & S cl_boilerplate_original ; - flag ["ocaml"; "link"; "use_boilerplate"] & S cl_boilerplate_original ; - - (** Tag [use_boilerplate_r] provides boilerplate syntax extensions, - in original syntax*) - - flag ["ocaml"; "compile"; "use_boilerplate_r"] & S cl_boilerplate_revised ; - flag ["ocaml"; "ocamldep"; "use_boilerplate_r"] & S cl_boilerplate_revised ; - flag ["ocaml"; "doc"; "use_boilerplate_r"] & S cl_boilerplate_revised ; - flag ["ocaml"; "link"; "use_boilerplate_r"] & S cl_boilerplate_revised ; - - (** Tag [use_batteries] provides both package [batteries] - and all syntax extensions, in original syntax. *) - - flag ["ocaml"; "compile"; "use_batteries"] & S cl_batteries_original ; - flag ["ocaml"; "ocamldep"; "use_batteries"] & S cl_batteries_original ; - flag ["ocaml"; "doc"; "use_batteries"] & S cl_batteries_original ; - flag ["ocaml"; "link"; "use_batteries"] & S cl_batteries_original ; - - (** Tag [use_batteries_r] provides both package [batteries] - and all syntax extensions, in revised syntax. *) - - flag ["ocaml"; "compile"; "use_batteries_r"] & S cl_batteries_revised; - flag ["ocaml"; "ocamldep"; "use_batteries_r"] & S cl_batteries_revised; - flag ["ocaml"; "doc"; "use_batteries_r"] & S cl_batteries_revised; - flag ["ocaml"; "link"; "use_batteries_r"] & S cl_batteries_revised - - -(* flag ["ocaml"; "compile"; "use_batteries"] & S[A "-verbose"; - A"-package"; A "batteries.syntax.full"; - A"-syntax"; A "batteries.syntax.full"]; - flag ["ocaml"; "ocamldep"; "use_batteries"] & S[A "-verbose"; - A"-package"; A "batteries.syntax.full"; - A"-syntax"; A "batteries.syntax.full"]; - flag ["ocaml"; "doc"; "use_batteries"] & S[A "-verbose"; - A"-package"; A "batteries.syntax.full"; - A"-syntax"; A "batteries.syntax.full"]; - flag ["ocaml"; "link"; "use_batteries"] & S[A "-verbose"; - A"-package"; A "batteries.syntax.full"; - A"-syntax"; A "batteries.syntax.full"];*) - - -end - -let _ = dispatch begin function - | Before_options -> - OCamlFind.before_options (); - Batteries.before_options () - | After_rules -> - OCamlFind.after_rules (); - Batteries.after_rules () - - - | _ -> () -end - - - -(** - which ocamlrun -> header - - print_backtrace -> ajouter "-b" après le header -**)