Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also .

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also .
  • 4 commits
  • 9 files changed
  • 0 commit comments
  • 1 contributor
Showing with 87 additions and 31 deletions.
  1. +1 −0 .gitignore
  2. +1 −1 .travis-ci.sh
  3. +1 −1 .travis.yml
  4. +3 −0 src/ForkNo.ml
  5. +8 −0 src/ForkYes.ml
  6. +17 −4 src/Makefile
  7. +52 −24 src/PhyloCSF.ml
  8. +3 −0 src/test.ml
  9. +1 −1 src/testSim.ml
View
@@ -2,3 +2,4 @@ PhyloCSF.Linux.x86_64
PhyloCSF.Darwin.x86_64
_build
*.native
+ForkMaybe.ml
View
@@ -1,4 +1,4 @@
-OPAM_DEPENDS="batteries gsl ocaml+twt ounit should"
+OPAM_DEPENDS="batteries gsl ocaml+twt forkwork ounit should"
sudo add-apt-repository -y ppa:avsm
sudo apt-get update -qq
View
@@ -3,4 +3,4 @@ script: bash -ex .travis-ci.sh
env:
matrix:
- SKIP_SLOW=1
- - DUMMY=1
+ - FORKWORK=1
View
@@ -0,0 +1,3 @@
+let can_fork = false
+
+let map ?procs f lst = List.map f lst
View
@@ -0,0 +1,8 @@
+open Batteries
+
+let can_fork = true
+
+let map ?(procs=1) f lst =
+ if procs=1 || List.length lst = 1 then List.map f lst
+ else
+ ForkWork.map_list ~maxprocs:procs f lst
View
@@ -1,12 +1,25 @@
+OCAMLBUILDFLAGS=
+ifdef FORKWORK
+OCAMLBUILDFLAGS=-tag pkg_forkwork
+endif
+
all:
- ocamlbuild PhyloCSF.native
+ rm -f ForkMaybe.ml
+ifdef FORKWORK
+ ln -s ForkYes.ml ForkMaybe.ml
+else
+ ln -s ForkNo.ml ForkMaybe.ml
+endif
+ ocamlbuild $(OCAMLBUILDFLAGS) PhyloCSF.native
-test: all
- ocamlbuild testSim.native test.native
+test: testexe
./test.native -verbose
+testexe: all
+ ocamlbuild $(OCAMLBUILDFLAGS) testSim.native test.native
+
clean:
rm -f *~
ocamlbuild -clean
-.PHONY: all test clean
+.PHONY: all test testexe clean
View
@@ -39,6 +39,7 @@ let reading_frame = opt ~group ~l:"frames" ~s:'f' ~h:"how many reading frames to
let orf_mode = opt ~group ~l:"orf" ~h:"search for ORFs (default AsIs)" (Opt.value_option "AsIs|ATGStop|StopStop|StopStop3|ToFirstStop|FromLastStop|ToOrFromStop" (Some AsIs) (fun s -> match String.lowercase s with "asis" -> AsIs | "atgstop" -> ATGStop | "stopstop" -> StopStop | "stopstop3" -> StopStop3 | "tofirststop" -> ToFirstStop | "fromlaststop" -> FromLastStop | "toorfromstop" -> ToOrFromStop| x -> invalid_arg x) (fun _ s -> sprintf "invalid ORF search mode %s"s))
let min_codons = opt ~group ~l:"minCodons" ~h:"minimum ORF length for searching over ORFs (default 25 codons)" (StdOpt.int_option ~default:25 ())
let print_orfs = opt ~group ~l:"allScores" ~h:"report scores of all regions evaluated, not just the max" (StdOpt.store_true ())
+let procs = opt ~group ~s:'p' ~h:"search frames/ORFs using up to p parallel subprocesses" (StdOpt.int_option ~default: 1 ())
let group = OptParser.add_group opt_parser "output control"
let print_bls = opt ~group ~l:"bls" ~h:"include alignment branch length score (BLS) for the reported region in output" (StdOpt.store_true ())
@@ -65,6 +66,16 @@ if Opt.get orf_mode <> AsIs && Opt.get reading_frame = One then
eprintf "Warning: --orf with --frames=1; are you sure you don't want to search for ORFs in three or six frames?\n"
flush stderr
+if Opt.get procs > 1 && not ForkMaybe.can_fork then
+ eprintf "Warning: ignoring -p, recompile PhyloCSF with: make FORKWORK=1\n"
+ flush stderr
+
+(*
+if Opt.get procs > 1 && Opt.get orf_mode = AsIs && Opt.get reading_frame = One then
+ eprintf "Warning: -p isn't useful without --orf and/or --frames\n"
+ flush stderr
+*)
+
if Opt.get debug then Printexc.record_backtrace true
(******************************************************************************)
@@ -178,7 +189,7 @@ let find_orfs ?(ofs=0) dna =
orfs := if firstorflo = ofs then [firstorf] else []
orfs := if len - lastorfhi <= 3 && firstorf <> lastorf then lastorf :: !orfs else !orfs
- !orfs |> List.rev |> List.enum |> filter
+ !orfs |> List.rev |> List.filter
fun (lo,hi) ->
assert ((hi-lo+1) mod 3 = 0)
assert ((lo-ofs) mod 3 = 0)
@@ -191,9 +202,9 @@ let candidate_regions dna rcdna =
(if Opt.get reading_frame <> One then
[ (false,1,hi); (false,2,hi) ] else []);
(if Opt.get reading_frame = Six then
- [ (true,0,hi); (true,1,hi); (true,2,hi) ] else []) ] |> List.flatten |> List.enum
+ [ (true,0,hi); (true,1,hi); (true,2,hi) ] else []) ] |> List.flatten
else
- let aoeu a = map (fun (b,c) -> a,b,c)
+ let aoeu a = List.map (fun (b,c) -> a,b,c)
let all_orfs = ref [aoeu false (find_orfs ~ofs:0 dna)]
if Opt.get reading_frame <> One then
all_orfs := aoeu false (find_orfs ~ofs:1 dna) :: !all_orfs
@@ -203,7 +214,7 @@ let candidate_regions dna rcdna =
all_orfs := aoeu true (find_orfs ~ofs:0 rcdna) :: !all_orfs
all_orfs := aoeu true (find_orfs ~ofs:1 rcdna) :: !all_orfs
all_orfs := aoeu true (find_orfs ~ofs:2 rcdna) :: !all_orfs
- concat (List.enum (List.rev !all_orfs))
+ List.rev !all_orfs |> List.flatten
let pleaves ?(lo=0) ?hi t leaf_ord aln =
let hi = match hi with Some x -> x | None -> String.length aln.(0) - 1
@@ -288,38 +299,56 @@ let process_alignment (nt,t,evaluator) fn =
(* generate list of candidate regions within the alignment *)
let rgns = candidate_regions aln.(0) rc_aln.(0)
+
+ let rgns =
+ if Opt.get procs > 1 && ForkMaybe.can_fork then
+ (* If parallelizing, sort regions longest-to-shortest
+ in order to optimize utilization *)
+ List.sort (fun (_,lo1,hi1) (_,lo2,hi2) -> (hi2-lo2) - (hi1-lo1)) rgns
+ else rgns
try
- if Enum.is_empty rgns then
+ if rgns = [] then
assert (Opt.get orf_mode <> AsIs)
failwith "no sufficiently long ORFs found"
- (* evaluate each candidate region *)
+ (* evaluate each candidate region, perhaps in parallel *)
let rgns_scores =
- rgns |> Enum.filter_map
+ rgns |> ForkMaybe.map ~procs:(Opt.get procs)
fun (rc,lo,hi) ->
try
let aln_leaves = Array.of_enum (pleaves ~lo:lo ~hi:hi t leaf_ord (if rc then rc_aln else aln))
let rslt = evaluator aln_leaves
- Some (rslt,rc,lo,hi)
+ `Res (rslt,rc,lo,hi)
with
| exn ->
(* problem evaluating an individual region within the
- alignment: complain, but proceed, as maybe some
- other region will succeed. *)
- printf "%s\texception\t%d\t%d%s\t%s\n"
- fn2id fn
- lo
- hi
- if Opt.get reading_frame = Six then (if rc then "\t-" else "\t+") else ""
- Printexc.to_string exn
+ alignment: register complaint, but don't die, as
+ maybe some other region will succeed. *)
+ let msg =
+ sprintf "%s\texception\t%d\t%d%s\t%s"
+ fn2id fn
+ lo
+ hi
+ if Opt.get reading_frame = Six then (if rc then "\t-" else "\t+") else ""
+ Printexc.to_string exn
if Opt.get debug then
- flush stdout
- eprintf "%s" (Printexc.get_backtrace ())
- flush stderr
- None
+ `Exn [| msg; Printexc.get_backtrace () |]
+ else
+ `Exn [| msg |]
- if Enum.is_empty rgns_scores then failwith "no regions successfully evaluated"
+ let rgns_scores =
+ rgns_scores |> List.filter_map
+ function
+ | `Res x -> Some x
+ | `Exn [| msg |] -> print_endline msg; flush stdout; None
+ | `Exn [| msg; bt |] ->
+ print_endline msg; flush stdout
+ prerr_endline bt; flush stderr
+ None
+ | _ -> assert false
+
+ if rgns_scores = [] then failwith "no regions successfully evaluated"
let report_score ty (rslt,rc,lo,hi) =
printf "%s\t%s\t%.4f" (fn2id fn) ty rslt.PhyloCSFModel.score
@@ -342,10 +371,9 @@ let process_alignment (nt,t,evaluator) fn =
foreach (List.enum rslt.PhyloCSFModel.diagnostics) (fun (k,v) -> printf " %s=%s" k v)
printf "\n"
- Enum.force rgns_scores
if Opt.get print_orfs then
- Enum.clone rgns_scores |> iter (report_score "orf_score(decibans)")
- reduce max rgns_scores |> report_score (if Opt.get orf_mode <> AsIs || Opt.get reading_frame <> One then "max_score(decibans)" else "score(decibans)")
+ rgns_scores |> List.iter (report_score "orf_score(decibans)")
+ List.reduce max rgns_scores |> report_score (if Opt.get orf_mode <> AsIs || Opt.get reading_frame <> One then "max_score(decibans)" else "score(decibans)")
with
| ((Assert_failure _) as exn) -> raise exn
(* move on to the next alignment: convergence problems, no ORFs found, etc. *)
View
@@ -12,6 +12,9 @@ let slow () = skip_if (try ignore (Sys.getenv "SKIP_SLOW"); true with Not_found
(* test results on the three bundled example alignments *)
let run_PhyloCSF species params =
+ let params =
+ if ForkMaybe.can_fork then params ^ " -p 8"
+ else params
let cmd = sprintf "%s %s %s" fn_PhyloCSF (Filename.concat dn_here ("../PhyloCSF_Parameters/" ^ species)) params
let phylocsf_in = Unix.open_process_in ~cleanup:true cmd
View
@@ -129,7 +129,7 @@ let mfa headers seqs =
Buffer.contents buf
let run_phylocsf aln =
- let cmd = sprintf "%s %s" fn_exe fp_params
+ let cmd = sprintf "%s %s%s" fn_exe fp_params (if ForkMaybe.can_fork then " -p 8" else "")
let phylocsf_in, phylocsf_out = Unix.open_process ~cleanup:true cmd
output_string phylocsf_out aln

No commit comments for this range