Permalink
...
Checking mergeability…
Don’t worry, you can still create the pull request.
Comparing changes
Open a pull request
- 4 commits
- 9 files changed
- 0 commit comments
- 1 contributor
Unified
Split
Showing
with
87 additions
and 31 deletions.
- +1 −0 .gitignore
- +1 −1 .travis-ci.sh
- +1 −1 .travis.yml
- +3 −0 src/ForkNo.ml
- +8 −0 src/ForkYes.ml
- +17 −4 src/Makefile
- +52 −24 src/PhyloCSF.ml
- +3 −0 src/test.ml
- +1 −1 src/testSim.ml
View
1
.gitignore
| @@ -2,3 +2,4 @@ PhyloCSF.Linux.x86_64 | ||
| PhyloCSF.Darwin.x86_64 | ||
| _build | ||
| *.native | ||
| +ForkMaybe.ml | ||
View
2
.travis-ci.sh
| @@ -1,4 +1,4 @@ | ||
| -OPAM_DEPENDS="batteries gsl ocaml+twt ounit should" | ||
| +OPAM_DEPENDS="batteries gsl ocaml+twt forkwork ounit should" | ||
| sudo add-apt-repository -y ppa:avsm | ||
| sudo apt-get update -qq | ||
View
2
.travis.yml
| @@ -3,4 +3,4 @@ script: bash -ex .travis-ci.sh | ||
| env: | ||
| matrix: | ||
| - SKIP_SLOW=1 | ||
| - - DUMMY=1 | ||
| + - FORKWORK=1 | ||
View
3
src/ForkNo.ml
| @@ -0,0 +1,3 @@ | ||
| +let can_fork = false | ||
| + | ||
| +let map ?procs f lst = List.map f lst |
View
8
src/ForkYes.ml
| @@ -0,0 +1,8 @@ | ||
| +open Batteries | ||
| + | ||
| +let can_fork = true | ||
| + | ||
| +let map ?(procs=1) f lst = | ||
| + if procs=1 || List.length lst = 1 then List.map f lst | ||
| + else | ||
| + ForkWork.map_list ~maxprocs:procs f lst |
View
21
src/Makefile
| @@ -1,12 +1,25 @@ | ||
| +OCAMLBUILDFLAGS= | ||
| +ifdef FORKWORK | ||
| +OCAMLBUILDFLAGS=-tag pkg_forkwork | ||
| +endif | ||
| + | ||
| all: | ||
| - ocamlbuild PhyloCSF.native | ||
| + rm -f ForkMaybe.ml | ||
| +ifdef FORKWORK | ||
| + ln -s ForkYes.ml ForkMaybe.ml | ||
| +else | ||
| + ln -s ForkNo.ml ForkMaybe.ml | ||
| +endif | ||
| + ocamlbuild $(OCAMLBUILDFLAGS) PhyloCSF.native | ||
| -test: all | ||
| - ocamlbuild testSim.native test.native | ||
| +test: testexe | ||
| ./test.native -verbose | ||
| +testexe: all | ||
| + ocamlbuild $(OCAMLBUILDFLAGS) testSim.native test.native | ||
| + | ||
| clean: | ||
| rm -f *~ | ||
| ocamlbuild -clean | ||
| -.PHONY: all test clean | ||
| +.PHONY: all test testexe clean |
View
76
src/PhyloCSF.ml
| @@ -39,6 +39,7 @@ let reading_frame = opt ~group ~l:"frames" ~s:'f' ~h:"how many reading frames to | ||
| let orf_mode = opt ~group ~l:"orf" ~h:"search for ORFs (default AsIs)" (Opt.value_option "AsIs|ATGStop|StopStop|StopStop3|ToFirstStop|FromLastStop|ToOrFromStop" (Some AsIs) (fun s -> match String.lowercase s with "asis" -> AsIs | "atgstop" -> ATGStop | "stopstop" -> StopStop | "stopstop3" -> StopStop3 | "tofirststop" -> ToFirstStop | "fromlaststop" -> FromLastStop | "toorfromstop" -> ToOrFromStop| x -> invalid_arg x) (fun _ s -> sprintf "invalid ORF search mode %s"s)) | ||
| let min_codons = opt ~group ~l:"minCodons" ~h:"minimum ORF length for searching over ORFs (default 25 codons)" (StdOpt.int_option ~default:25 ()) | ||
| let print_orfs = opt ~group ~l:"allScores" ~h:"report scores of all regions evaluated, not just the max" (StdOpt.store_true ()) | ||
| +let procs = opt ~group ~s:'p' ~h:"search frames/ORFs using up to p parallel subprocesses" (StdOpt.int_option ~default: 1 ()) | ||
| let group = OptParser.add_group opt_parser "output control" | ||
| let print_bls = opt ~group ~l:"bls" ~h:"include alignment branch length score (BLS) for the reported region in output" (StdOpt.store_true ()) | ||
| @@ -65,6 +66,16 @@ if Opt.get orf_mode <> AsIs && Opt.get reading_frame = One then | ||
| eprintf "Warning: --orf with --frames=1; are you sure you don't want to search for ORFs in three or six frames?\n" | ||
| flush stderr | ||
| +if Opt.get procs > 1 && not ForkMaybe.can_fork then | ||
| + eprintf "Warning: ignoring -p, recompile PhyloCSF with: make FORKWORK=1\n" | ||
| + flush stderr | ||
| + | ||
| +(* | ||
| +if Opt.get procs > 1 && Opt.get orf_mode = AsIs && Opt.get reading_frame = One then | ||
| + eprintf "Warning: -p isn't useful without --orf and/or --frames\n" | ||
| + flush stderr | ||
| +*) | ||
| + | ||
| if Opt.get debug then Printexc.record_backtrace true | ||
| (******************************************************************************) | ||
| @@ -178,7 +189,7 @@ let find_orfs ?(ofs=0) dna = | ||
| orfs := if firstorflo = ofs then [firstorf] else [] | ||
| orfs := if len - lastorfhi <= 3 && firstorf <> lastorf then lastorf :: !orfs else !orfs | ||
| - !orfs |> List.rev |> List.enum |> filter | ||
| + !orfs |> List.rev |> List.filter | ||
| fun (lo,hi) -> | ||
| assert ((hi-lo+1) mod 3 = 0) | ||
| assert ((lo-ofs) mod 3 = 0) | ||
| @@ -191,9 +202,9 @@ let candidate_regions dna rcdna = | ||
| (if Opt.get reading_frame <> One then | ||
| [ (false,1,hi); (false,2,hi) ] else []); | ||
| (if Opt.get reading_frame = Six then | ||
| - [ (true,0,hi); (true,1,hi); (true,2,hi) ] else []) ] |> List.flatten |> List.enum | ||
| + [ (true,0,hi); (true,1,hi); (true,2,hi) ] else []) ] |> List.flatten | ||
| else | ||
| - let aoeu a = map (fun (b,c) -> a,b,c) | ||
| + let aoeu a = List.map (fun (b,c) -> a,b,c) | ||
| let all_orfs = ref [aoeu false (find_orfs ~ofs:0 dna)] | ||
| if Opt.get reading_frame <> One then | ||
| all_orfs := aoeu false (find_orfs ~ofs:1 dna) :: !all_orfs | ||
| @@ -203,7 +214,7 @@ let candidate_regions dna rcdna = | ||
| all_orfs := aoeu true (find_orfs ~ofs:0 rcdna) :: !all_orfs | ||
| all_orfs := aoeu true (find_orfs ~ofs:1 rcdna) :: !all_orfs | ||
| all_orfs := aoeu true (find_orfs ~ofs:2 rcdna) :: !all_orfs | ||
| - concat (List.enum (List.rev !all_orfs)) | ||
| + List.rev !all_orfs |> List.flatten | ||
| let pleaves ?(lo=0) ?hi t leaf_ord aln = | ||
| let hi = match hi with Some x -> x | None -> String.length aln.(0) - 1 | ||
| @@ -288,38 +299,56 @@ let process_alignment (nt,t,evaluator) fn = | ||
| (* generate list of candidate regions within the alignment *) | ||
| let rgns = candidate_regions aln.(0) rc_aln.(0) | ||
| + | ||
| + let rgns = | ||
| + if Opt.get procs > 1 && ForkMaybe.can_fork then | ||
| + (* If parallelizing, sort regions longest-to-shortest | ||
| + in order to optimize utilization *) | ||
| + List.sort (fun (_,lo1,hi1) (_,lo2,hi2) -> (hi2-lo2) - (hi1-lo1)) rgns | ||
| + else rgns | ||
| try | ||
| - if Enum.is_empty rgns then | ||
| + if rgns = [] then | ||
| assert (Opt.get orf_mode <> AsIs) | ||
| failwith "no sufficiently long ORFs found" | ||
| - (* evaluate each candidate region *) | ||
| + (* evaluate each candidate region, perhaps in parallel *) | ||
| let rgns_scores = | ||
| - rgns |> Enum.filter_map | ||
| + rgns |> ForkMaybe.map ~procs:(Opt.get procs) | ||
| fun (rc,lo,hi) -> | ||
| try | ||
| let aln_leaves = Array.of_enum (pleaves ~lo:lo ~hi:hi t leaf_ord (if rc then rc_aln else aln)) | ||
| let rslt = evaluator aln_leaves | ||
| - Some (rslt,rc,lo,hi) | ||
| + `Res (rslt,rc,lo,hi) | ||
| with | ||
| | exn -> | ||
| (* problem evaluating an individual region within the | ||
| - alignment: complain, but proceed, as maybe some | ||
| - other region will succeed. *) | ||
| - printf "%s\texception\t%d\t%d%s\t%s\n" | ||
| - fn2id fn | ||
| - lo | ||
| - hi | ||
| - if Opt.get reading_frame = Six then (if rc then "\t-" else "\t+") else "" | ||
| - Printexc.to_string exn | ||
| + alignment: register complaint, but don't die, as | ||
| + maybe some other region will succeed. *) | ||
| + let msg = | ||
| + sprintf "%s\texception\t%d\t%d%s\t%s" | ||
| + fn2id fn | ||
| + lo | ||
| + hi | ||
| + if Opt.get reading_frame = Six then (if rc then "\t-" else "\t+") else "" | ||
| + Printexc.to_string exn | ||
| if Opt.get debug then | ||
| - flush stdout | ||
| - eprintf "%s" (Printexc.get_backtrace ()) | ||
| - flush stderr | ||
| - None | ||
| + `Exn [| msg; Printexc.get_backtrace () |] | ||
| + else | ||
| + `Exn [| msg |] | ||
| - if Enum.is_empty rgns_scores then failwith "no regions successfully evaluated" | ||
| + let rgns_scores = | ||
| + rgns_scores |> List.filter_map | ||
| + function | ||
| + | `Res x -> Some x | ||
| + | `Exn [| msg |] -> print_endline msg; flush stdout; None | ||
| + | `Exn [| msg; bt |] -> | ||
| + print_endline msg; flush stdout | ||
| + prerr_endline bt; flush stderr | ||
| + None | ||
| + | _ -> assert false | ||
| + | ||
| + if rgns_scores = [] then failwith "no regions successfully evaluated" | ||
| let report_score ty (rslt,rc,lo,hi) = | ||
| printf "%s\t%s\t%.4f" (fn2id fn) ty rslt.PhyloCSFModel.score | ||
| @@ -342,10 +371,9 @@ let process_alignment (nt,t,evaluator) fn = | ||
| foreach (List.enum rslt.PhyloCSFModel.diagnostics) (fun (k,v) -> printf " %s=%s" k v) | ||
| printf "\n" | ||
| - Enum.force rgns_scores | ||
| if Opt.get print_orfs then | ||
| - Enum.clone rgns_scores |> iter (report_score "orf_score(decibans)") | ||
| - reduce max rgns_scores |> report_score (if Opt.get orf_mode <> AsIs || Opt.get reading_frame <> One then "max_score(decibans)" else "score(decibans)") | ||
| + rgns_scores |> List.iter (report_score "orf_score(decibans)") | ||
| + List.reduce max rgns_scores |> report_score (if Opt.get orf_mode <> AsIs || Opt.get reading_frame <> One then "max_score(decibans)" else "score(decibans)") | ||
| with | ||
| | ((Assert_failure _) as exn) -> raise exn | ||
| (* move on to the next alignment: convergence problems, no ORFs found, etc. *) | ||
View
3
src/test.ml
| @@ -12,6 +12,9 @@ let slow () = skip_if (try ignore (Sys.getenv "SKIP_SLOW"); true with Not_found | ||
| (* test results on the three bundled example alignments *) | ||
| let run_PhyloCSF species params = | ||
| + let params = | ||
| + if ForkMaybe.can_fork then params ^ " -p 8" | ||
| + else params | ||
| let cmd = sprintf "%s %s %s" fn_PhyloCSF (Filename.concat dn_here ("../PhyloCSF_Parameters/" ^ species)) params | ||
| let phylocsf_in = Unix.open_process_in ~cleanup:true cmd | ||
View
2
src/testSim.ml
| @@ -129,7 +129,7 @@ let mfa headers seqs = | ||
| Buffer.contents buf | ||
| let run_phylocsf aln = | ||
| - let cmd = sprintf "%s %s" fn_exe fp_params | ||
| + let cmd = sprintf "%s %s%s" fn_exe fp_params (if ForkMaybe.can_fork then " -p 8" else "") | ||
| let phylocsf_in, phylocsf_out = Unix.open_process ~cleanup:true cmd | ||
| output_string phylocsf_out aln | ||