-
Notifications
You must be signed in to change notification settings - Fork 0
/
fdsel_lib.ml
1614 lines (1427 loc) · 64.5 KB
/
fdsel_lib.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
let fold = Mu.fold
let map = Mu.map
let iter = Mu.iter
let sumf = Mu.sumf
let cons = Mu.cons
let map2 = Mu.map2
let revh tl h = fold cons tl h
let rev l = revh [] l
let fl = float_of_int
let lnfact n = Gsl.Sf.lnfact n
let norm i z = fl i /. fl z
let normf i z = fl i /. z
let floori x = truncate x
let ceili x = truncate (ceil x)
let atol = Array.to_list
let aofl = Array.of_list
let strcat = String.concat
let strf = string_of_float
let absf = abs_float
let is_nan x = classify_float x = FP_nan
let eq_sels_a xs ys =
let kons a b acc = acc && (compare a b = 0) in
Mu.fold2 kons true (atol xs) (atol ys)
let sprint_lof lof =
Printf.sprintf "[%s]"
(fold (fun sel st -> Printf.sprintf "%s;%0.3f" st sel)
(Printf.sprintf "%0.3f" (List.hd lof)) (List.tl lof))
let sprint_mof mof =
Printf.sprintf "[%s]"
(fold (fun lof st ->
let rowst = (fold (fun cell st -> Printf.sprintf "%s;% 18.9f" st cell)
(Printf.sprintf "% 18.9f" (List.hd (atol lof))) (List.tl (atol lof)))
in Printf.sprintf "%s%s;\n " st rowst)
"" (atol mof))
module M = MuMap.Make(struct type t = int let compare = compare end)
module Mp = MuMapP
let sterr n q = 1.96 *. sqrt (q *. (1. -. q) /. fl n)
(* Execute copies of f in nprocs parallel processes and pkons the results
of each onto pknil. Once the result of pkons is finishedp, continue
pkonsing results, but don't spawn any new batches. f must return an
integer. *)
let unix_proc_fold nprocs f pkons pknil finishedp =
let procs = Array.make nprocs
(None : (int * in_channel * Unix.file_descr * Unix.file_descr) option) in
let open_proc () =
let inde, outde = Unix.pipe () in match Unix.fork () with
0 -> let result = f () in
(output_binary_int (Unix.out_channel_of_descr outde) result; exit 0)
| pid -> (pid, Unix.in_channel_of_descr inde, inde, outde) in
let close_proc _ ch fd1 fd2 =
let result = input_binary_int ch in
close_in ch; Unix.close fd2 ; result in
let rec proc_fold pknil =
(* If array is filled with none and pknil is finished, we're done *)
if Array.fold_right (function None -> (&&) true | _ -> (&&) false)
procs (finishedp pknil) then pknil else
let new_proc pkn = if finishedp pkn then None else Some (open_proc ()) in
let kons procn pknil =
match procs.(procn) with
None -> (procs.(procn) <- new_proc pknil ; pknil)
| Some (pid, inch, infd, outfd) -> (
match Unix.waitpid [Unix.WNOHANG] pid with
(0, _) -> (ignore (Unix.select [] [] [] 0.05) ; pknil)
| _ -> let pknil = pkons (close_proc pid inch infd outfd) pknil in
(procs.(procn) <- new_proc pknil ; pknil)) in
let pknil = fold kons pknil (Mu.range 0 (nprocs - 1)) in
proc_fold pknil in
proc_fold pknil
(* Execute boolean func in batches of batch on nprocs up to max times until
errt is reached on the fraction of time func returns true. Returns (total,
ntrue) of runs. batch should be high enough that running batch number of
funcs takes a at least a few seconds to run. errt is the proportional size
of the 95% CI. *)
let dogged_monte_carlop label nprocs batch max errt func =
let finishedp errt (n, ntrue) = if n > max then true else
let q = fl ntrue /. fl n in
(n > 0 && ((sterr n q < errt && ntrue < n && sterr n q < errt *. q)
|| n >= max)) in
let batchf () = Mu.rec_n batch (fun ct -> if func () then ct+1 else ct) 0 in
let upf_pkons ntrue (tot, old_ntrue) =
Printf.printf "UPD-%s\t%d\t%d\n%!" label (tot + batch) (old_ntrue + ntrue);
(tot + batch, old_ntrue + ntrue) in
unix_proc_fold nprocs batchf upf_pkons (0, 0) (finishedp errt)
(* {{{ *)
(* Updates datastructure:
((from, to) :: rest) :: generations)
Non-aforeseen types must be registered (0, n). *)
type upd_list = (int * int) list list
(* Note: all three pop_sizes functions differ and give different results.
However if all types enter and exit the population through zeros (i.e.,
transitions (0,n) and (n,0) are registered for every type, then the
following are all equal:
pop_sizes_ts ts
= (hd (pop_sizes_uds uds)) :: pop_sizes_uds_prime uds
= pop_sizes_uds_prime uds @ [hd (rev (pop_sizes_uds_prime uds))]
= rev (hd (rev (pop_sizes_uds_prime uds)) :: rev (pop_sizes_uds uds))
*)
let pop_size_ts tsgen = fold (fun (_, ct) ps -> ct + ps) 0 tsgen
let pop_sizes_ts timeseries = Mu.map (fun (g,p) -> pop_size_ts p) timeseries
let pop_size ud = fold (fun (ff, _) ss -> ff + ss) 0 ud
let pop_sizes_uds updates = Mu.map pop_size updates
let pop_size_prime ud = fold (fun (_, tt) ss -> tt + ss) 0 ud
let pop_sizes_uds_prime updates = Mu.map pop_size_prime updates
let pop_size_a aud = fold (fun (ty, (ff, _)) ss -> ff + ss) 0 aud
let pop_sizes_auds annups = Mu.map (fun x -> pop_size_a (snd x)) annups
let pop_size_prime_a aud = fold (fun (ty, (_, tt)) ss -> tt + ss) 0 aud
let pop_sizes_auds_prime annups =
Mu.map (fun x -> pop_size_prime_a (snd x)) annups
(* Note that print* and fprint* differ in their interpretation of tag *)
let fprint_pop_sizes ouch pre pss =
Printf.fprintf ouch "%scount\n%!" pre ;
iter (fun n -> Printf.fprintf ouch "%s%d\n%!" pre n) pss
let print_pop_sizes tag pss = fprint_pop_sizes stdout (tag^"\t") pss
let fprint_pop_sizes_gtc ouch pre gtc =
let kons (gen,_,cou) mp = Mp.addf gen ((+) cou) 0 mp in
let mp = fold kons Mp.empty gtc in
Printf.fprintf ouch "%sgen\tcount\n%!" pre ;
iter (fun (gen, count) ->
Printf.fprintf ouch "%s%d\t%d\n%!" pre gen count) (Mp.bindings mp)
let fprint_pop_sizes_ts ouch pre timeseries =
Printf.fprintf ouch "%sgen\tcount\n%!" pre ;
iter (fun (gen, pop) ->
let count = pop_size_ts pop in
Printf.fprintf ouch "%s%d\t%d\n%!" pre gen count) timeseries
let initial_pop updates =
fold (fun (frc, toc) (maxtyp, pop) ->
if frc == 0 then (maxtyp, pop)
else (maxtyp + 1, (maxtyp + 1, frc) :: pop))
(0, []) (List.hd updates)
(* update_kons will cons (::) updates onto the updates datastructure (acc)
unless the type is not found in one or the other population. If either
population doesn't contain the type, it will either lnfkons (0, ct) or
tnfkons (ct, 0) onto the update, depending on whether the type was absent in
this or the previous generation. *)
let update_kons_nf lnfkons tnfkons this_pop (last_pop, acc) =
let kvm = fold (fun (k, v) m ->
BatMap.add k v m) BatMap.empty this_pop in
let last_kvm = fold (fun (k, v) m ->
BatMap.add k v m) BatMap.empty last_pop in
let kons (ty,last_ct) tl =
try let this_ct = BatMap.find ty kvm in (last_ct, this_ct) :: tl
with Not_found -> tnfkons (last_ct, 0) tl in
let nonmut_ups = fold kons [] last_pop in
let kons (ty,this_ct) tl =
try ignore (BatMap.find ty last_kvm); tl
with Not_found -> lnfkons (0, this_ct) tl in
(this_pop, (fold kons nonmut_ups this_pop :: acc))
let annupdate_kons_nf lnfkons tnfkons (tgen, tpop) ((lgen, lpop), acc) =
let tmap = Mp.of_assoc tpop in
let lmap = Mp.of_assoc lpop in
let kons (ty,lct) tl =
try let tct = Mp.find ty tmap in (ty, (lct, tct)) :: tl
with Not_found -> tnfkons (ty, (lct, 0)) tl in
let nonmut_ups = fold kons [] lpop in
let kons (ty,tct) tl =
try ignore (Mp.find ty lmap); tl
with Not_found -> lnfkons (ty, (0, tct)) tl in
((tgen, tpop), ((lgen, fold kons nonmut_ups tpop) :: acc))
(* impute missing type counts and borrow counts from the wildtype count *)
let annupdate_kons_wt nfct wtt (tgen, tpop) ((lgen, lpop), acc) =
let tmap = Mp.of_assoc tpop in
let lmap = Mp.of_assoc lpop in
let wtc = try Mp.find wtt lmap with Not_found -> 0 in
let wtpc = try Mp.find wtt tmap with Not_found -> 0 in
let typecountkons (ty, lct) (wtpc, tl) =
if ty = wtt then (wtpc, tl) (* don't write update for wt yet *)
else try let tct = Mp.find ty tmap in
(wtpc, (ty, (lct, tct)) :: tl) (* ordinary case *)
with Not_found -> (* borrow imputed types from wtp count *)
(wtpc - nfct, (ty, (lct, nfct)) :: tl) in
let (wtpc, nonmut_ups) = fold typecountkons (wtpc, []) lpop in
let kons (ty, tct) tl = try ignore (Mp.find ty lmap); tl
with Not_found -> Mu.cons (ty, (0, tct)) tl in
((tgen, tpop),
((lgen, (wtt, (wtc, wtpc)) :: fold kons nonmut_ups tpop) :: acc))
let aggregate_wt wtty indty annuds =
map (fun (gen, ups) ->
let ps = pop_size_a ups in
let (wtf, wtt, acc) = fold (fun (ty, (frc, toc)) (wtf, wtt, acc) ->
if indty ps (ty, (frc, toc)) == 0 then (wtf + frc, wtt + toc, acc)
else (wtf, wtt, (ty, (frc, toc)) :: acc)) (0, 0, []) ups in
(gen, (wtty, (wtf, wtt)) :: rev acc)) annuds
let annupdates_ub nfct wtt ts =
rev (snd
(fold (annupdate_kons_wt nfct wtt) (List.hd ts, []) (List.tl ts)))
let annupdate_kons a b = annupdate_kons_nf Mu.cons Mu.cons a b
let bare_updates annup =
map (map snd) (map snd annup)
let update_kons a b = update_kons_nf Mu.cons Mu.cons a b
let update_data data =
rev (snd (fold update_kons ((List.hd data), []) (List.tl data)))
let update_data_k knil data =
snd (fold update_kons ((List.hd data), knil) (List.tl data))
(* Transitions from zero are not incorporated *)
let update_data_nomut data =
rev (snd (fold (update_kons_nf (fun a b -> b) Mu.cons)
((List.hd data), []) (List.tl data)))
(* Transitions to or from zero are not incorporated *)
let update_data_nozero data =
rev (snd (fold (update_kons_nf (fun a b -> b) (fun a b -> b))
((List.hd data), []) (List.tl data)))
(* See equivalent updates functions above *)
let annupdate_data_nf lnfkons tnfkons timeseries =
rev (snd (fold (annupdate_kons_nf lnfkons tnfkons)
((List.hd timeseries), []) (List.tl timeseries)))
let idk a b = b (* identity kons means ignore the transition *)
let annupdate_data_nomut ts = annupdate_data_nf idk Mu.cons ts
let annupdate_data_nozero ts = annupdate_data_nf idk idk ts
let annupdate_data ts = annupdate_data_nf Mu.cons Mu.cons ts
type 'a pop = ('a * int) list
type idpop = (int * (int pop)) (* maxtype x pop *)
let monomorphic_idpop n = (1, [(1,n)])
let idpop_of_counts maxty cts =
let (mt, pop) = fold (fun ct (mt, tl) -> if ct = 0 then (mt, tl)
else (mt + 1, (mt + 1, ct) :: tl)) (maxty, []) cts in
(mt, rev pop)
let idpop_of_counts_string maxty cts =
let is = int_of_string and si = string_of_int in
let (mt, pop) = fold (fun ct (mt, tl) -> if ct = 0 then (mt, tl)
else (si (is mt + 1), (si (is mt + 1), ct) :: tl)) (maxty, []) cts in
(mt, rev pop)
(* converts a list of (type : string, count : int) to idpop, attempting to
convert type strings to integers. If the conversion fails for any string,
strings are considered opaque and will be numbered sequentially as in
idpop_of_counts. *)
let idpop_of_pop tcs =
let kons (ty, ct) (maxty, pop) =
let tyi = int_of_string ty in
(max maxty tyi, (tyi, ct) :: pop) in
try let (mt, pop) = fold kons (0, []) tcs in (mt, rev pop)
with Failure _ -> idpop_of_counts 0 (map snd tcs)
let pop_of_idpop = snd
let avewv v1 v2 w1 w2 = map2
(fun x1 x2 -> (w1 *. x1 +. w2 *. x2) /. (w1 +. w2)) v1 v2
let wright_fisher_exp mu wf pop =
let pimut = mu in
let ps = pop_size_ts pop in
let wis = map (fun (_, ct) -> wf (norm ct ps) *. norm ct ps) pop in
let sum_wi = sumf wis in
let piis = map (fun w -> (w /. sum_wi) *. (1. -. pimut)) wis in
pimut :: piis
let wright_fisher_exp_ty mu wfty pop =
let pimut = mu in
let ps = pop_size_ts pop in
let wis = map (fun (ty, ct) -> wfty ps (ty, (ct, 0)) *. norm ct ps) pop in
let sum_wi = sumf wis in
let piis = map (fun w -> (w /. sum_wi) *. (1. -. pimut)) wis in
pimut :: piis
let neutral_wf ps (maxty, pop) =
let piis = let ps = pop_size_ts pop in
aofl (map (fun (_, ct) -> norm ct ps) pop) in
let xps = atol (Gsl.Randist.multinomial Rand.rng ~n:ps ~p:piis) in
(maxty, rev (Mu.fold2 (fun (ty,_) ct pop -> (ty, ct)::pop) [] pop xps))
let wright_fisher_ time_rescaling lambda mu wf ps (maxty, pop) =
let mult_piis = aofl (wright_fisher_exp mu wf pop) in
let xps = atol (Gsl.Randist.multinomial Rand.rng ~n:ps ~p:mult_piis) in
let mut_tot, nm_counts = List.hd xps, List.tl xps in
let mut_ntypes = if mut_tot > 1
then 1 + (Gsl.Randist.binomial Rand.rng ~p:lambda ~n:(mut_tot - 1))
else mut_tot in
let mut_counts = Array.map ((+) 1)
(Gsl.Randist.multinomial Rand.rng ~n:(mut_tot - mut_ntypes)
~p:(Array.make mut_ntypes (1. /. fl mut_ntypes))) in
let maxty, mut_pop = idpop_of_counts maxty (atol mut_counts) in
let result = (maxty,
Mu.fold2 (fun (ty,_) ct pop -> (ty,ct)::pop) mut_pop pop nm_counts) in
Mu.rec_n (time_rescaling - 1) (neutral_wf ps) result
let wright_fisher lambda mu wf ps (maxty, pop) =
wright_fisher_ 1 lambda mu wf ps (maxty, pop)
(* The fitness of a type is determined by it's position in the series:
incrementing type id increments fitness by ss *)
let noveltybias_wf time_rescaling mu ss next_ps (maxty, pop) =
let omit_zeros pop = fold
(fun (ty, ct) pop -> if ct > 0 then (ty, ct) :: pop else pop) [] pop in
let this_ps = pop_size_ts pop and pimut = mu in
let minty = fold (fun (ty, ct) mn -> min ty mn) maxty pop in
let wis = map (fun (ty, ct) -> exp (fl (ty - minty) *. ss)
*. norm ct this_ps) pop in
let sum_wi = Mu.sumf wis in
let piis = map (fun w -> (w /. sum_wi) *. (1. -. pimut)) wis in
let mult_piis = aofl (pimut :: piis) in
let xps = atol (Gsl.Randist.multinomial Rand.rng ~n:next_ps ~p:mult_piis) in
let mut_tot, nm_counts = List.hd xps, List.tl xps in
let maxty, mut_pop = idpop_of_counts maxty (Mu.repeat mut_tot 1) in
let result = (maxty, omit_zeros
(Mu.fold2 (fun (ty,_) ct pop -> (ty,ct)::pop) mut_pop pop nm_counts)) in
Mu.rec_n (time_rescaling - 1) (neutral_wf next_ps) result
(* As above, but simulate "in the weak mutation limit":
only two types at once are allowed. Another type is introduced exactly as
the previous type fixes, skipping the infinite generations of monomorphy.
Naturally mu is assumed to be infinitesimal. *)
let noveltybias_wm_wf ss next_ps (maxty, pop) = match pop with
[(ty,_)] -> (maxty + 1, [(maxty+1, 1); (ty, next_ps-1)])
| _ -> (
let omit_zeros pop = fold
(fun (ty, ct) pop -> if ct > 0 then (ty, ct) :: pop else pop) [] pop in
let this_ps = pop_size_ts pop in
let minty = fold (fun (ty, ct) mn -> min ty mn) maxty pop in
let wis = map (fun (ty, ct) -> exp (fl (ty - minty) *. ss)
*. norm ct this_ps) pop in
let sum_wi = Mu.sumf wis in
let piis = map (fun w -> (w /. sum_wi)) wis in
let mult_piis = aofl piis in
let counts = atol (Gsl.Randist.multinomial Rand.rng ~n:next_ps ~p:mult_piis) in
(maxty, omit_zeros
(Mu.fold2 (fun (ty,_) ct pop -> (ty,ct)::pop) [] pop counts)))
(* returns (initps, finps, [initfreq], [obsfreq], [expfreq], [neut_expfreq]) *)
let residuals mu wf update =
let (mutc, ctst, nmctstp1, pst, pstp1) =
fold (fun (t1p, t2p) (mc, ctst, ctstp1, pst, pstp1) -> match t1p with
0->(mc+t2p, ctst, ctstp1, pst, pstp1+t2p)
| t1p->(mc, t1p::ctst, t2p::ctstp1, pst+t1p, pstp1+t2p))
(0, [], [], 0, 0) update in
let (pimut, piis) =
let ll = wright_fisher_exp mu wf (snd (idpop_of_counts 0 ctst)) in
(List.hd ll, List.tl ll) in
(pst, pstp1,
(0. :: rev (map (fun ct -> norm ct pst) ctst)),
(norm mutc pstp1 :: rev (map (fun ct -> norm ct pstp1) nmctstp1)),
(pimut :: rev piis),
(pimut :: rev (map (fun ct -> norm ct pst *. (1. -. pimut)) ctst)))
let obsexps mu wf updates =
Mu.catmap (fun ud ->
let (_, _, _, obss, exps, _) = residuals mu wf ud in
Mu.zip obss exps) updates
let obsexps_of_lhdress ress =
fold (fun (gen, psp, types, counts, piis) acc ->
Mu.fold2 (fun ct pi acc ->
(norm ct psp, pi) :: acc) acc counts piis) [] ress
let ts_residuals mu wf mutty ts =
let tcs_to_kv tcs =
fold (fun (ty,ct) mp -> Mp.add ty ct mp) Mp.empty tcs in
let add_gen (gen, tcs) ((lastgen, lasttcs), ts_rs) =
let lookupk (ty, ct) (kv, (mutc, tyups)) =
(Mp.add ty ct kv,
try let lastct = Mp.find ty lasttcs in
(mutc, ((ty, (lastct, ct)) :: tyups))
with Not_found -> (mutc + ct, tyups)) in
let (kv, (mutc, tyups)) = fold lookupk (Mp.empty, (0, [])) tcs in
let (initps, finps, ifs, osfs, expfs, nexpfs) =
residuals mu wf ((0, mutc) :: map snd tyups) in
let nonmut_ty (ty, (fct, tct)) = if fct <> 0 then Some ty else None in
((gen, kv),
(lastgen, initps, finps,
(mutty :: Mu.filtmap nonmut_ty tyups), ifs, osfs, expfs, nexpfs)
:: ts_rs) in
let (gen, tcs) = List.hd ts in
rev (snd (fold add_gen ((gen, tcs_to_kv tcs),[]) (List.tl ts)))
let ts_to_uds_residuals res =
map (fun (lastgen, initps, finps, tys, ifs, osfs, expfs, nexpfs) ->
(initps, finps, ifs, osfs, expfs, nexpfs)) res
let ne_estimate obsexps =
let ress = aofl (Mu.filtmap (fun (obsf, expf) -> match expf with
0. -> if expf == 0. then Some 0. else None
| 1. -> if expf == 1. then Some 0. else None
| ee -> Some ((obsf -. ee) /. sqrt(ee *. (1. -. ee))))
obsexps) in
let var = Gsl.Stats.variance ress in
let nn = Array.length ress in
(1. /. var,
let x = 2. *. var ** 2. /. fl (nn - 1) in
x /. var ** 4.)
let ne_timeseries residuals =
let resss = map (fun (_,_,_,obs,exp,_) ->
Mu.filtmap2 (fun obsf expf -> match expf with
0. -> if expf == 0. then Some 0. else None
| 1. -> if expf == 1. then Some 0. else None
| ee -> Some ((obsf -. ee) /. sqrt(ee *. (1. -. ee))))
obs exp) residuals in
map (fun ress ->
let aress = aofl ress in
let var = Gsl.Stats.variance aress in
let nn = Array.length aress in
(1. /. var,
let x = 2. *. var ** 2. /. fl (nn - 1) in
x /. var ** 4.)) resss
let w_pwc indf sels freq = let ind = indf freq in exp sels.(ind)
let w_pwcty indty sels nn ud = let ind = indty nn ud in exp sels.(ind)
let w_neutral freq = 1.
let wty_neutral _ _ = 1.
let findty indf nnt (ty, (frc, toc)) = indf (norm frc nnt)
let equal_bins_ind minfreq len freq =
if freq <= 0. || freq > 1.0 then failwith "Frequency outside range"
else truncate (ceil (freq *. fl len)) - 1
let equal_binning nbins minfreq =
let mf = Mu.opt_def 1. minfreq in
(equal_bins_ind mf nbins,
map (fun i -> mf +. (1. -. mf) *. norm i nbins) (Mu.range 1 (nbins - 1)))
(* Construct some b1 ... bN such that (-inf, b1], (b1, b2], ..., (bN, inf] each
contain roughly the same number of frequencies in the timeseries. The b1
... bN are unique. If all frequencies in the data are unique, each bin will
contain at most one more value than any other bin. However, if there are
redundant values in the data, the bin contents may differ.
Ignore frequencies less than minfreq when computing quantiles (hence lower
bin bounsary is guaranteed to be greater than minfreq). *)
let quantile_breaks wtt nbins minfreq annuds =
if nbins < 2 then failwith "It's not possible to have less than 2 bins." ;
let minfreq = Mu.opt_def 0. minfreq in
let data = (* Convert updates to source frequencies *)
Mu.catmap (fun (_, uds) ->
let tot = pop_size_a uds in
Mu.catmap (fun (ty, (frc, toc)) -> let ff = norm frc tot in
if ff >= minfreq && Some ty <> wtt then [ff] else []) uds) annuds in
let adata = aofl data in
Array.sort compare adata ; (* sort the source frequencies *)
let len = Array.length adata in
(* Don't put in two of the same quantile *)
let kons ind ((last, skipped), res) =
let rec next skp n =
if n = len then failwith (Printf.sprintf
"There are too many duplicate frequencies for %n quantile bins."
nbins) ;
if last >= adata.(n)
then next (skp + 1) (n + 1)
else ((adata.(n), skp), adata.(n) :: res) in
next skipped (skipped + ind * (len - 1 - skipped) / nbins) in
(* skip initial zeros *)
let nzeros = let rec ct n = if adata.(n) = 0. then ct (n+1) else n in ct 0 in
let (_, breaks) = fold kons ((0., nzeros), [])
(Mu.range 1 (nbins - 1)) in
rev breaks
let print_data_quantiles tag quantiles =
Printf.printf "%s\tquantile\n%!" tag ;
fold (fun x () -> Printf.printf "%s\t%F\n%!" tag x) () quantiles
let fprint_breaks ouch breaks =
iter (fun x -> Printf.fprintf ouch "%F\n%!" x) breaks
let fprint_nes ouch pre nes =
Printf.fprintf ouch "%sne\tvar\tlci\tuci\n%!" pre ;
iter (fun (ne, ne_var) ->
let neciw = 1.96 *. sqrt (ne_var) in
Printf.fprintf ouch "%s%f\t%f\t%f\t%f\n%!" pre
ne ne_var (ne-.neciw) (ne+.neciw)) nes
(* Note that there should be one less break than bins because the first first
and last last bin boundaries are assumed to be at +- inf. Thus output
ranges from 0 ... number of breaks, that is, there are nbreaks + 1 = nbins
possible output values *)
let explicit_indf breaks x =
let breaks = aofl breaks in
let seek x n = if x <= breaks.(n) then n else (n + 1) in
Mu.rec_n (Array.length breaks) (seek x) 0
let quantile_binning nbins wtt minfreq annuds =
let breaks = if nbins > 1
then quantile_breaks wtt nbins minfreq annuds else [] in
(explicit_indf breaks, breaks)
let log_bins nbins min_freq max_freq =
let beta = exp ((log max_freq -. log min_freq) /. fl nbins) in
let boundaries = if nbins <= 1 then [] else
Mu.rec_n (nbins - 2)
(fun li -> match li with hd :: tl -> hd /. beta :: hd :: tl
| [] -> failwith "Can't happen") [max_freq /. beta] in
(explicit_indf boundaries, boundaries)
let parse_indty_args nbins indf wtty minfreq = match (wtty, minfreq) with
(Some wtt, Some minf) -> (nbins + 1,
(fun nnt (ty, (frc, _)) -> let ff = norm frc nnt in
if ty = wtt || ff < minf then 0 else indf ff + 1))
| (Some wtt, None) -> (nbins + 1,
(fun nnt (ty, (frc, _)) -> let ff = norm frc nnt in
if ty = wtt then 0 else indf ff + 1))
| (None, Some minf) -> (nbins,
(fun nnt (ty, (frc, _)) -> let ff = norm frc nnt in
if ff < minf then failwith
(Printf.sprintf "No idea how to bin %f = %d/%d with -f." ff frc nnt)
else indf ff))
| (None, None) -> (nbins,
(fun nnt (ty, (frc, _)) -> indf (norm frc nnt)))
let min_max_freq updates =
let mk cc (mn, mx) ps =
((if cc > 0 then min cc mn else mn), max cc mx, ps + cc) in
fold (fun us (mmn, mmx) ->
let (mn, mx, ps) = fold (fun (cc,_) (mn, mx, ps) -> mk cc (mn, mx) ps)
(max_int, 0, 0) us in
(min mmn (norm mn ps), max mmx (norm mx ps)))
(let (mn, mx, ps) = fold (fun (cc,_) (mn, mx, ps) -> mk cc (mn, mx) ps)
(max_int, 0, 0) (List.hd updates) in
(norm mn ps, norm mx ps))
(List.tl updates)
let min_max_freq_nowt wtt annupdates =
fold (fun (_,uds) (mn, mx) ->
let tot = pop_size_a uds in
fold (fun (ty, (frc, toc)) (mn, mx) -> let ff = norm frc tot in
if ff > 0. && wtt <> Some ty then (min ff mn, max ff mx) else (mn, mx))
(mn, mx) uds) (infinity, neg_infinity) annupdates
let log_binning nbins wtt mfparam updates =
let (min_freq, max_freq) = min_max_freq_nowt wtt updates in
log_bins nbins (max (Mu.opt_def 0. mfparam) min_freq) max_freq
(* }}} *)
(* merge_totals pop dist records the marginalized counts
of the kth most abundant types *)
let merge_totals pop dist =
let empty = M.empty in
let insert p map = M.addf p ((+) 1) 0 map in
let rec merge acc = function
[], d -> revh d acc
| (_,p) :: ptl, [] -> merge (insert p empty :: acc) (ptl, [])
| (_,p) :: ptl, d ::dtl -> merge (insert p d :: acc) (ptl, dtl)
in merge [] (pop, dist)
(* frequency-based version with n frequency bins *)
let merge_totals_divn n pop dist =
let tot = Mu.sum (map snd pop) in
merge_totals (map (fun (a,b) -> (a, (b*2*n + tot)/(2*tot))) pop) dist
let rank_freq_dist_per_n n timeseries =
fold (merge_totals_divn n) [] timeseries
(* returns [(binid, prob);...] where prob is the proportion of observations
with frequency in the interval [0,1/nbins),
[binid/nbins, (binid + 1)/nbins), ..., [(nbins - 1)/nbins, 1] *)
let freq_dist nbins timeseries =
let insert p map = M.addf p ((+) 1) 0 map in
let (total, bins) = fold (fun pop (total, bins) ->
let ps = pop_size_ts pop in
let bin ct = if ct == ps then nbins - 1 else
(ct * nbins) / ps in
fold (fun (_, ct) (total, bins) ->
(total + 1, insert (bin ct) bins)) (total, bins) pop)
(0, M.empty) timeseries in
let (min, _) = M.min_binding bins in
let (max, _) = M.max_binding bins in
map (fun id -> (id, norm (try M.find id bins with Not_found -> 0) total))
(Mu.range min max)
let pop_to_freq timeseries =
map (fun pop ->
let ps = pop_size_ts pop in
map (fun (ty, ct) -> (ty, norm ct ps)) pop) timeseries
let fold_timeseries kons knil timeseries =
fold (fun pop kn -> fold kons kn pop) knil timeseries
let freq_dist_log nbins timeseries =
let freqs = pop_to_freq timeseries in
let (min_freq, max_freq) = let init = snd (List.hd (List.hd freqs)) in
fold_timeseries (fun y -> Mu.min_max_kons (snd y))
(init, init) freqs in
let insert p map = M.addf p ((+) 1) 0 map in
let (total, bins) = fold (fun pop (total, bins) ->
let bin freq = if freq == max_freq then nbins - 1 else
int_of_float ((log freq -. log min_freq) *. fl nbins /.
(log max_freq -. log min_freq)) in
fold (fun (_, freq) (total, bins) -> if freq == 0. then (total, bins)
else (total + 1, insert (bin freq) bins)) (total, bins) pop)
(0, M.empty) freqs in
let (min, _) = M.min_binding bins in
let (max, _) = M.max_binding bins in
map (fun id ->
(exp (norm id nbins *. (log max_freq -. log min_freq) +. log min_freq),
exp (norm (id+1) nbins*.(log max_freq-. log min_freq) +. log min_freq),
norm (try M.find id bins with Not_found -> 0) total))
(Mu.range min max)
let clean l = (* TODO: use a sort that behaves well on mostly-sorted lists *)
(* sort based on population size, largest first *)
let cmp (a,b) (c,d) = compare (d,c) (b,a) in
List.sort cmp l
let idclean (a, l) = (a, List.filter (fun (a, b) -> b <> 0) (clean l))
(* demography is either (popsize, ngens) or a list of popsizes *)
type demography = Fixed of (int * int) | Variable of int list
(* simulates n generations of models of the form (pop -> pop) where a pop is a
(type * count) list. *)
let simulate ?seed:(seed=None) ?demog:(demog=Fixed (1000, 10)) agg agg_knil
model idpop =
ignore (Mu.opt_fmap Rand.init seed) ;
let update ps (idpop, aggk) =
let new_idpop = idclean (model ps idpop) in
let new_agg = agg (pop_of_idpop new_idpop) aggk in
(new_idpop, new_agg) in
match demog with
Fixed (popsize, ngens) ->
snd (Mu.rec_n ngens (update popsize) (idpop, agg_knil))
| Variable pss -> snd (fold update (idpop, agg_knil) pss)
let agg_none x y = x
let agg_none_knil = []
type 'a agg_t = {
pop : int * (('a * int) list) ;
ups : (int * (int * (int * int)) list) list ;
dist : (int M.t) list ;
pops : (int * ('a * int) list) list ; (* [(generation, [(type,count)])] *)
nupd : int }
let agg_all_print print sampling_interval pop agg =
if (agg.nupd + 1) mod sampling_interval <> 0 then {
agg with nupd = agg.nupd + 1 } else
let this_pop = agg.nupd + 1, pop in
print this_pop ;
{ pop = this_pop ;
ups = snd (annupdate_kons this_pop (agg.pop, agg.ups)) ;
dist = merge_totals pop agg.dist ;
pops = this_pop :: agg.pops ;
nupd = agg.nupd + 1 }
let agg_all sampling_interval pop agg =
agg_all_print (fun _ -> ()) sampling_interval pop agg
let agg_all_knil pop = {pop=(0,pop); ups=[]; dist=[]; pops=[(0,pop)]; nupd=0}
let fold_updates kons knil updates =
fold (fun ups kn -> fold kons kn ups) knil updates
let fold_annupdates kons knil updates =
fold (fun (gen, ups) kn -> fold (kons gen) kn ups) knil updates
(* Pretty-printers {{{ *)
(* 'dist' throughout refers to the rank-abundance distribution. *)
let print_dist n dist =
let rec pd listpos = function
[] -> ()
| hd :: tl ->
(* Printf.printf "UPD: %d:\nUPD:" listpos ; *)
fold (fun (k, v) () -> Printf.printf " %d:%0.5f" k (norm v n )) ()
(M.bindings hd);
Printf.printf "\n%!" ;
pd (listpos + 1) tl
in
Printf.printf "\x1B\x63" ;
pd 1 dist
let print_dist_tsv tag n dist =
let rec pd lpos = function
[] -> ()
| hd :: tl ->
fold (fun (k, v) () ->
Printf.printf "%s\t%d\t%d\t%0.5f\n" tag lpos k (norm v n))
() (M.bindings hd);
pd (lpos + 1) tl
in
Printf.printf "%s\trank\tabund\tprob\n" tag ;
pd 1 dist
let fprint_n_top_rank_freqs ouch pre n timeseries =
Printf.fprintf ouch "%srank\tfreq\n" pre ;
iter
(fun pop ->
let ps = pop_size_ts pop in
ignore (Mu.rec_n n (fun (rank, rest) -> match rest with
[] -> (0, [])
| (tp,ct) :: tl ->
Printf.fprintf ouch "%s%d\t%F\n" pre rank (norm ct ps) ;
(rank + 1, tl)) (1, pop)))
timeseries
let print_n_top_rank_freqs tag n timeseries =
fprint_n_top_rank_freqs stdout (Printf.sprintf "%s\t" tag) n timeseries
let print_freq_dist tag nbins dist =
Printf.printf "%s\tnbins\tbin\tprob\n" tag ;
iter (fun (bin, freq) ->
Printf.printf "%s\t%d\t%d\t%F\n" tag nbins bin freq) dist
let fprint_freq_dist_log ch pre dist =
Printf.fprintf ch "%sbmin\tbmax\tprob\n" pre ;
iter (fun (min, max, freq) ->
Printf.fprintf ch "%s%F\t%F\t%F\n" pre min max freq) dist
let print_freq_dist_log tag dist =
fprint_freq_dist_log stdout (Printf.sprintf "%s\t" tag) dist
let print_pop pop =
Printf.printf "[" ;
fold (fun (n,p) () -> Printf.printf "%dx%s;" p n) () pop ;
Printf.printf "[]]\n"
let print_pop_tsv string_of_type prefix pop =
Printf.printf "%s\ttype\tcount\n%!" prefix ;
iter (fun (ty,ct) -> Printf.printf "%s\t%d\t%d\n%!" prefix ty ct) pop
let fprint_pops_tsv string_of_type ouch pre pops =
Printf.fprintf ouch "%sgen\ttype\tcount\n%!" pre ;
iter (fun (gen, pop) ->
iter (fun (ty,ct) ->
Printf.fprintf ouch "%s%d\t%s\t%d\n%!"
pre gen (string_of_type ty) ct) pop) pops
let fprint_pops_tsv_header ouch pre =
Printf.fprintf ouch "%sgen\ttype\tcount\n%!" pre
let fprint_pops_tsv_inc string_of_type ouch pre (gen, pop) =
iter (fun (ty,ct) ->
Printf.fprintf ouch "%s%d\t%s\t%d\n%!"
pre gen (string_of_type ty) ct) pop
let print_pops_tsv string_of_type tag pops =
fprint_pops_tsv string_of_type stdout (tag^"\t") pops
let print_timeseries = print_pops_tsv
let print_updates_dbg tag ups =
iter (fun ups -> Printf.printf "updates_dbg\t%s\t%s\n%!" tag
(fold (fun (l,t) acc -> Printf.sprintf "%s%d->%d;" acc l t) "" ups))
ups
(* unlike the above, print in an ocaml-readable datastructure *)
let fprint_updates ouch updates =
Printf.fprintf ouch "let updates = [\n%!" ;
iter (fun ups ->
Printf.fprintf ouch " [%s];\n%!"
(String.concat ";" (map (fun (l,t) ->
Printf.sprintf "(%d,%d)" l t) ups)))
updates ;
Printf.fprintf ouch "] ;;\n"
let print_wp tag ps wp =
Printf.printf "%s\tct\twp\n%!" tag ;
let pr ct = Printf.printf "%s\t%d\t%F\n%!" tag ct (wp ct) ; ct + 1 in
ignore (Mu.rec_n (ps + 1) pr 0)
let fprint_bindata ouch prefix indf breaks updates =
let breaks = Array.of_list breaks in
let dd = Array.length breaks + 1 in
let (bin_gens, bin_transs, bin_fsum, bin_lnfsum) =
let bin_gens = Array.make dd 0 in
let bin_transs = Array.make dd 0 in
let bin_fsum = Array.make dd 0. in
let bin_lnfsum = Array.make dd 0. in
iter
(fun ups ->
let bin_counts_gen = Array.make dd 0 in
let nnt = pop_size ups in
let ff (frc, toc) =
if frc <> 0 then
let freq = norm frc nnt in
let ind = indf freq in (* ind = b(x_j^t/N_t) *)
bin_counts_gen.(ind) <- 1 ;
bin_transs.(ind) <- bin_transs.(ind) + 1 ;
bin_fsum.(ind) <- bin_fsum.(ind) +. freq ;
bin_lnfsum.(ind) <- bin_lnfsum.(ind) +. log freq in
iter ff ups ;
Array.iteri
(fun ind bin_ct -> bin_gens.(ind) <- bin_gens.(ind) + bin_ct)
bin_counts_gen)
updates ;
(bin_gens, bin_transs, bin_fsum, bin_lnfsum) in
let nupd = Mu.sum (map List.length updates) in
let nnmupd = Mu.sum (map List.length
(map (List.filter (fun (x,_) -> x > 0)) updates)) in
let bounds ind =
((if ind - 1 < 0 then neg_infinity else breaks.(ind - 1)),
(if ind > dd - 2 then infinity else breaks.(ind))) in
Printf.fprintf ouch
"%snupd\tnnmuupd\tngens\tbin\tllim\tulim\tgenc\ttransc\tmeanfreq\tgeomeanfreq\n%!" prefix ;
Mu.iter (fun ind ->
Printf.fprintf ouch "%s%d\t%d\t%d\t%d\t%e\t%e\t%d\t%d\t%F\t%F\n%!" prefix
nupd nnmupd (List.length updates)
ind (fst (bounds ind)) (snd (bounds ind))
bin_gens.(ind) bin_transs.(ind)
(bin_fsum.(ind) /. float_of_int bin_transs.(ind))
(exp (bin_lnfsum.(ind) /. float_of_int bin_transs.(ind))))
(Mu.range 0 (dd - 1))
(* }}} *)
(* MLEst {{{ *)
let likelihood mu wf updates =
let sum_gen us tot =
let (mut_us, nonmut_us) = Mu.partition (fun (a,_) -> a == 0) us in
let oldps = Mu.sum (map fst us) in
let mut_ct = Mu.sum (map snd mut_us) in
(* let nonmut_ct = Mu.sum (map snd nonmut_us) in *)
(* let newps = mut_ct + nonmut_ct in *)
let pimut = mu in
let wis = map (fun (frc,_) -> wf (norm frc oldps) *. norm frc oldps)
nonmut_us in
let tot_wi = Mu.sumf wis in
let piis = pimut :: (map (fun wi -> wi /. tot_wi *. (1. -. pimut)) wis) in
let counts = mut_ct :: map snd nonmut_us in
(* Printf.printf "gslll\t%F\t%F\n%!" (Mu.sumf piis) tot_wi ; *)
tot +. Gsl.Randist.multinomial_lnpdf ~p:(aofl piis) ~n:(aofl counts) in
fold sum_gen 0. updates
let ml_mutation_rate updates =
let (num, denom) = fold_updates
(fun (frc, toc) (nsum, dsum) ->
if frc == 0 then (nsum + toc, dsum + toc) else (nsum, dsum + toc))
(0, 0) updates in
norm num denom
(* Derivative of the piecewise constant loglhd function wrt all dd parameters.
The derivative of the log likelihood can be thought of as 'obs = expect'.
The return value is two arrays, where obs.(k) and expect.(k) are those
components of the derivative with respect to k. The arguments coef and sels
are both vectors of parameters. The difference is that in mm, coeff are sk
that will become the coefficients e^sk to the RHS numerator of Eq 13, that
is, the variable parameters with respect to the optimization, and sels are
the iterates or the sk that appear in the RHS denominator. The derivative
with respect to sk at a given set of parameters sels is:
let (obs, ex) = diffll sels sels updates in obs.(k) - ex.(k)
The optimum s_k in the context of estimated sels sms is:
let (obs, ex) = diffll (Array.map (fun _ -> 0.) sms) sms updates in
log (obss.(k) /. ex.(k))
This is possible because exp 0. = 1., and coef is named that to suggest that
those variable sks only show up as the e^sk coefficient to the ex. Passing
sk = 0 in coeff allows you to solve for e^sk as (obs.(k)/ex.(k)) by replacing
the factor of e^sk with 1. *)
let diffll coef indf sels updates =
let dd = Array.length sels in (* number of bins *)
let obs = Array.make dd 0. in (* accumulator for Eq 13 LHS by bin *)
let exs = Array.make dd 0. in (* acc for Eq 13 RHS by bin *)
iter (fun ups ->
let bin_sums = Array.make dd 0. in
let nnt = pop_size ups in
let (nonmutc, zz) =
let kons (frc, toc) (nonmutc, zz) =
let frcexpsk =
if frc <> 0 then
(let ind = indf (norm frc nnt) in
obs.(ind) <- obs.(ind) +. fl toc;
bin_sums.(ind) <- bin_sums.(ind) +. fl frc ;
exp sels.(ind) *. (fl frc))
else 0. in
((if frc = 0 then nonmutc else nonmutc + toc), zz +. frcexpsk) in
fold kons (0, 0.) ups in
Array.iteri
(fun ind bin_sum ->
exs.(ind) <- exs.(ind) +.
exp coef.(ind) *. bin_sum *. fl nonmutc /. zz)
bin_sums) updates ;
(obs, exs)
(* similar to above, but takes annotated update and mutp, a predicate
(typically (frc == 0)) to determine whether that transition is a mutation *)
let default_mutp _ (ty, (frc, toc)) = frc = 0
let nomut_mutp _ _ = false
let count_mutp count _ (ty, (frc, toc)) = frc < count
let freq_mutp freq nnt (ty, (frc, toc)) = norm frc nnt < freq
(* mutwt -- use mutp to determine wildtype in addition to wt. *)
let diffll_ress coef sels indty mutp annupds =
let dd = Array.length sels in (* number of bins *)
let obs = Array.make dd 0 in (* accumulator for Eq 13 LHS by bin *)
let exs = Array.make dd 0. in (* acc for Eq 13 RHS by bin *)
let ress = map (fun (gen, ups) ->
let bin_sums = Array.make dd 0. in
let nnt = pop_size_a ups in
let (mutc, nonmutc, zz, tyress) =
let kons ((ty, (frc, toc)) as trans) (mutc, nonmutc, zz, tyracc) =
let ind = indty nnt (ty, (frc, toc)) in
let frcexpsk =
if mutp nnt trans then 0.
else (obs.(ind) <- obs.(ind) + toc;
bin_sums.(ind) <- bin_sums.(ind) +. fl frc ;
exp sels.(ind) *. (fl frc)) in
((if mutp nnt trans then mutc + toc else mutc),
(if mutp nnt trans then nonmutc else nonmutc + toc),
zz +. frcexpsk,
(ty, ind, frc, toc, frcexpsk) :: tyracc) in
fold kons (0, 0, 0., []) ups in
Array.iteri
(fun ind bin_sum ->
exs.(ind) <- exs.(ind) +.
exp coef.(ind) *. bin_sum *. fl nonmutc /. zz)
bin_sums ;
(gen, nnt, mutc, nonmutc, zz, tyress)) annupds in
(obs, exs, ress)
(* Legacy version, does not deal with censorship *)
let ml_mm_pwclf logchopt indf sels updates =
let dd = Array.length sels in
(* === logging mm iterates to a file, if desired === *)
ignore (Mu.opt_fmap
(fun logch -> Printf.fprintf logch "ml_mm_pwclf\tll\t%s\n%!"
(strcat "\t" (map (Printf.sprintf "s%d") (Mu.range 1 dd))))
logchopt) ;
let last_likelihood = ref (-1./.0.) in
let print_sels lhd sels =
ignore (Mu.opt_fmap
(fun logch ->
Printf.fprintf logch "ml_mm_pwclf\t%f\t%s\n%!"
lhd (strcat "\t" (map strf sels)))
logchopt) ;
last_likelihood := lhd in
(* === end logging === *)
let muest = ml_mutation_rate updates in
let mm sms =
let (obs, exs) = diffll (Array.make dd 0.) indf sms updates in
let optss = Array.mapi (fun ind _ -> log (obs.(ind) /. exs.(ind))) obs in
let smean = Mu.meanf_n (atol optss) in
let optss = Array.map (fun x -> x -. smean) optss in
optss in
(* recurse mm to fixed likelihood *)
let rec fix fn init init_lhd =
let next = fn init in
let lhd = likelihood muest (w_pwc indf next) updates in
print_sels lhd (atol next) ;
if eq_sels_a init next || lhd <= init_lhd then (init, init_lhd)
else fix fn next lhd in
let (sels, ll) = fix mm sels neg_infinity in
(muest, sels, ll)
let annupdates_residuals sels indty mutp annups =
let dd = Array.length sels in let zeros = Array.make dd 0. in
let (_, _, ress) = diffll_ress zeros sels indty mutp annups in
ress
let fprint_diff_ress ouch ress =
Printf.fprintf ouch
"gen\tinit_ps\tmutc\tnonmutc\tfin_ps\tdenom\ttype\tind\tinitc\tfinc\tinitf\tobsf\tcesk\tnexpf\texpf\trs_inc\n" ;
iter (fun (gen, nnt, mutc, nonmutc, zz, tyress) ->
iter (fun (ty, ind, frc, toc, frcexpsk) ->
let initf = norm frc nnt and obsf = norm toc (mutc + nonmutc) in
let nonmutf = norm nonmutc (mutc + nonmutc) in
let nexpf = initf *. nonmutf in
let expf = frcexpsk /. zz *. nonmutf in
let rs_inc = (obsf -. expf) /. sqrt (expf *. (1. -. expf)) in
Printf.fprintf ouch
"%d\t%d\t%d\t%d\t%d\t%F\t%s\t%d\t%d\t%d\t%F\t%F\t%F\t%F\t%F\t%F\n"
gen nnt mutc nonmutc (mutc + nonmutc)
zz ty ind frc toc
initf obsf
frcexpsk
nexpf expf rs_inc)