-
Notifications
You must be signed in to change notification settings - Fork 80
/
TMB.R
1951 lines (1891 loc) · 83.5 KB
/
TMB.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## Copyright (C) 2013-2015 Kasper Kristensen
## License: GPL-2
## Utilities
## Utilities
## Locate the positions (in the unlisted full parameter vector) of every
## parameter whose name matches at least one of the regular expressions
## in 'random'.
grepRandomParameters <- function(parameters, random) {
  ## Indices of matching components of the parameter list
  hits <- lapply(random, function(pattern) grep(pattern, names(parameters)))
  matched <- sort(unique(unlist(hits)))
  ## Build a 0/1 template with the shape of the full parameter vector,
  ## flagging every entry of the matched components with 1
  template <- lapply(parameters, function(x) x * 0)
  template[matched] <- lapply(template[matched], function(x) x * 0 + 1)
  which(as.logical(unlist(template)))
}
## base::unlist spends most of its time constructing names, which this
## package almost never uses. Shadow it with a version whose default is
## use.names=FALSE (interface otherwise identical to base::unlist).
unlist <- function(x, recursive = TRUE, use.names = FALSE) {
  base::unlist(x, recursive = recursive, use.names = use.names)
}
## Assign without losing other attributes than 'names' (which may get
## overwritten when subsetting)
"keepAttrib<-" <- function(x, value){
attr <- attributes(x)
keep <- setdiff(names(attr), "names")
x <- value
attributes(x)[keep] <- attr[keep]
x
}
## Associate a 'map' factor with *one* entry of a parameter list.
## Entries sharing a factor level are collapsed to their mean; NA levels
## mark entries that stay fixed. The returned (shortened) vector carries
## attributes needed to expand back to the original shape.
updateMap <- function(parameter.entry, map.entry) {
  ## Collapse entries with a common level to a single value (their mean)
  shortened <- tapply(parameter.entry, map.entry, mean)
  if (length(shortened) == 0) {
    shortened <- as.numeric(shortened) ## normalize the zero-length case
  }
  ## 0-based codes into 'shortened'; -1 marks fixed (NA level) entries
  codes <- unclass(map.entry)
  codes[!is.finite(codes)] <- 0L
  codes <- codes - 1L
  attr(shortened, "shape") <- parameter.entry
  attr(shortened, "map") <- codes
  attr(shortened, "nlevels") <- length(shortened)
  shortened
}
## Guess the name of the user's loaded TMB DLL by scanning all loaded
## DLLs for the 'MakeADFunObject' entry point exported by compiled TMB
## templates. Fails when zero or more than one candidate is found.
getUserDLL <- function(){
  dlls <- getLoadedDLLs()
  ## A DLL is a TMB model iff symbol lookup does not error
  isTMBdll <- function(dll) {
    !is(try(getNativeSymbolInfo("MakeADFunObject", dll), TRUE), "try-error")
  }
  ## vapply rather than sapply: guarantees a logical vector regardless
  ## of input, so the sum()/subset logic below is always well-defined
  TMBdll <- vapply(dlls, isTMBdll, logical(1))
  if(sum(TMBdll) == 0) stop("There are no TMB models loaded (use 'dyn.load').")
  if(sum(TMBdll) >1 ) stop("Multiple TMB models loaded. Failed to guess DLL name.")
  names(dlls[TMBdll])
}
## Un-exported functions that we need
## Fetch tools:::.shlib_internal (shared-object compilation helper) once
## at load time; presumably fetched via get() to access the unexported
## symbol without a literal ':::' — TODO confirm no public equivalent.
.shlib_internal <- get(".shlib_internal", envir = asNamespace("tools"), inherits = FALSE)
## Update cholesky factorization ( of H+t*I ) avoiding copy overhead
## by writing directly to L(!).
## NOTE(review): the C routine is destructive — 'L' is overwritten in
## place, so callers must own 'L' and not share it with other code.
updateCholesky <- function(L, H, t=0){
  ## L: Cholesky factor to update in place; H: sparse matrix; t: ridge.
  .Call("tmb_destructive_CHM_update", L, H, t, PACKAGE="TMB")
}
## Solve a linear system given a pre-computed Cholesky factor 'L'
## (dispatches to TMB's C routine 'tmb_CHMfactor_solve').
solveCholesky <- function(L, x){
  ## L: factor produced by Cholesky()/updateCholesky; x: right-hand side.
  .Call("tmb_CHMfactor_solve", L, x, PACKAGE="TMB")
}
## Test for invalid external pointer
## Returns TRUE when the external pointer no longer references a live
## object (e.g. after a workspace reload or deep copy — see the retape
## logic elsewhere in this file).
isNullPointer <- function(pointer) {
  .Call("isNullPointer", pointer, PACKAGE="TMB")
}
## Attach a finalizer to an ADFun external pointer so the underlying
## C++ object is freed when the R object is garbage collected.
## Returns the (modified) ADFun, or NULL when given NULL.
registerFinalizer <- function(ADFun, DLL) {
  ## sdreport passes ADFun=NULL; nothing to register in that case
  if (is.null(ADFun)) {
    return(NULL)
  }
  ADFun$DLL <- DLL
  reg.finalizer(ADFun$ptr, function(ptr) {
    ## Pointers invalidated elsewhere (e.g. session reload) need no free
    if (!isNullPointer(ptr)) {
      .Call("FreeADFunObject", ptr, PACKAGE = DLL)
    }
  })
  ADFun
}
##' Sequential reduction configuration
##'
##' Helper function to specify an integration grid used by the
##' sequential reduction algorithm available through the argument
##' \code{integrate} to \code{MakeADFun}.
##' @param x Breaks defining the domain of integration
##' @param discrete Boolean defining integration wrt Lebesgue measure (\code{discrete=FALSE}) or counting measure \code{discrete=TRUE}.
SR <- function(x, discrete=FALSE) {
  ## Pass an existing configuration object through untouched
  if (is(x, "SR")) return(x)
  breaks <- as.numeric(x)
  if (is.unsorted(breaks)) stop("'x' must be sorted")
  if (discrete) {
    ## Counting measure: a unit weight at every break point
    pts <- breaks
    wts <- rep(1., length(breaks))
  } else {
    ## Lebesgue measure: midpoint rule on the cells between breaks
    wts <- diff(breaks)
    pts <- head(breaks, -1) + wts / 2
  }
  structure(list(x = pts, w = wts, method = "marginal_sr"), class = "SR")
}
##' Gauss Kronrod configuration
##'
##' Helper function to specify parameters used by the Gauss Kronrod
##' integration available through the argument \code{integrate} to
##' \code{MakeADFun}.
##' @param ... See source code
GK <- function(...) {
  ## Defaults, overridden by any user-supplied settings in '...'
  cfg <- list(dim = 1, adaptive = FALSE, debug = FALSE)
  user <- list(...)
  cfg[names(user)] <- user
  ## Method tag consumed by MakeADFun's 'integrate' machinery
  cfg$method <- "marginal_gk"
  structure(cfg, class = "GK")
}
## TODO: Laplace approx config
## Collect user settings from '...' and tag them with method="laplace".
LA <- function(...) {
  cfg <- list(...)
  cfg[["method"]] <- "laplace"
  structure(cfg, class = "LA")
}
## 'parse' MakeADFun argument 'integrate': keep only the entries of
## 'arg' that inherit from class 'name' (e.g. "SR", "GK", "LA").
parseIntegrate <- function(arg, name) {
  ## vapply rather than sapply: for an empty 'arg', sapply returns
  ## list() and 'arg[list()]' errors; vapply yields logical(0) so the
  ## subset below correctly gives an empty list.
  keep <- vapply(arg, function(x) is(x, name), logical(1))
  arg[keep]
}
##' Construct objective functions with derivatives based on the users C++ template.
##'
##' A call to \code{MakeADFun} will return an object that, based on the users DLL code (specified through \code{DLL}), contains functions to calculate the objective function
##' and its gradient. The object contains the following components:
##' \itemize{
##' \item \code{par} A default parameter.
##' \item \code{fn} The likelihood function.
##' \item \code{gr} The gradient function.
##' \item \code{report} A function to report all variables reported with the REPORT() macro in the user template.
##' \item \code{env} Environment with access to all parts of the structure.
##' }
##' and is thus ready for a call to an R optimizer, such as \code{nlminb} or \code{optim}.
##' Data (\code{data}) and parameters (\code{parameters}) are directly read by the user template via the macros beginning with DATA_
##' and PARAMETER_. The order of the PARAMETER_ macros defines the order of parameters in the final objective function.
##' There are no restrictions on the order of random parameters, fixed parameters or data in the template.
##' @section Parameter mapping:
##' Optionally, a simple mechanism for collecting and fixing parameters from R is available through the \code{map} argument. A map is a named list
##' of factors with the following properties:
##' \itemize{
##' \item names(map) is a subset of names(parameters).
##' \item For a parameter "p" length(map$p) equals length(parameters$p).
##' \item Parameter entries with NAs in the factor are fixed.
##' \item Parameter entries with equal factor level are collected to a common value.
##' }
##' More advanced parameter mapping, such as collecting parameters between different vectors etc., must be implemented from the template.
##' @section Specifying random effects:
##' Random effects are specified via the argument \code{random}: A component of the parameter list is marked as random if its name is matched
##' by any of the character strings of the vector \code{random} (regular expression match is performed if \code{regexp=TRUE}).
##' If some parameters are specified as random effects, these will
##' be integrated out of the objective function via the Laplace approximation. In this situation the functions \code{fn} and \code{gr}
##' automatically perform an optimization of random effects for each function evaluation. This is referred to as
##' the 'inner optimization'. Strategies for choosing initial values of the inner optimization can be controlled
##' via the argument \code{random.start}. The default is \code{expression(last.par.best[random])}
##' where \code{last.par.best} is an internal full parameter vector corresponding to the currently best
##' likelihood. An alternative choice could be \code{expression(last.par[random])} i.e. the random effect optimum of
##' the most recent - not necessarily best - likelihood evaluation. Further control of the inner optimization can
##' be obtained by the argument \code{inner.control} which is a list of control parameters for the inner optimizer
##' \code{newton}. Depending on the inner optimization problem type the following settings are recommended:
##' \enumerate{
##' \item Quasi-convex: \code{smartsearch=TRUE} (the default).
##' \item Strictly-convex: \code{smartsearch=FALSE} and \code{maxit=20}.
##' \item Quadratic: \code{smartsearch=FALSE} and \code{maxit=1}.
##' }
##' @section The model environment \code{env}:
##' Technically, the user template is processed several times by inserting
##' different types as template parameter, selected by argument \code{type}:
##' \itemize{
##' \item \code{"ADFun"} Run through the template with AD-types and produce a stack of operations representing the objective function.
##' \item \code{"Fun"} Run through the template with ordinary double-types.
##' \item \code{"ADGrad"} Run through the template with nested AD-types and produce a stack of operations representing the objective function gradient.
##' }
##' Each of these are represented by external pointers to C++ structures available in the environment \code{env}.
##'
##' Further objects in the environment \code{env}:
##' \itemize{
##' \item \code{validpar} Function defining the valid parameter region (by default no restrictions). If an invalid
##' parameter is inserted, \code{fn} immediately returns NaN.
##' \item \code{parList} Function to get the full parameter vector of random and fixed effects in a convenient
##' list format.
##' \item \code{random} An index vector of random effect positions in the full parameter vector.
##' \item \code{last.par} Full parameter of the latest likelihood evaluation.
##' \item \code{last.par.best} Full parameter of the best likelihood evaluation.
##' \item \code{tracepar} Trace every likelihood evaluation ?
##' \item \code{tracemgc} Trace maximum gradient component of every gradient evaluation ?
##' \item \code{silent} Pass 'silent=TRUE' to all try-calls ?
##' }
##' @section The argument \code{intern}:
##' By passing \code{intern=TRUE} the entire Laplace approximation (including sparse matrix calculations) is done within the AD machinery on the C++ side. This requires the model to be compiled using the 'TMBad framework' - see \code{\link{compile}}. For any serious use of this option one should consider compiling with \code{supernodal=TRUE} - again see \code{\link{compile}} - in order to get performance comparable to R's matrix calculations. The benefit of the 'intern' LA is that it may be faster in some cases and that it provides an autodiff hessian (\code{obj$he}) wrt. the fixed effects which would otherwise not work for random effect models. Another benefit is that it gives access to fast computations with certain hessian structures that do not meet the usual sparsity requirement. A detailed list of options are found in the online doxygen documentation in the 'newton' namespace under the 'newton_config' struct. All these options can be passed from R via the `inner.control` argument. However, there are some drawbacks of running the LA on the C++ side. Notably, random effects are no longer visible in the model environment which may break assumptions on the layout of internal vectors (`par`, `last.par`, etc). In addition, model debugging becomes harder when calculations are moved to C++.
##' @section Controlling tracing:
##' A high level of tracing information will be output by default when evaluating the objective function and gradient.
##' This is useful while developing a model, but may eventually become annoying. Disable all tracing by passing
##' \code{silent=TRUE} to the \code{MakeADFun} call.
##' @note Do not rely upon the default arguments of any of the functions in the model object \code{obj$fn}, \code{obj$gr}, \code{obj$he}, \code{obj$report}. I.e. always use the explicit form \code{obj$fn(obj$par)} rather than \code{obj$fn()}.
##'
##' @title Construct objective functions with derivatives based on a compiled C++ template.
##' @param data List of data objects (vectors, matrices, arrays, factors, sparse matrices) required by the user template (order does not matter and un-used components are allowed).
##' @param parameters List of all parameter objects required by the user template (both random and fixed effects).
##' @param map List defining how to optionally collect and fix parameters - see details.
##' @param type Character vector defining which operation stacks are generated from the users template - see details.
##' @param random Character vector defining the random effect parameters. See also \code{regexp}.
##' @param profile Parameters to profile out of the likelihood (this subset will be appended to \code{random} with Laplace approximation disabled).
##' @param random.start Expression defining the strategy for choosing random effect initial values as function of previous function evaluations - see details.
##' @param hessian Calculate Hessian at optimum?
##' @param method Outer optimization method.
##' @param inner.method Inner optimization method (see function "newton").
##' @param inner.control List controlling inner optimization.
##' @param MCcontrol List controlling importance sampler (turned off by default).
##' @param ADreport Calculate derivatives of macro ADREPORT(vector) instead of objective_function return value?
##' @param atomic Allow tape to contain atomic functions?
##' @param LaplaceNonZeroGradient Allow Taylor expansion around non-stationary point?
##' @param DLL Name of shared object file compiled by user (without the conventional extension, \file{.so}, \file{.dll}, \dots).
##' @param checkParameterOrder Optional check for correct parameter order.
##' @param regexp Match random effects by regular expressions?
##' @param silent Disable all tracing information?
##' @param intern Do Laplace approximation on C++ side ? See details (Experimental - may change without notice)
##' @param integrate Specify alternative integration method(s) for random effects (see details)
##' @param ... Currently unused.
##' @return List with components (fn, gr, etc) suitable for calling an R optimizer, such as \code{nlminb} or \code{optim}.
MakeADFun <- function(data, parameters, map=list(),
type=c("ADFun","Fun","ADGrad"[!intern && (!is.null(random) || !is.null(profile)) ] ),
random=NULL,
profile=NULL,
random.start=expression(last.par.best[random]),
hessian=FALSE,method="BFGS",
inner.method="newton",
inner.control=list(maxit=1000),
MCcontrol=list(doMC=FALSE,seed=123,n=100),
ADreport=FALSE,
atomic=TRUE,
LaplaceNonZeroGradient=FALSE, ## Experimental feature: Allow expansion around non-stationary point
DLL=getUserDLL(),
checkParameterOrder=TRUE, ## Optional check
regexp=FALSE,
silent=FALSE,
intern=FALSE,
integrate=NULL,
...){
## Check that DLL is loaded
if ( ! DLL %in% names(getLoadedDLLs()) ) {
stop(sprintf("'%s' was not found in the list of loaded DLLs. Forgot to dyn.load(dynlib('%s')) ?", DLL, DLL))
}
env <- environment() ## This environment
if(!is.list(data))
stop("'data' must be a list")
ok <- function(x)(is.matrix(x)|is.vector(x)|is.array(x))&(is.numeric(x)|is.logical(x))
ok.data <- function(x)ok(x)|is.factor(x)|is(x,"sparseMatrix")|is.list(x)|(is.character(x)&length(x)==1)
check.passed <- function(x){
y <- attr(x,"check.passed")
if(is.null(y)) FALSE else y
}
if(!check.passed(data)){
if(!all(sapply(data,ok.data))){
cat("Problem with these data entries:\n")
print(which(!sapply(data,ok.data)))
stop("Only numeric matrices, vectors, arrays, ",
"factors, lists or length-1-characters ",
"can be interfaced")
}
}
if(!check.passed(parameters)){
if(!all(sapply(parameters,ok))){
cat("Problem with these parameter entries:\n")
print(which(!sapply(parameters,ok)))
stop("Only numeric matrices, vectors and arrays ",
"can be interfaced")
}
}
if(length(data)){
dataSanitize <- function(x){
if(is.list(x)) return( lapply(x, dataSanitize) )
if(is(x,"sparseMatrix")){
## WAS: x <- as(x, "dgTMatrix")
x <- as( as(x, "TsparseMatrix"), "generalMatrix")
}
else if (is.character(x))
{
## Do nothing
}
else
{
if(is.factor(x))x <- unclass(x)-1L ## Factors are passed as 0-based integers !!!
storage.mode(x) <- "double"
}
x
}
if(!check.passed(data)){
data <- lapply(data,dataSanitize)
}
attr(data,"check.passed") <- TRUE
}
if(length(parameters)){
parameterSanitize <- function(x){
storage.mode(x) <- "double"
x
}
if(!check.passed(parameters)){
parameters <- lapply(parameters,parameterSanitize)
}
attr(parameters,"check.passed") <- TRUE
}
if(checkParameterOrder){
## For safety, check that parameter order match the parameter order in user template.
## If not, permute parameter list with a warning.
## Order in which parameters were requested:
parNameOrder <- getParameterOrder(data, parameters, new.env(), DLL=DLL)
if(!identical(names(parameters),parNameOrder)){
if(!silent) cat("Order of parameters:\n")
if(!silent) print(names(parameters))
if(!silent) cat("Not matching template order:\n")
if(!silent) print(parNameOrder)
keepAttrib( parameters ) <- parameters[parNameOrder]
if(!silent) cat("Your parameter list has been re-ordered.\n(Disable this warning with checkParameterOrder=FALSE)\n")
}
}
## Prepare parameter mapping.
## * A parameter map is a factor telling which parameters should be grouped
## * NA values are untouched: So user can e.g. set them to zero
## * NOTE: CURRENTLY ONLY WORKS ON PARAMETER_ARRAY() !!!
if(length(map)>0){
ok <- all(names(map)%in%names(parameters))
if(!ok)stop("Names in map must correspond to parameter names")
ok <- all(sapply(map,is.factor))
if(!ok)stop("map must contain factors")
ok <- sapply(parameters[names(map)],length)==sapply(map,length)
if(!all(ok))stop("A map factor length must equal parameter length")
param.map <- lapply(names(map),
function(nam)
{
updateMap(parameters[[nam]], map[[nam]])
})
## Now do the change:
keepAttrib( parameters[names(map)] ) <- param.map
}
lrandom <- function() {
ans <- logical(length(par))
ans[random] <- TRUE
ans
}
lfixed <- function() {
!lrandom()
}
## Utility to get back parameter list in original shape
parList <- function(x=par[lfixed()],par=last.par){
ans <- parameters
nonemp <- sapply(ans,function(x)length(x)>0) ## Workaround utils::relist bug for empty list items
nonempindex <- which(nonemp)
skeleton <- as.relistable(ans[nonemp])
par[lfixed()] <- x
li <- relist(par,skeleton)
reshape <- function(x){
if(is.null(attr(x,"map")))return(x)
y <- attr(x,"shape")
f <- attr(x,"map")
i <- which(f>=0)
y[i] <- x[f[i]+1]
y
}
for(i in seq(skeleton)){
ans[[nonempindex[i]]][] <- as.vector(li[[i]])
}
## MM: ans[] <- lapply(ans, reshape) # _____________________
for(i in seq(ans)){
ans[[i]] <- reshape(ans[[i]])
}
ans
}
type <- match.arg(type, eval(type), several.ok = TRUE)
#if("ADFun"%in%type)ptrADFun <- .Call("MakeADFunObject",data,parameters) else ptrADFun <- NULL
reportenv <- new.env()
par <- NULL
last.par.ok <- last.par <- last.par1 <- last.par2 <- last.par.best <- NULL
value.best <- Inf
ADFun <- NULL
Fun <- NULL
ADGrad <- NULL
tracepar <- FALSE
validpar <- function(x)TRUE
tracemgc <- TRUE
## dummy assignments better than "globalVariables(....)"
L.created.by.newton <- skipFixedEffects <- spHess <- altHess <- NULL
## Disable all tracing information
beSilent <- function(){
tracemgc <<- FALSE
inner.control$trace <<- FALSE
silent <<- TRUE
cf <- config(DLL=DLL)
i <- grep("^trace.",names(cf))
cf[i] <- 0
cf$DLL <- DLL
do.call(config, cf)
NULL
}
if(silent)beSilent()
## Getting shape of ad reported variables
ADreportDims <- NULL
ADreportIndex <- function() {
lngt <- sapply(ADreportDims, prod)
offset <- head( cumsum( c(1, lngt) ) , -1)
ans <- lapply(seq_along(lngt),
function(i) array(seq(from = offset[i],
length.out = lngt[i]),
ADreportDims[[i]] ))
names(ans) <- names(ADreportDims)
ans
}
## All external pointers are created in function "retape" and can be re-created
## by running retape() if e.g. the number of openmp threads is changed.
## set.defaults: reset internal parameters to their default values.
.random <- random
retape <- function(set.defaults = TRUE){
omp <- config(DLL=DLL) ## Get current OpenMP configuration
random <<- .random ## Restore original 'random' argument
if(atomic){ ## FIXME: Then no reason to create ptrFun again later ?
## User template contains atomic functions ==>
## Have to call "double-template" to trigger tape generation
Fun <<- MakeDoubleFunObject(data, parameters, reportenv, DLL=DLL)
## Hack: unlist(parameters) is only guaranteed to be a permutation of the parameter vector.
out <- EvalDoubleFunObject(Fun, unlist(parameters), get_reportdims = TRUE)
ADreportDims <<- attr(out, "reportdims")
}
if(is.character(profile)){
random <<- c(random, profile)
}
if(is.character(random)){
if(!regexp){ ## Default: do exact match
if(!all(random %in% names(parameters))){
cat("Some 'random' effect names does not match 'parameter' list:\n")
print(setdiff(random,names(parameters)))
cat("(Note that regular expression match is disabled by default)\n")
stop()
}
if(any(duplicated(random))){
cat("Duplicates in 'random' - will be removed\n")
random <<- unique(random)
}
tmp <- lapply(parameters,function(x)x*0)
tmp[random] <- lapply(tmp[random],function(x)x*0+1)
random <<- which(as.logical(unlist(tmp)))
if(length(random)==0) random <<- NULL
}
if(regexp){ ## Original regular expression match
random <<- grepRandomParameters(parameters,random)
if(length(random)==0){
cat("Selected random effects did not match any model parameters.\n")
random <<- NULL
}
}
if(is.character(profile)){
## Convert 'profile' to a pointer into random (represented
## as logical index vector):
tmp <- lapply(parameters,function(x)x*0)
tmp[profile] <- lapply(tmp[profile],function(x)x*0+1)
profile <<- match( which(as.logical(unlist(tmp))) , random )
if(length(profile)==0) random <<- NULL
if(any(duplicated(profile))) stop("Profile parameter vector not unique.")
tmp <- rep(0L, length(random))
tmp[profile] <- 1L
profile <<- tmp
}
if (set.defaults) {
par <<- unlist(parameters)
}
}
if("ADFun"%in%type){
## autopar? => Tape with single thread
if (omp$autopar)
openmp(1, DLL=DLL)
ADFun <<- MakeADFunObject(data, parameters, reportenv, ADreport=ADreport, DLL=DLL)
## autopar? => Restore OpenMP number of threads
if (omp$autopar)
openmp(omp$nthreads, DLL=DLL)
if (!is.null(integrate)) {
nm <- sapply(parameters, length)
nmpar <- rep(names(nm), nm)
for (i in seq_along(integrate)) {
I <- integrate[i]
## Special case: joint integration list
if (is.null(names(I)) || names(I) == "") {
I <- I[[1]]
}
ok <- all(names(I) %in% nmpar[random])
if (!ok)
stop("Names to be 'integrate'd must be among the random parameters")
w <- which(nmpar[random] %in% names(I))
## Argument 'which' is common to all methods
arg_which <- I[[1]]$which
if ( ! is.null(arg_which) )
w <- w[arg_which]
method <- sapply(I, function(x) x$method)
ok <- all(duplicated(method)[-1])
if (!ok)
stop("Grouping only allowed for identical methods")
method <- method[1]
cfg <- NULL
if (method == "marginal_sr") {
## SR has special support for joint integration
fac <- factor(nmpar[random[w]],
levels=names(I))
cfg <- list(
grid = I,
random2grid = fac
)
} else {
## For other methods we use the first
## (FIXME: Test no contradicting choices)
cfg <- I[[1]]
}
stopifnot (is.list(cfg))
## Integrate parameter subset out of the likelihood
TransformADFunObject(ADFun,
method = method,
random_order = random[w],
config = cfg,
mustWork = 1L)
## Find out what variables have been integrated
## (only GK might not integrate all random[w])
activeDomain <- as.logical(info(ADFun)$activeDomain)
random_remove <- random[w][!activeDomain[random[w]]]
## Integrated parameters must no longer be present
TransformADFunObject(ADFun,
method="remove_random_parameters",
random_order = random_remove,
mustWork = 1L)
## Adjust 'random' and 'par' accordingly
attr(ADFun$ptr, "par") <- attr(ADFun$ptr, "par")[-random_remove]
par_mask <- rep(FALSE, length(attr(ADFun$ptr, "par")))
par_mask[random] <- TRUE
par <<- par[-random_remove]
nmpar <- nmpar[-random_remove]
par_mask <- par_mask[-random_remove]
random <<- which(par_mask)
if (length(random) == 0) {
random <<- NULL
type <<- setdiff(type, "ADGrad")
}
## Run tape optimizer
if (config(DLL=DLL)$optimize.instantly) {
TransformADFunObject(ADFun,
method = "optimize",
mustWork = 1L)
}
}
}
if (intern) {
cfg <- inner.control
if (is.null(cfg$sparse)) cfg$sparse <- TRUE
cfg <- lapply(cfg, as.double)
TransformADFunObject(ADFun,
method = "laplace",
config = cfg,
random_order = random,
mustWork = 1L)
TransformADFunObject(ADFun,
method="remove_random_parameters",
random_order = random,
mustWork = 1L)
## FIXME: Should be done by above .Call
attr(ADFun$ptr,"par") <- attr(ADFun$ptr,"par")[-random]
##
par <<- par[-random]
random <<- NULL
## Run tape optimizer
if (config(DLL=DLL)$optimize.instantly) {
TransformADFunObject(ADFun,
method = "optimize",
mustWork = 1L)
}
}
if (set.defaults) {
par <<- attr(ADFun$ptr,"par")
last.par <<- par
last.par1 <<- par
last.par2 <<- par
last.par.best <<- par
value.best <<- Inf
}
}
if (omp$autopar && !ADreport) {
## Experiment !
TransformADFunObject(ADFun,
method = "parallel_accumulate",
num_threads = as.integer(openmp(DLL=DLL)),
mustWork = 0L)
}
if (length(random) > 0) {
## Experiment !
TransformADFunObject(ADFun,
method = "reorder_random",
random_order = random,
mustWork = 0L)
}
if("Fun"%in%type) {
Fun <<- MakeDoubleFunObject(data, parameters, reportenv, DLL=DLL)
}
if("ADGrad"%in%type) {
retape_adgrad(lazy = TRUE)
}
## Skip fixed effects from the full hessian ?
## * Probably more efficient - especially in terms of memory.
## * Only possible if a taped gradient is available - see function "ff" below.
env$skipFixedEffects <- !is.null(ADGrad)
delayedAssign("spHess", sparseHessianFun(env, skipFixedEffects=skipFixedEffects ),
assign.env = env)
}## end{retape}
## Lazy / Full adgrad ?
retape_adgrad <- function(lazy = TRUE) {
## * Use already taped function value f = ADFun$ptr
## * In random effects case we only need the 'random' part of the gradient
if (!lazy) random <- NULL
ADGrad <<- MakeADGradObject(data, parameters, reportenv,
random=random, f=ADFun$ptr, DLL=DLL)
}
retape(set.defaults = TRUE)
## Has atomic functions been generated for the tapes ?
usingAtomics <- function().Call("usingAtomics", PACKAGE=DLL)
.data <- NULL
f <- function(theta=par, order=0, type="ADdouble",
cols=NULL, rows=NULL,
sparsitypattern=0, rangecomponent=1, rangeweight=NULL,
dumpstack=0, doforward=1, do_simulate=0, set_tail=0,
keepx=NULL, keepy=NULL) {
if(isNullPointer(ADFun$ptr)) {
if(silent)beSilent()
## Loaded or deep copied object: Only restore external
## pointers. Don't touch last.par/last.par.best etc:
retape(set.defaults = FALSE)
}
## User has changed the data => Next forward pass must traverse whole graph !
data_changed <- !identical(.data, data) ## Fast to check if identical (i.e. most of the time)
if (data_changed) {
.data <<- data ## Shallow copy (fast)
}
switch(type,
"ADdouble" = {
res <- EvalADFunObject(ADFun, theta,
order=order,
hessiancols=cols,
hessianrows=rows,
sparsitypattern=sparsitypattern,
rangecomponent=rangecomponent,
rangeweight=rangeweight,
dumpstack=dumpstack,
doforward=doforward,
set_tail=set_tail,
data_changed=data_changed)
last.par <<- theta
if(order==1)last.par1 <<- theta
if(order==2)last.par2 <<- theta
},
"double" = {
res <- EvalDoubleFunObject(Fun, theta, do_simulate=do_simulate)
},
"ADGrad" = {
res <- EvalADFunObject(ADGrad, theta,
order=order,
hessiancols=cols,
hessianrows=rows,
sparsitypattern=sparsitypattern,
rangecomponent=rangecomponent,
rangeweight=rangeweight,
dumpstack=dumpstack,
doforward=doforward,
set_tail=set_tail,
keepx=keepx,
keepy=keepy,
data_changed=data_changed)
},
stop("invalid 'type'")) # end{ switch() }
res
} ## end{ f }
h <- function(theta=par, order=0, hessian, L, ...) {
if(order == 0) {
##logdetH <- determinant(hessian)$mod
logdetH <- 2*determinant(L, sqrt=TRUE)$modulus
ans <- f(theta,order=0) + .5*logdetH - length(random)/2*log(2*pi)
if(LaplaceNonZeroGradient){
grad <- f(theta,order=1)[random]
ans - .5* sum(grad * as.numeric( solveCholesky(L, grad) ))
} else
ans
}
else if(order == 1) {
if(LaplaceNonZeroGradient)stop("Not correct for LaplaceNonZeroGradient=TRUE")
##browser()
e <- environment(spHess)
solveSubset <- function(L).Call("tmb_invQ",L,PACKAGE="TMB")
solveSubset2 <- function(L).Call("tmb_invQ_tril_halfdiag",L,PACKAGE="TMB")
## FIXME: The following two lines are not efficient:
## 1. ihessian <- tril(solveSubset(L))
## 2. diag(ihessian) <- .5*diag(ihessian)
## Make option to solveSubset to return lower triangular part
## with diagonal halved. As it is now the output of solveSubset is
## symm _with upper storage_ (!) (side effect of cholmod_ptranspose)
## therefore tril takes long time. Further, "diag<-" is too slow.
## FIXED! :
ihessian <- solveSubset2(L)
## Profile case correction (1st order case only)
if(!is.null(profile)){
## Naive way:
## ihessian[profile,] <- 0
## ihessian[,profile] <- 0
## However, this would modify sparseness pattern and also not
## account for 'ihessian' being permuted:
perm <- L@perm+1L
ihessian <- .Call("tmb_sparse_izamd", ihessian, profile[perm], 0.0, PACKAGE="TMB")
}
## General function to lookup entries A subset B.
## lookup.old <- function(A,B){
## A <- as(tril(A),"dtTMatrix")
## B <- as(tril(B),"dtTMatrix")
## match(paste(A@i,A@j),paste(B@i,B@j))
## }
## General function to lookup entries A in B[r,r] assuming pattern of A
## is subset of pattern of B[r,r].
lookup <- function(A,B,r=NULL){
A <- tril(A); B <- tril(B)
B@x[] <- seq.int(length.out=length(B@x)) ## Pointers to full B matrix (Can have up to 2^31-1 non-zeros)
if(!is.null(r)){
## Goal is to get:
## B <- forceSymmetric(B)
## B <- B[r,r,drop=FALSE]
## However the internal Matrix code for
## "B[r,r,drop=FALSE]" creates temporary "dgCMatrix"
## thereby almost doubling the number of non-zeros. Need
## solution that works with max (2^31-1) non-zeros:
B <- .Call("tmb_half_diag", B, PACKAGE="TMB")
B <- tril( B[r,r,drop=FALSE] ) + tril( t(B)[r,r,drop=FALSE] )
}
m <- .Call("match_pattern", A, B, PACKAGE="TMB") ## Same length as A@x with pointers to B@x
B@x[m]
}
if(is.null(e$ind1)){
## hessian: Hessian of random effect part only.
## ihessian: Inverse subset of hessian (same dim but larger pattern!).
## Hfull: Pattern of full hessian including fixed effects.
if (!silent) cat("Matching hessian patterns... ")
iperm <- invPerm(L@perm+1L)
e$ind1 <- lookup(hessian,ihessian,iperm) ## Same dimensions
e$ind2 <- lookup(hessian,e$Hfull,random) ## Note: dim(Hfull)>dim(hessian) !
if (!silent) cat("Done\n")
}
w <- rep(0,length.out=length(e$Hfull@x))
w[e$ind2] <- ihessian@x[e$ind1]
## Reverse mode evaluate ptr in rangedirection w
## now gives .5*tr(Hdot*Hinv) !!
## return
as.vector( f(theta,order=1) ) +
EvalADFunObject(e$ADHess, theta,
order=1,
rangeweight=w)
}## order == 1
else stop(sprintf("'order'=%d not yet implemented", order))
} ## end{ h }
ff <- function(par.fixed=par[-random], order=0, ...) {
## Laplace approximation of the negative log-likelihood as a function
## of the outer (fixed) parameters only. The inner (random) parameters
## are optimized out by an inner optimization ('newton' by default).
## 'order' selects the derivative order of the returned quantity:
## 0 = value, 1 = gradient, 2 = hessian (wrt the fixed parameters).
## Uses closure state: par, random, f, spHess, env, profile, altHess,
## inner.method, inner.control, random.start, skipFixedEffects, and
## updates last.par.best / value.best / last.par.ok via '<<-'.
names(par.fixed) <- names(par[-random])
## Inner objective: joint function of the random effects with the
## fixed effects held at 'par.fixed'. 'order' selects value, gradient
## or hessian restricted to the random-effect block.
f0 <- function(par.random,order=0,...){
par[random] <- par.random
par[-random] <- par.fixed
res <- f(par,order=order,set_tail=random[1],...)
switch(order+1,res,res[random],res[random,random])
}
## sparse hessian
## Sparse hessian of the inner objective wrt the random effects only.
H0 <- function(par.random){
par[random] <- par.random
par[-random] <- par.fixed
#spHess(par)[random,random,drop=FALSE]
spHess(par,random=TRUE,set_tail=random[1])
}
## Inner optimization: find the random-effect mode given 'par.fixed'.
if(inner.method=="newton"){
#opt <- newton(eval(random.start),fn=f0,gr=function(x)f0(x,order=1),
# he=function(x)f0(x,order=2))
opt <- try( do.call("newton",c(list(par=eval(random.start),
fn=f0,
gr=function(x)f0(x,order=1),
##he=function(x)f0(x,order=2)),
he=H0,env=env),
inner.control)
), silent=silent
)
## A failed or non-finite inner optimum gives NaN for the value;
## for the gradient we have nothing sensible to return, so stop.
if (inherits(opt, "try-error") || !is.finite(opt$value)) {
if (order==0) return(NaN)
if (order==1) stop("inner newton optimization failed during gradient calculation")
stop("invalid 'order'")
}
} else {
## Fallback: generic optimizer from 'stats'.
opt <- optim(eval(random.start),fn=f0,gr=function(x)f0(x,order=1),
method=inner.method,control=inner.control)
}
## Insert the inner optimum into the full parameter vector.
par[random] <- opt$par
par[-random] <- par.fixed
## Use alternative Hessian for log determinant?
altHessFlag <- !is.null(altHess)
if (altHessFlag) {
altHess(TRUE) ## Enable alternative hessian
on.exit(altHess(FALSE))
}
## HERE! - update hessian and cholesky
if(!skipFixedEffects){ ## old way
hess <- spHess(par) ## Full hessian
hessian <- hess[random,random] ## Subset
} else {
hessian <- spHess(par,random=TRUE)
}
## Profile case correction (0 and 1st order)
if( !is.null(profile) ){
## Naive way:
## hessian[profile, ] <- 0
## hessian[, profile] <- 0
## diag(hessian)[profile] <- 1
## However, this would modify sparseness pattern:
hessian <- .Call("tmb_sparse_izamd", hessian, profile, 1.0, PACKAGE="TMB")
}
## Update Cholesky:
## Reuse the factor created by the inner newton when available so the
## symbolic analysis is not redone; otherwise factorize from scratch.
if(inherits(env$L.created.by.newton,"dCHMsuper")){
L <- env$L.created.by.newton
##.Call("destructive_CHM_update",L,hessian,as.double(0),PACKAGE="Matrix")
updateCholesky(L,hessian)
} else
L <- Cholesky(hessian,perm=TRUE,LDL=FALSE,super=TRUE)
if(order==0){
## Laplace-approximated value via 'h' (defined in the enclosing env).
res <- h(par,order=0,hessian=hessian,L=L)
## Profile case correction
if(!is.null(profile)){
res <- res + sum(profile)/2*log(2*pi)
}
## Track the best (smallest) finite value seen so far.
if(is.finite(res)){
if(res<value.best){
last.par.best <<- par; value.best <<- res
}
}
}
if(order==1){
#hess <- f(par,order=2,cols=random)
#hess <- spHess(par)##[,random,drop=FALSE]
grad <- h(par,order=1,hessian=hessian,L=L)
#res <- grad[-random] - t(grad[random])%*%solve(hess[random,random])%*%hess[random,-random]
#res <- grad[-random] - t(grad[random])%*%solve(hess[random,])%*%t(hess[-random,])
if (altHessFlag) {
## Restore original hessian and Cholesky
altHess(FALSE) ## Disable alternative hessian
hessian <- spHess(par, random=TRUE)
updateCholesky(L, hessian)
}
## Profile case correction. The remaining calculations must be
## done with the original hessian (which has been destroyed)
if(!is.null(profile)){
## Update hessian and Cholesky:
if(!skipFixedEffects){ ## old way
hess <- spHess(par) ## Full hessian
hessian <- hess[random,random] ## Subset
} else {
hessian <- spHess(par,random=TRUE)
}
updateCholesky(L,hessian)
}
## res <- grad[-random] -
## hess[-random,random]%*%as.vector(solve(hess[random,random],grad[random]))
## Chain rule: total derivative wrt fixed effects, accounting for the
## implicit dependence of the random-effect mode on the fixed effects.
if(!skipFixedEffects){
## Relies on "hess[-random,random]" !!!!!
res <- grad[-random] -
hess[-random,random] %*% as.vector(solveCholesky(L,grad[random]))
} else {
## Smarter: Do a reverse sweep of ptrADGrad
w <- rep(0,length(par))
w[random] <- as.vector(solveCholesky(L,grad[random]))
res <- grad[-random] -
f(par, order=1, type="ADGrad", rangeweight=w)[-random]
}
res <- drop(res)
}
if(order == 2) {
## Second-order derivatives of the Laplace approximation via the
## implicit function theorem: eta(fixed) denotes the random-effect
## mode; D1eta/D2eta are its first/second derivatives.
n <- length(par); nr <- length(random); nf <- n-nr
fixed <- setdiff(1:n,random)
D1h <- h(par,order=1) ## 1*n
D2h <- h(par,order=2) ## n*n
D2f <- f(par,order=2,cols=random) ## n*nr
D3f <- sapply(random,function(i)
f(par,type="ADGrad",
order=2,rangecomponent=i)) ## n^2 * nr
I.D2f <- solve(D2f[random,])
D1eta <- -t(D2f[-random,] %*% I.D2f) ## nr*nf
D3f.D1eta <- D3f%*%D1eta ## n^2 * nf
dim(D3f.D1eta) <- c(n,n,nf)
dim(D3f) <- c(n,n,nr)
D3f.fixed <- D3f[fixed,,] ##nf*n*nr
D2eta <- sapply(1:nf, function(i) {
-I.D2f %*% (t(D3f.fixed[i,fixed,]) + D3f.D1eta[random,fixed, i] +
( D3f.fixed[i,random,] + D3f.D1eta[random,random,i] ) %*% D1eta )
}) # nr*nf*nf
dim(D2eta) <- c(nr,nf,nf)
D2h.fixed <- D2h[fixed,] #nf*n
res <- sapply(1:nf,function(i){
D2h.fixed[i,fixed] + t( D2h.fixed[,random] %*% D1eta[,i] ) +
( t(D2h.fixed[i,random]) +
t(D2h[random,random] %*% D1eta[,i]) ) %*% D1eta +
D1h[,random] %*% D2eta[,,i]
})
attr(res,"D2eta") <- D2eta
attr(res,"D1eta") <- D1eta
#attr(res,"D1h") <- D1h
#attr(res,"D2h") <- D2h
#attr(res,"D2f") <- D2f
#attr(res,"D3f") <- D3f
}
## Remember the last parameter vector that produced a finite result.
if(all(is.finite(res))) last.par.ok <<- par
res
} ## end{ ff }
## Monte Carlo improvement of Laplace approximation
## Importance sampling from *fixed* reference measure determined
## by parameter "par0". Assumptions:
## * We know how to sample from the measure - see rmvnorm.
## * We know how to evaluate the density of the samples - see logdmvnorm.
## * Eventually "par0" will stabilize and become independent of the fixed
## effects "par", so that derivatives of the sample density and the samples
## are zero wrt. the fixed effects.
MC <- function(par=last.par, ## Point in which we are evaluating the likelihood
par0=last.par.best, ## Parameter for proposal distribution
n=100, ## Number of samples
order=0, ## Derivative order
seed=NULL, ## Random seed
antithetic=TRUE, ## Reduce variance
keep=FALSE, ## Keep samples and fct evals
phi=NULL, ## Function to calculate mean of
...){
if(is.numeric(seed))set.seed(seed)
## Clean up on exit
last.par.old <- last.par
last.par.best.old <- last.par.best
on.exit({last.par <<- last.par.old;
last.par.best <<- last.par.best.old})
## Update Cholesky needed by reference measure
h <- spHess(par0,random=TRUE)
L <- L.created.by.newton
updateCholesky(L,h) ## P %*% h %*% Pt = L %*% Lt
rmvnorm <- function(n){
u <- matrix(rnorm(ncol(L)*n),ncol(L),n)
u <- solve(L,u,system="Lt") ## Solve Lt^-1 %*% u
u <- solve(L,u,system="Pt") ## Multiply Pt %*% u
as.matrix(u)
}
M.5.log2pi <- -.5* log(2*pi) # = log(1/sqrt(2*pi))
logdmvnorm <- function(u){
logdetH.5 <- determinant(L, logarithm=TRUE, sqrt=TRUE)$modulus # = log(det(L)) = .5 * log(det(H))
nrow(h)*M.5.log2pi + logdetH.5 - .5*colSums(u*as.matrix(h %*% u))
}
eval.target <- function(u,order=0){
par[random] <- u
f(par,order=order)
}
samples <- rmvnorm(n)
if(antithetic)samples <- cbind(samples,-samples) ## Antithetic variates
log.density.propose <- logdmvnorm(samples)
samples <- samples+par0[random]
log.density.target <- -apply(samples,2,eval.target)
log.density.target[is.nan(log.density.target)] <- -Inf
I <- log.density.target - log.density.propose
M <- max(I)
if(order>=1){
vec <- exp(I-M)
p <- vec/sum(vec)
i <- (p>0)
p <- p[i]
I1 <- apply(samples[,i,drop=FALSE],2,eval.target,order=1)[-random,,drop=FALSE]