/
utilities.R
1277 lines (1195 loc) · 49.2 KB
/
utilities.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
if((Rv <- getRversion()) < "3.2.1") {
lengths <- function (x, use.names = TRUE) vapply(x, length, 1L, USE.NAMES = use.names)
if(Rv < "3.1.0") {
anyNA <- function(x) any(is.na(x))
if(Rv < "3.0.0") {
rep_len <- function(x, length.out) rep(x, length.out=length.out)
if(Rv < "2.15")
paste0 <- function(...) paste(..., sep = '')
}
}
} ## R < 3.2.1
rm(Rv)
## From Matrix package isDiagonal(.) :
all0 <- function(x) !anyNA(x) && all(!x)
.isDiagonal.sq.matrix <- function(M, n = dim(M)[1L])
all0(M[rep_len(c(FALSE, rep.int(TRUE,n)), n^2)])
### Utilities for parsing and manipulating mixed-model formulas
##' deparse(.) returning \bold{one} string
##' @note Protects against the possibility that results from deparse() will be
##' split after 'width.cutoff' (by default 60, maximally 500)
safeDeparse <- function(x, collapse=" ") paste(deparse(x, 500L), collapse=collapse)
abbrDeparse <- function(x, width=60) {
r <- deparse(x, width)
if(length(r) > 1) paste(r[1], "...") else r
}
##' @param bars result of findbars
barnames <- function(bars) vapply(bars, function(x) safeDeparse(x[[3]]), "")
makeFac <- function(x,char.only=FALSE) {
if (!is.factor(x) && (!char.only || is.character(x))) factor(x) else x
}
factorize <- function(x,frloc,char.only=FALSE) {
## convert grouping variables to factors as necessary
## TODO: variables that are *not* in the data frame are
## not converted -- these could still break, e.g. if someone
## tries to use the : operator
## TODO: some sensible tests for drop.unused.levels
## (not actually used, but could come in handy)
for (i in all.vars(RHSForm(x))) {
if (!is.null(curf <- frloc[[i]]))
frloc[[i]] <- makeFac(curf,char.only)
}
return(frloc)
}
colSort <- function(x) {
termlev <- vapply(strsplit(x,":"),length,integer(1))
iterms <- split(x,termlev)
iterms <- sapply(iterms,sort,simplify=FALSE)
## make sure intercept term is first
ilab <- "(Intercept)"
if (ilab %in% iterms[[1]]) {
iterms[[1]] <- c(ilab,setdiff(iterms[[1]],ilab))
}
unlist(iterms)
}
##' @param x a language object of the form effect | groupvar
##' @param frloc model frame
##' @param drop.unused.levels (logical)
##' @return list containing grouping factor, sparse model matrix, number of levels, names
mkBlist <- function(x,frloc, drop.unused.levels=TRUE,
reorder.vars=FALSE) {
frloc <- factorize(x,frloc)
## try to evaluate grouping factor within model frame ...
if (is.null(ff <- tryCatch(eval(substitute(makeFac(fac),
list(fac = x[[3]])), frloc),
error=function(e) NULL)))
stop("couldn't evaluate grouping factor ",
deparse(x[[3]])," within model frame:",
" try adding grouping factor to data ",
"frame explicitly if possible",call.=FALSE)
if (all(is.na(ff)))
stop("Invalid grouping factor specification, ",
deparse(x[[3]]),call.=FALSE)
## NB: *also* silently drops <NA> levels - and mkReTrms() and hence
## predict.merMod() have relied on that property :
if (drop.unused.levels) ff <- factor(ff, exclude=NA)
nl <- length(levels(ff))
## this section implements eq. 6 of the JSS lmer paper
## model matrix based on LHS of random effect term (X_i)
## x[[2]] is the LHS (terms) of the a|b formula
mm <- model.matrix(eval(substitute( ~ foo, list(foo = x[[2]]))), frloc)
if (reorder.vars) {
mm <- mm[colSort(colnames(mm)),]
}
## this is J^T (see p. 9 of JSS lmer paper)
## construct indicator matrix for groups by observations
## use fac2sparse() rather than as() to allow *not* dropping
## unused levels where desired
sm <- fac2sparse(ff, to = "d",
drop.unused.levels = drop.unused.levels)
sm <- KhatriRao(sm, t(mm))
dimnames(sm) <- list(
rep(levels(ff),each=ncol(mm)),
rownames(mm))
list(ff = ff, sm = sm, nl = nl, cnms = colnames(mm))
}
##' From the result of \code{\link{findbars}} applied to a model formula and
##' and the evaluation frame, create the model matrix, etc. associated with
##' random-effects terms. See the description of the returned value for a
##' detailed list.
##'
##' @title Create Z, Lambda, Lind, etc.
##' @param bars a list of parsed random-effects terms
##' @param fr a model frame in which to evaluate these terms
##' @return a list with components
##' \item{Zt}{transpose of the sparse model matrix for the random effects}
##' \item{Lambdat}{transpose of the sparse relative covariance factor}
##' \item{Lind}{an integer vector of indices determining the mapping of the
##' elements of the \code{theta} to the \code{"x"} slot of \code{Lambdat}}
##' \item{theta}{initial values of the covariance parameters}
##' \item{lower}{lower bounds on the covariance parameters}
##' \item{flist}{list of grouping factors used in the random-effects terms}
##' \item{cnms}{a list of column names of the random effects according to
##' the grouping factors}
##' @importFrom Matrix sparseMatrix drop0
##' @importMethodsFrom Matrix coerce rbind
##' @family utilities
##' @export
mkReTrms <- function(bars, fr, drop.unused.levels=TRUE,
reorder.terms=TRUE,
reorder.vars=FALSE) {
if (!length(bars))
stop("No random effects terms specified in formula",call.=FALSE)
stopifnot(is.list(bars), vapply(bars, is.language, NA),
inherits(fr, "data.frame"))
names(bars) <- barnames(bars)
term.names <- vapply(bars, safeDeparse, "")
## get component blocks
blist <- lapply(bars, mkBlist, fr, drop.unused.levels,
reorder.vars = reorder.vars)
nl <- vapply(blist, `[[`, 0L, "nl") # no. of levels per term
# (in lmer jss: \ell_i)
## order terms stably by decreasing number of levels in the factor
if (reorder.terms) {
if (any(diff(nl) > 0)) {
ord <- rev(order(nl))
blist <- blist [ord]
nl <- nl [ord]
term.names <- term.names[ord]
}
}
Ztlist <- lapply(blist, `[[`, "sm")
Zt <- do.call(rbind, Ztlist) ## eq. 7, JSS lmer paper
names(Ztlist) <- term.names
q <- nrow(Zt)
## Create and install Lambdat, Lind, etc. This must be done after
## any potential reordering of the terms.
cnms <- lapply(blist, `[[`, "cnms") # list of column names of the
# model matrix per term
nc <- lengths(cnms) # no. of columns per term
# (in lmer jss: p_i)
nth <- as.integer((nc * (nc+1))/2) # no. of parameters per term
# (in lmer jss: ??)
nb <- nc * nl # no. of random effects per term
# (in lmer jss: q_i)
## eq. 5, JSS lmer paper
if (sum(nb) != q) {
stop(sprintf("total number of RE (%d) not equal to nrow(Zt) (%d)",
sum(nb),q))
}
boff <- cumsum(c(0L, nb)) # offsets into b
thoff <- cumsum(c(0L, nth)) # offsets into theta
### FIXME: should this be done with cBind and avoid the transpose
### operator? In other words should Lambdat be generated directly
### instead of generating Lambda first then transposing?
Lambdat <-
t(do.call(sparseMatrix,
do.call(rbind,
lapply(seq_along(blist), function(i)
{
mm <- matrix(seq_len(nb[i]), ncol = nc[i],
byrow = TRUE)
dd <- diag(nc[i])
ltri <- lower.tri(dd, diag = TRUE)
ii <- row(dd)[ltri]
jj <- col(dd)[ltri]
## unused: dd[cbind(ii, jj)] <- seq_along(ii)
data.frame(i = as.vector(mm[, ii]) + boff[i],
j = as.vector(mm[, jj]) + boff[i],
x = as.double(rep.int(seq_along(ii),
rep.int(nl[i], length(ii))) +
thoff[i]))
}))))
thet <- numeric(sum(nth))
ll <- list(Zt = drop0(Zt), theta = thet, Lind = as.integer(Lambdat@x),
Gp = unname(c(0L, cumsum(nb))))
## lower bounds on theta elements are 0 if on diagonal, else -Inf
ll$lower <- -Inf * (thet + 1)
ll$lower[unique(diag(Lambdat))] <- 0
ll$theta[] <- is.finite(ll$lower) # initial values of theta are 0 off-diagonal, 1 on
Lambdat@x[] <- ll$theta[ll$Lind] # initialize elements of Lambdat
ll$Lambdat <- Lambdat
# massage the factor list
fl <- lapply(blist, `[[`, "ff")
# check for repeated factors
fnms <- names(fl)
if (length(fnms) > length(ufn <- unique(fnms))) {
fl <- fl[match(ufn, fnms)]
asgn <- match(fnms, ufn)
} else asgn <- seq_along(fl)
names(fl) <- ufn
## DON'T need fl to be a data.frame ...
## fl <- do.call(data.frame, c(fl, check.names = FALSE))
attr(fl, "assign") <- asgn
ll$flist <- fl
ll$cnms <- cnms
ll$Ztlist <- Ztlist
ll
} ## {mkReTrms}
##' Create an lmerResp, glmResp or nlsResp instance
##'
##' @title Create an lmerResp, glmResp or nlsResp instance
##' @param fr a model frame
##' @param REML logical scalar, value of REML for an lmerResp instance
##' @param family the optional glm family (glmResp only)
##' @param nlenv the nonlinear model evaluation environment (nlsResp only)
##' @param nlmod the nonlinear model function (nlsResp only)
##' @param ... where to look for response information if \code{fr} is missing.
##' Can contain a model response, \code{y}, offset, \code{offset}, and weights,
##' \code{weights}.
##' @return an lmerResp or glmResp or nlsResp instance
##' @family utilities
##' @export
mkRespMod <- function(fr, REML=NULL, family = NULL, nlenv = NULL, nlmod = NULL, ...)
{
if(!missing(fr)) {
y <- model.response(fr)
offset <- model.offset(fr)
weights <- model.weights(fr)
N <- n <- nrow(fr)
etastart_update <- model.extract(fr, "etastart")
mustart_update <- model.extract(fr, "mustart")
} else {
fr <- list(...)
y <- fr$y
N <- n <- NROW(y)
offset <- fr$offset
weights <- fr$weights
etastart_update <- fr$etastart
mustart_update <- fr$mustart
}
if(length(dim(y)) == 1L)
y <- drop(y) ## avoid problems with 1D arrays and keep names
if(isGLMM <- !is.null(family))
stopifnot(inherits(family, "family"))
## FIXME: may need to add X, or pass it somehow, if we want to use glm.fit
## test for non-numeric response here to avoid later
## confusing error messages from deeper machinery
if (!is.null(y)) { ## 'y' may be NULL if we're doing simulation
if(!(is.numeric(y) ||
((is.binom <- isGLMM && family$family == "binomial") &&
(is.factor(y) || is.logical(y))))) {
if (is.binom)
stop("response must be numeric or factor")
else {
if (is.logical(y))
y <- as.integer(y)
else stop("response must be numeric")
}
}
if(!all(is.finite(y)))
stop("NA/NaN/Inf in 'y'") # same msg as from lm.fit()
}
rho <- new.env()
rho$y <- if (is.null(y)) numeric(0) else y
if (!is.null(REML)) rho$REML <- REML
rho$etastart <- etastart_update
rho$mustart <- mustart_update
rho$start <- attr(fr,"start")
if (!is.null(nlenv)) {
stopifnot(is.language(nlmod),
is.environment(nlenv),
is.numeric(val <- eval(nlmod, nlenv)),
length(val) == n,
## FIXME? Restriction, not present in ole' nlme():
is.matrix(gr <- attr(val, "gradient")),
is.numeric(gr),
nrow(gr) == n,
!is.null(pnames <- colnames(gr)))
N <- length(gr)
rho$mu <- as.vector(val)
rho$sqrtXwt <- as.vector(gr)
rho$gam <- ## FIXME more efficient mget(pnames, envir=nlenv)
unname(unlist(lapply(pnames,
function(nm) get(nm, envir=nlenv))))
}
rho$offset <- if (!is.null(offset)) {
if (length(offset) == 1L) offset <- rep.int(offset, N)
else stopifnot(length(offset) == N)
unname(offset)
} else rep.int(0, N)
rho$weights <- if (!is.null(weights)) {
stopifnot(length(weights) == n, all(weights >= 0))
unname(weights)
} else rep.int(1, n)
if(isGLMM) {
## need weights for initializing evaluation
rho$nobs <- n
## allow trivial objects, e.g. for simulation
if (length(y)>0) eval(family$initialize, rho)
## ugh. this *is* necessary;
## family$initialize *ignores* mustart in env, overwrites!
## see ll 180-182 of src/library/stats/R/glm.R
## https://github.com/wch/r-source/search?utf8=%E2%9C%93&q=mukeep
if (!is.null(mustart_update)) rho$mustart <- mustart_update
## family$initialize <- NULL # remove clutter from str output
ll <- as.list(rho)
ans <- do.call(new, c(list(Class="glmResp", family=family),
ll[setdiff(names(ll), c("m", "nobs", "mustart"))]))
if (length(y)>0)
ans$updateMu(if (!is.null(es <- etastart_update)) es else family$linkfun(rho$mustart))
ans
} else if (is.null(nlenv)) ## lmer
do.call(lmerResp$new, as.list(rho))
else ## nlmer
do.call(nlsResp$new,
c(list(nlenv=nlenv,
nlmod=substitute(~foo, list(foo=nlmod)),
pnames=pnames), as.list(rho)))
}
##' From the right hand side of a formula for a mixed-effects model,
##' determine the pairs of expressions that are separated by the
##' vertical bar operator. Also expand the slash operator in grouping
##' factor expressions and expand terms with the double vertical bar operator
##' into separate, independent random effect terms.
##'
##' @title Determine random-effects expressions from a formula
##' @seealso \code{\link{formula}}, \code{\link{model.frame}}, \code{\link{model.matrix}}.
##' @param term a mixed-model formula
##' @return pairs of expressions that were separated by vertical bars
##' @section Note: This function is called recursively on individual
##' terms in the model, which is why the argument is called \code{term} and not
##' a name like \code{form}, indicating a formula.
##' @example
##' findbars(f1 <- Reaction ~ Days + (Days|Subject))
##' ## => list( Days | Subject )
##' findbars(y ~ Days + (1|Subject) + (0+Days|Subject))
##' ## => list of length 2: list ( 1 | Subject , 0+Days|Subject)
##' findbars(~ 1 + (1|batch/cask))
##' ## => list of length 2: list ( 1 | cask:batch , 1 | batch)
##' identical(findbars(~ 1 + (Days || Subject)),
##' findbars(~ 1 + (1|Subject) + (0+Days|Subject)))
##' \dontshow{
##' stopifnot(identical(findbars(f1),
##' list(expression(Days | Subject)[[1]])))
##' }
##' @family utilities
##' @keywords models utilities
##' @export
findbars <- function(term)
{
## Recursive function applied to individual terms
fb <- function(term)
{
if (is.name(term) || !is.language(term)) return(NULL)
if (term[[1]] == as.name("(")) return(fb(term[[2]]))
stopifnot(is.call(term))
if (term[[1]] == as.name('|')) return(term)
if (length(term) == 2) return(fb(term[[2]]))
c(fb(term[[2]]), fb(term[[3]]))
}
## Expand any slashes in the grouping factors returned by fb
expandSlash <- function(bb)
{
## Create the interaction terms for nested effects
makeInteraction <- function(x)
{
if (length(x) < 2) return(x)
trm1 <- makeInteraction(x[[1]])
trm11 <- if(is.list(trm1)) trm1[[1]] else trm1
list(substitute(foo:bar, list(foo=x[[2]], bar = trm11)), trm1)
}
## Return the list of '/'-separated terms
slashTerms <- function(x)
{
if (!("/" %in% all.names(x))) return(x)
if (x[[1]] != as.name("/"))
stop("unparseable formula for grouping factor",call.=FALSE)
list(slashTerms(x[[2]]), slashTerms(x[[3]]))
}
if (!is.list(bb))
expandSlash(list(bb))
else
unlist(lapply(bb, function(x) {
if (length(x) > 2 && is.list(trms <- slashTerms(x[[3]])))
## lapply(unlist(...)) - unlist returns a flattened list
lapply(unlist(makeInteraction(trms)),
function(trm) substitute(foo|bar, list(foo = x[[2]], bar = trm)))
else x
}))
}## {expandSlash}
modterm <- expandDoubleVerts(
if(is(term, "formula")) term[[length(term)]] else term)
expandSlash(fb(modterm))
}
##' From the right hand side of a formula for a mixed-effects model,
##' expand terms with the double vertical bar operator
##' into separate, independent random effect terms.
##'
##' @title Expand terms with \code{'||'} notation into separate \code{'|'} terms
##' @seealso \code{\link{formula}}, \code{\link{model.frame}}, \code{\link{model.matrix}}.
##' @param term a mixed-model formula
##' @return the modified term
##' @family utilities
##' @keywords models utilities
##' @export
expandDoubleVerts <- function(term)
{
expandDoubleVert <- function(term) {
frml <- formula(substitute(~x,list(x=term[[2]])))
## FIXME: do this without paste and deparse if possible!
## need term.labels not all.vars to capture interactions too:
newtrms <- paste0("0+", attr(terms(frml), "term.labels"))
if(attr(terms(frml), "intercept")!=0)
newtrms <- c("1", newtrms)
as.formula(paste("~(",
paste(vapply(newtrms, function(trm)
paste0(trm, "|", deparse(term[[3]])), ""),
collapse=")+("), ")"))[[2]]
}
if (!is.name(term) && is.language(term)) {
if (term[[1]] == as.name("(")) {
term[[2]] <- expandDoubleVerts(term[[2]])
}
stopifnot(is.call(term))
if (term[[1]] == as.name('||'))
return( expandDoubleVert(term) )
## else :
term[[2]] <- expandDoubleVerts(term[[2]])
if (length(term) != 2) {
if(length(term) == 3)
term[[3]] <- expandDoubleVerts(term[[3]])
}
}
term
}
##' Remove the random-effects terms from a mixed-effects formula,
##' thereby producing the fixed-effects formula.
##'
##' @title Omit terms separated by vertical bars in a formula
##' @param term the right-hand side of a mixed-model formula
##' @return the fixed-effects part of the formula
##' @section Note: This function is called recursively on individual
##' terms in the model, which is why the argument is called \code{term} and not
##' a name like \code{form}, indicating a formula.
##' @examples
##' nobars(Reaction ~ Days + (Days|Subject)) ## => Reaction ~ Days
##' @seealso \code{\link{formula}}, \code{\link{model.frame}}, \code{\link{model.matrix}}.
##' @family utilities
##' @keywords models utilities
##' @export
nobars <- function(term) {
nb <- nobars_(term) ## call recursive version
if (is(term,"formula") && length(term)==3 && is.symbol(nb)) {
## called with two-sided RE-only formula:
## construct response~1 formula
nb <- reformulate("1",response=deparse(nb))
}
## called with one-sided RE-only formula, or RHS alone
if (is.null(nb)) {
nb <- if (is(term,"formula")) ~1 else 1
}
nb
}
nobars_ <- function(term)
{
if (!anyBars(term)) return(term)
if (isBar(term)) return(NULL)
if (isAnyArgBar(term)) return(NULL)
if (length(term) == 2) {
nb <- nobars_(term[[2]])
if(is.null(nb)) return(NULL)
term[[2]] <- nb
return(term)
}
nb2 <- nobars_(term[[2]])
nb3 <- nobars_(term[[3]])
if (is.null(nb2)) return(nb3)
if (is.null(nb3)) return(nb2)
term[[2]] <- nb2
term[[3]] <- nb3
term
}
isBar <- function(term) {
if(is.call(term)) {
if((term[[1]] == as.name("|")) || (term[[1]] == as.name("||"))) {
return(TRUE)
}
}
FALSE
}
isAnyArgBar <- function(term) {
if ((term[[1]] != as.name("~")) && (term[[1]] != as.name("("))) {
for(i in seq_along(term)) {
if(isBar(term[[i]])) return(TRUE)
}
}
FALSE
}
anyBars <- function(term) {
any(c('|','||') %in% all.names(term))
}
##' Substitute the '+' function for the '|' and '||' function in a mixed-model
##' formula. This provides a formula suitable for the current
##' model.frame function.
##'
##' @title "Sub[stitute] Bars"
##' @param term a mixed-model formula
##' @return the formula with all | and || operators replaced by +
##' @section Note: This function is called recursively on individual
##' terms in the model, which is why the argument is called \code{term} and not
##' a name like \code{form}, indicating a formula.
##' @examples
##' subbars(Reaction ~ Days + (Days|Subject)) ## => Reaction ~ Days + (Days + Subject)
##' @seealso \code{\link{formula}}, \code{\link{model.frame}}, \code{\link{model.matrix}}.
##' @family utilities
##' @keywords models utilities
##' @export
subbars <- function(term)
{
if (is.name(term) || !is.language(term)) return(term)
if (length(term) == 2) {
term[[2]] <- subbars(term[[2]])
return(term)
}
stopifnot(length(term) >= 3)
if (is.call(term) && term[[1]] == as.name('|'))
term[[1]] <- as.name('+')
if (is.call(term) && term[[1]] == as.name('||'))
term[[1]] <- as.name('+')
for (j in 2:length(term)) term[[j]] <- subbars(term[[j]])
term
}
##' Does every level of f1 occur in conjunction with exactly one level
##' of f2? The function is based on converting a triplet sparse matrix
##' to a compressed column-oriented form in which the nesting can be
##' quickly evaluated.
##'
##' @title Is f1 nested within f2?
##'
##' @param f1 factor 1
##' @param f2 factor 2
##'
##' @return TRUE if factor 1 is nested within factor 2
##' @examples
##' with(Pastes, isNested(cask, batch)) ## => FALSE
##' with(Pastes, isNested(sample, batch)) ## => TRUE
##' @export
isNested <- function(f1, f2)
{
f1 <- as.factor(f1)
f2 <- as.factor(f2)
stopifnot(length(f1) == length(f2))
k <- length(levels(f1))
sm <- as(new("ngTMatrix",
i = as.integer(f2) - 1L,
j = as.integer(f1) - 1L,
Dim = c(length(levels(f2)), k)),
"CsparseMatrix")
all(sm@p[2:(k+1L)] - sm@p[1:k] <= 1L)
}
subnms <- function(form, nms) {
## Recursive function applied to individual terms
sbnm <- function(term)
{
if (is.name(term)) {
if (any(term == nms)) 0 else term
} else switch(length(term),
term, ## 1
{ ## 2
term[[2]] <- sbnm(term[[2]])
term
},
{ ## 3
term[[2]] <- sbnm(term[[2]])
term[[3]] <- sbnm(term[[3]])
term
})
}
sbnm(form)
}
##' Check for a constant term (a literal 1) in an expression
##
##' In the mixed-effects part of a nonlinear model formula, a constant
##' term is not meaningful because every term must be relative to a
##' nonlinear model parameter. This function recursively checks the
##' expressions in the formula for a a constant, calling stop() if
##' such a term is encountered.
##' @title Check for constant terms.
##' @param expr an expression
##' @return NULL. The function is executed for its side effect.
chck1 <- function(expr) {
if ((le <- length(expr)) == 1) {
if (is.numeric(expr) && expr == 1)
stop("1 is not meaningful in a nonlinear model formula")
return()
} else
for (j in seq_len(le)[-1]) Recall(expr[[j]])
}
## ---> ../man/nlformula.Rd --- Manipulate a nonlinear model formula
##' @param mc matched call from the caller, with arguments 'formula','start',...
##' @return a list with components "respMod", "frame", "X", "reTrms"
nlformula <- function(mc) {
start <- eval(mc$start, parent.frame(2L))
if (is.numeric(start)) start <- list(nlpars = start)
stopifnot(is.numeric(nlpars <- start$nlpars),
lengths(nlpars) == 1L,
length(pnames <- names(nlpars)) == length(nlpars),
length(form <- as.formula(mc$formula)) == 3L,
is(nlform <- eval(form[[2]]), "formula"),
pnames %in% all.vars(nlmod <-
as.call(nlform[[lnl <- length(nlform)]])))
## MM{FIXME}: fortune(106) even twice in here!
nlform[[lnl]] <- parse(text= paste(setdiff(all.vars(form), pnames), collapse=' + '))[[1]]
nlform <- eval(nlform)
environment(nlform) <- environment(form)
m <- match(c("data", "subset", "weights", "na.action", "offset"),
names(mc), 0)
mc <- mc[c(1, m)]
mc$drop.unused.levels <- TRUE
mc[[1L]] <- quote(stats::model.frame)
mc$formula <- nlform
fr <- eval(mc, parent.frame(2L))
n <- nrow(fr)
nlenv <- list2env(fr, parent=parent.frame(2L))
lapply(pnames, function(nm) nlenv[[nm]] <- rep.int(nlpars[[nm]], n))
respMod <- mkRespMod(fr, nlenv=nlenv, nlmod=nlmod)
chck1(meform <- form[[3L]])
pnameexpr <- parse(text=paste(pnames, collapse='+'))[[1]]
nb <- nobars_(meform) ## call ORIGINAL recursive form
fe <- eval(substitute(~ 0 + nb + pnameexpr))
environment(fe) <- environment(form)
frE <- do.call(rbind, lapply(seq_along(nlpars), function(i) fr)) # rbind s copies of the frame
for (nm in pnames) # convert these variables in fr to indicators
frE[[nm]] <- as.numeric(rep(nm == pnames, each = n))
X <- model.matrix(fe, frE)
rownames(X) <- NULL
reTrms <- mkReTrms(lapply(findbars(meform),
function(expr) {
expr[[2]] <- substitute(0+foo, list(foo=expr[[2]]))
expr
}), frE)
list(respMod=respMod, frame=fr, X=X, reTrms=reTrms, pnames=pnames)
} ## {nlformula}
################################################################################
## Beginning to think about exposing tools to create devcomp lists.
## Could be useful when extending merMod objects. Commenting them out
## however, because R CMD check is complaining:
## https://github.com/lme4/lme4/commit/8d71e439758999ea8f90eb4752487e189407ef33#commitcomment-8773017
################################################################################
##
## .dims <- function(pp, resp, nAGQ,
## reTrms, n, p, rcl,
## compDev = NULL) {
## if(missing(rcl)) rcl <- class(resp)
## if(missing(n)) n <- nrow(pp$V)
## if(missing(p)) p <- ncol(pp$V)
## c(N=nrow(pp$X), n=n, p=p, nmp=n-p,
## nth=length(pp$theta), q=nrow(pp$Zt),
## nAGQ=rho$nAGQ,
## compDev=rho$compDev,
## ## 'use scale' in the sense of whether dispersion parameter should
## ## be reported/used (*not* whether theta should be scaled by sigma)
## useSc=(rcl != "glmResp" ||
## !resp$family$family %in% c("poisson","binomial")),
## reTrms=length(reTrms$cnms),
## spFe=0L,
## REML=if (rcl=="lmerResp") resp$REML else 0L,
## GLMM=(rcl=="glmResp"),
## NLMM=(rcl=="nlsResp"))
## }
##
## .cmp <- function(pp, resp, dims, fval,
## wrss, sqrLenU, pwrss,
## sigmaML, rcl, fac,
## tolPwrss = NULL,
## trivial.y = FALSE) {
## if(missing(rcl)) rcl <- class(resp)
## if(missing(fac)) fac <- as.numeric(rcl != "nlsResp")
## if(missing(wrss)) wrss <- resp$wrss()
## if(missing(sqrLenU)) sqrLenU <- pp$sqrL(fac)
## if(missing(pwrss)) pwrss <- wrss + sqrLenU
## if(missing(sigmaML)) sigmaML <- pwrss/dims['n']
## c(ldL2=pp$ldL2(), ldRX2=pp$ldRX2(), wrss=wrss,
## ussq=sqrLenU, pwrss=pwrss,
## drsum=if (rcl=="glmResp" && !trivial.y) resp$resDev() else NA,
## REML=if (rcl=="lmerResp" && resp$REML != 0L && !trivial.y)
## opt$fval else NA,
## ## FIXME: construct 'REML deviance' here?
## dev=if (rcl=="lmerResp" && resp$REML != 0L || trivial.y) NA else opt$fval,
## sigmaML=sqrt(unname(if (!dims["useSc"] || trivial.y) NA else sigmaML)),
## sigmaREML=sqrt(unname(if (rcl!="lmerResp" || trivial.y) NA else sigmaML*(dims['n']/dims['nmp']))),
## tolPwrss=rho$tolPwrss)
## }
################################################################################
.minimalOptinfo <- function()
list(conv = list(opt = 0L,
lme4 = list(messages = character(0))))
getConv <- function(x) {
if (!is.null(x[["conv"]])) {
x[["conv"]]
} else x[["convergence"]]
}
getMsg <- function(x) {
if (!is.null(x[["msg"]])) {
x[["msg"]]
} else if (!is.null(x[["message"]])) {
x[["message"]]
} else ""
}
.optinfo <- function(opt, lme4conv=NULL)
list(optimizer = attr(opt, "optimizer"),
control = attr(opt, "control"),
derivs = attr(opt, "derivs"),
conv = list(opt = getConv(opt), lme4 = lme4conv),
feval = if (is.null(opt$feval)) NA else opt$feval,
message = getMsg(opt),
warnings = attr(opt, "warnings"),
val = opt$par)
##' Potentially needed in more than one place, be sure to keep consistency!
##' hack (NB families have weird names) from @aosmith16; then corrected
isNBfamily <- function(familyString)
grepl("^Negative ?Binomial", familyString, ignore.case=TRUE)
normalizeFamilyName <- function(family) { # such as object@resp$family
if(isNBfamily(family$family))
family$family <- "negative.binomial"
family
}
##' Is it a family with no scale parameter
hasNoScale <- function(family)
any(substr(family$family, 1L, 12L)
== c("poisson", "binomial", "negative.bin", "Negative Bin"))
##--> ../man/mkMerMod.Rd ---Create a merMod object
##' @param rho the environment of the objective function
##' @param opt the value returned by the optimizer
##' @param reTrms reTrms list from the calling function
mkMerMod <- function(rho, opt, reTrms, fr, mc, lme4conv=NULL) {
if(missing(mc)) mc <- match.call()
stopifnot(is.environment(rho),
is(pp <- rho$pp, "merPredD"),
is(resp <- rho$resp, "lmResp"),
is.list(opt), "par" %in% names(opt),
c("conv", "fval") %in% substr(names(opt),1,4), ## "conv[ergence]", "fval[ues]"
is.list(reTrms), c("flist", "cnms", "Gp", "lower") %in% names(reTrms),
length(rcl <- class(resp)) == 1)
n <- nrow(pp$V)
p <- ncol(pp$V)
isGLMM <- (rcl == "glmResp")
dims <- c(N = nrow(pp$X), n=n, p=p, nmp = n-p, q = nrow(pp$Zt),
nth = length(pp$theta),
nAGQ= rho$nAGQ,
compDev=rho$compDev,
## 'use scale' in the sense of whether dispersion parameter should
## be reported/used (*not* whether theta should be scaled by sigma)
useSc = !(isGLMM && hasNoScale(resp$family)),
reTrms=length(reTrms$cnms),
spFe= 0L,
REML = if (rcl=="lmerResp") resp$REML else 0L,
GLMM= isGLMM,
NLMM= (rcl=="nlsResp"))
storage.mode(dims) <- "integer"
fac <- as.numeric(rcl != "nlsResp")
if (trivial.y <- (length(resp$y)==0)) {
## trivial model
sqrLenU <- wrss <- pwrss <- NA
} else {
sqrLenU <- pp$sqrL(fac)
wrss <- resp$wrss()
pwrss <- wrss + sqrLenU
}
## weights <- resp$weights
beta <- pp$beta(fac)
## rescale
if (!is.null(sc <- attr(pp$X, "scaled:scale"))) {
warning("auto(un)scaling not yet finished/tested")
## FIXME: test/handle no-intercept models
## (only need to worry if we do centering as well as scaling)
## FIXME: adjust Hessian/vcov
## FIXME: where else will these changes propagate?
## profiling?
beta2 <- beta
beta2[names(sc)] <- sc*beta2[names(sc)]
beta <- beta2
}
if (!is.null(attr(pp$X, "scaled:center"))) {
warning("auto(un)centering not yet implemented")
}
#sigmaML <- pwrss/sum(weights)
sigmaML <- pwrss/n
if (rcl != "lmerResp") {
pars <- opt$par
if (length(pars) > length(pp$theta)) beta <- pars[-(seq_along(pp$theta))]
}
cmp <- c(ldL2=pp$ldL2(), ldRX2=pp$ldRX2(), wrss=wrss,
ussq=sqrLenU, pwrss=pwrss,
drsum=if (rcl=="glmResp" && !trivial.y) resp$resDev() else NA,
REML=if (rcl=="lmerResp" && resp$REML != 0L && !trivial.y)
opt$fval else NA,
## FIXME: construct 'REML deviance' here?
dev=if (rcl=="lmerResp" && resp$REML != 0L || trivial.y) NA else opt$fval,
sigmaML=sqrt(unname(if (!dims["useSc"] || trivial.y) NA else sigmaML)),
sigmaREML=sqrt(unname(if (rcl!="lmerResp" || trivial.y) NA else
sigmaML*(dims['n']/dims['nmp']))),
tolPwrss=rho$tolPwrss)
## TODO: improve this hack to get something in frame slot (maybe need weights, etc...)
if(missing(fr)) fr <- data.frame(resp$y)
new(switch(rcl, lmerResp = "lmerMod", glmResp = "glmerMod", nlsResp = "nlmerMod"),
call=mc, frame=fr, flist=reTrms$flist, cnms=reTrms$cnms,
Gp=reTrms$Gp, theta=pp$theta, beta=beta,
u=if (trivial.y) rep(NA_real_,nrow(pp$Zt)) else pp$u(fac),
lower=reTrms$lower, devcomp=list(cmp=cmp, dims=dims),
pp=pp, resp=resp,
optinfo = .optinfo(opt, lme4conv))
}## {mkMerMod}
## generic argument checking
## 'type': name of calling function ("glmer", "lmer", "nlmer")
##
## NB: called from lFormula() and glFormula()
checkArgs <- function(type,...) {
l... <- list(...)
if (isTRUE(l...[["sparseX"]])) warning("sparseX = TRUE has no effect at present",call.=FALSE)
## '...' handling up front, safe-guarding against typos ("familiy") :
if(length(l... <- list(...))) {
if (!is.null(l...[["family"]])) { # call glmer if family specified
## we will only get here if 'family' is *not* in the arg list
warning("calling lmer with family() is deprecated: please use glmer() instead",call.=FALSE)
type <- "glmer"
}
## Check for method argument which is no longer used
## (different meanings/hints depending on glmer vs lmer)
if (!is.null(l...[["method"]])) {
msg <- paste("Argument", sQuote("method"), "is deprecated.")
if (type == "lmer")
msg <- paste(msg, "Use the REML argument to specify ML or REML estimation.")
else if (type == "glmer")
msg <- paste(msg, "Use the nAGQ argument to specify Laplace (nAGQ=1) or adaptive",
"Gauss-Hermite quadrature (nAGQ>1). PQL is no longer available.")
warning(msg,call.=FALSE)
l... <- l...[names(l...) != "method"]
}
if(length(l...)) {
warning("extra argument(s) ",
paste(sQuote(names(l...)), collapse=", "),
" disregarded",call.=FALSE)
}
}
}
## check formula and data: return an environment suitable for evaluating
## the formula.
## (1) if data is specified, return it
## (2) otherwise, if formula has an environment, use it
## (3) otherwise [e.g. if formula was passed as a string], try to use parent.frame(2)
## if #3 is true *and* the user is doing something tricky with nested functions,
## this may fail ...
## try to diagnose missing/bad data
checkFormulaData <- function(formula, data, checkLHS=TRUE,
checkData=TRUE, debug=FALSE) {
wd <- tryCatch(force(data), error = identity)
if (bad.data <- inherits(wd,"error")) {
bad.data.msg <- wd$message
}
## data not found (this *should* only happen with garbage input,
## OR when strings used as formulae -> drop1/update/etc.)
##
if (bad.data || debug) {
varex <- function(v, env) exists(v, envir=env, inherits=FALSE)
allvars <- all.vars(as.formula(formula))
allvarex <- function(env, vvec=allvars) all(vapply(vvec, varex, NA, env))
}
if (bad.data) { ## Choose helpful error message:
if (allvarex(environment(formula))) {
stop("bad 'data', but variables found in environment of formula: ",
"try specifying 'formula' as a formula rather ",
"than a string in the original model",call.=FALSE)
} else {
stop("bad 'data': ", bad.data.msg, call. = FALSE)
}
} else {
denv <- ## The data as environment
if (is.null(data)) {
if (!is.null(ee <- environment(formula))) {
ee ## use environment of formula
} else {
## e.g. no environment, e.g. because formula is a character vector
## parent.frame(2L) works because [g]lFormula (our calling environment)
## has been called within [g]lmer with env=parent.frame(1L)
## If you call checkFormulaData in some other bizarre way such that
## parent.frame(2L) is *not* OK, you deserve what you get
## calling checkFormulaData directly from the global
## environment should be OK, since trying to go up beyond the global
## environment keeps bringing you back to the global environment ...
parent.frame(2L)
}
} else ## data specified
list2env(data)
}
##
## FIXME: set enclosing environment of denv to environment(formula), or parent.frame(2L) ?
if (debug) {
cat("Debugging parent frames in checkFormulaData:\n")
## find global environment -- could do this with sys.nframe() ?
glEnv <- 1L
while (!identical(parent.frame(glEnv),.GlobalEnv)) {
glEnv <- glEnv+1L
}
## where are vars?
for (i in 1:glEnv) {
OK <- allvarex(parent.frame(i))
cat("vars exist in parent frame ", i)
if (i == glEnv) cat(" (global)")
cat(" ",OK, "\n")
}
cat("vars exist in env of formula ", allvarex(denv), "\n")
} ## if (debug)
stopifnot(!checkLHS || length(as.formula(formula,env=denv)) == 3) ## check for two-sided formula
return(denv)
}
## checkFormulaData <- function(formula,data) {
## ee <- environment(formula)
## if (is.null(ee)) {
## ee <- parent.frame(2)
## }
## if (missing(data)) data <- ee
## stopifnot(length(as.formula(formula,env=as.environment(data))) == 3)
## return(data)
## }
##' Not exported; for tests (and examples) that can be slow;
##' Use if(lme4:::testLevel() >= 1.) ..... see ../tests/README.md
testLevel <- function()
if(nzchar(s <- Sys.getenv("LME4_TEST_LEVEL")) &&
is.finite(s <- as.numeric(s))) s else 1
##' General conditional variance-covariance matrix
##'
##' Experimental function for estimating the variance-covariance
##' matrix of the random effects, conditional on the observed data
##' and at the (RE)ML estimate of the fixed effects and covariance
##' parameters. Applicable for any Lambda matrix, but slower than
##' other block-by-block methods.
##' Not exported.
##'
##' TODO:
##' - Write up quick note on theory (e.g. Laplace approximation).