Skip to content

Commit

Permalink
subfits: Fix an error thrown if newdata contains only a single
Browse files Browse the repository at this point in the history
level of a `factor`-coerced `character` variable.

Also fix a bug causing a re-ordering of the levels of a `factor` in
`newdata` (compared to that `factor` in the original dataset) not to get
recognized by `predict.subfit()`, which in turn causes
`x %*% rbind(alpha, beta)` to be incorrect in such cases.

Also fix an error thrown by `fit_glm_ridge_callback()` in case of unused levels
of a `factor`, namely:
```
warning: solve(): system is singular; attempting approx solution
Error: solve(): solution not found
```
  • Loading branch information
fweber144 committed Apr 4, 2023
1 parent 0686c1d commit fcf1408
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 15 deletions.
25 changes: 16 additions & 9 deletions R/divergence_minimizers.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,18 @@ fit_glm_ridge_callback <- function(formula, data,
projpred_var = matrix(nrow = nrow(data)),
projpred_regul = 1e-4, ...) {
# Preparations:
fr <- model.frame(formula, data = data)
fr <- model.frame(formula, data = data, drop.unused.levels = TRUE)
da_classes <- attr(attr(fr, "terms"), "dataClasses")
nms_chr_fac <- names(da_classes)[da_classes %in% c("character", "factor")]
resp_nm <- all.vars(attr(fr, "terms"))[attr(attr(fr, "terms"), "response")]
nms_chr_fac <- setdiff(nms_chr_fac, resp_nm)
if (length(nms_chr_fac) > 0) {
xlvls <- lapply(setNames(nm = nms_chr_fac), function(nm_chr_fac) {
levels(as.factor(fr[[nm_chr_fac]]))
})
} else {
xlvls <- NULL
}
# TODO: In the following model.matrix() call, allow user-specified contrasts
# to be passed to argument `contrasts.arg`. The `contrasts.arg` default
# (`NULL`) uses `options("contrasts")` internally, but it might be more
Expand All @@ -124,13 +135,8 @@ fit_glm_ridge_callback <- function(formula, data,
))
# Post-processing:
rownames(fit$beta) <- colnames(x)
sub <- nlist(
alpha = fit$beta0,
beta = fit$beta,
w = fit$w,
formula,
x, y
)
sub <- nlist(alpha = fit$beta0, beta = fit$beta, w = fit$w, formula, x, y,
xlvls)
class(sub) <- "subfit"
return(sub)
}
Expand Down Expand Up @@ -1067,7 +1073,8 @@ predict.subfit <- function(subfit, newdata = NULL) {
# (`NULL`) uses `options("contrasts")` internally, but it might be more
# convenient to let users specify contrasts directly. At that occasion,
# contrasts should also be tested thoroughly (not done until now).
x <- model.matrix(delete.response(terms(subfit$formula)), data = newdata)
x <- model.matrix(delete.response(terms(subfit$formula)), data = newdata,
xlev = subfit$xlvls)
if (is.null(beta)) {
return(as.matrix(rep(alpha, nrow(x))))
} else {
Expand Down
23 changes: 17 additions & 6 deletions R/search.R
Original file line number Diff line number Diff line change
Expand Up @@ -135,12 +135,26 @@ search_L1 <- function(p_ref, refmodel, nterms_max, penalty, opt) {
stop("L1 search cannot be used for an empty (i.e. intercept-only) ",
"full-model formula or `nterms_max = 0`.")
}
# Preparations:
fr <- model.frame(refmodel$formula, data = refmodel$fetch_data(),
drop.unused.levels = TRUE)
da_classes <- attr(attr(fr, "terms"), "dataClasses")
nms_chr_fac <- names(da_classes)[da_classes %in% c("character", "factor")]
resp_nm <- all.vars(attr(fr, "terms"))[attr(attr(fr, "terms"), "response")]
nms_chr_fac <- setdiff(nms_chr_fac, resp_nm)
if (length(nms_chr_fac) > 0) {
xlvls <- lapply(setNames(nm = nms_chr_fac), function(nm_chr_fac) {
levels(as.factor(fr[[nm_chr_fac]]))
})
} else {
xlvls <- NULL
}
# TODO: In the following model.matrix() call, allow user-specified contrasts
# to be passed to argument `contrasts.arg`. The `contrasts.arg` default
# (`NULL`) uses `options("contrasts")` internally, but it might be more
# convenient to let users specify contrasts directly. At that occasion,
# contrasts should also be tested thoroughly (not done until now).
x <- model.matrix(refmodel$formula, data = refmodel$fetch_data())
x <- model.matrix(refmodel$formula, data = fr)
x <- x[, colnames(x) != "(Intercept)", drop = FALSE]
## it's important to keep the original order because that's the order
## in which lasso will estimate the parameters
Expand Down Expand Up @@ -183,11 +197,8 @@ search_L1 <- function(p_ref, refmodel, nterms_max, penalty, opt) {
# re-use of `colnames(x)` should provide another sanity check:
x <- x[, colnames(x)[search_path$solution_terms[indices]], drop = FALSE]
}
sub <- nlist(alpha = search_path$alpha[nterms + 1],
beta,
w = search_path$w[, nterms + 1],
formula,
x)
sub <- nlist(alpha = search_path$alpha[nterms + 1], beta,
w = search_path$w[, nterms + 1], formula, x, xlvls)
class(sub) <- "subfit"
return(list(sub))
})
Expand Down

0 comments on commit fcf1408

Please sign in to comment.