Skip to content

Commit

Permalink
work on variances for multi-category outcomes (#60, #98)
Browse files Browse the repository at this point in the history
  • Loading branch information
leeper committed Jul 22, 2018
1 parent 3f32640 commit 248bae0
Show file tree
Hide file tree
Showing 15 changed files with 400 additions and 56 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ S3method(marginal_effects,lmerMod)
S3method(marginal_effects,loess)
S3method(marginal_effects,margins)
S3method(marginal_effects,merMod)
S3method(marginal_effects,multinom)
S3method(marginal_effects,nnet)
S3method(marginal_effects,polr)
S3method(margins,betareg)
Expand All @@ -32,6 +33,7 @@ S3method(margins,lm)
S3method(margins,lmerMod)
S3method(margins,loess)
S3method(margins,merMod)
S3method(margins,multinom)
S3method(margins,nnet)
S3method(margins,polr)
S3method(margins,svyglm)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# margins 0.3.24

* Added new function `margins_summary()` which provides a single-function expression of `summary(margins(...))`. (#94, h/t Mike DeCrescenzo)
* Added variances of marginal effects to "polr" objects from **MASS**. (#98, @eijoac)
* Fix a bug in `persp()` related to attempting to take the mean of a factor variable. (#93, h/t Jared Knowles)

# margins 0.3.23
Expand Down
6 changes: 2 additions & 4 deletions R/find_terms_in_model.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,8 @@ find_terms_in_model.default <- function(model, variables = NULL) {
}

# drop specially named "(weights)" variables
wts <- weights(model)
if (!is.null(wts)) {
classes <- classes[!names(classes) %in% "(weights)"]
}
classes <- classes[!names(classes) %in% "(weights)"]

# handle character variables as factors
classes[classes == "character"] <- "factor"
## cleanup names of terms
Expand Down
68 changes: 51 additions & 17 deletions R/get_effect_variances.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,16 @@ function(data,
...) {

# march.arg() for arguments
type <- match.arg(type)
if (!is.null(type)) {
type <- match.arg(type)
}
vce <- match.arg(vce)

# deploy appropriate vce procedure
if (vce == "none") {
return(list(variances = NULL, vcov = NULL, jacobian = NULL))
}

# setup vcov
if (is.function(vcov)) {
vcov <- vcov(model)
Expand All @@ -26,12 +33,7 @@ function(data,
varslist <- find_terms_in_model(model, variables = variables)
}

# deploy appropriate vce procedure
if (vce == "none") {

return(list(variances = NULL, vcov = NULL, jacobian = NULL))

} else if (vce == "delta") {
if (vce == "delta") {

# default method

Expand Down Expand Up @@ -60,7 +62,13 @@ function(data,
# sandwich
vc <- as.matrix(jacobian %*% vcov %*% t(jacobian))
} else {
# check that vcov() only contains coefficients from model
if (nrow(vcov) != length(coef(model))) {
vcov <- vcov[intersect(rownames(vcov), names(coef(model))), intersect(rownames(vcov), names(coef(model)))]
}

jacobian <- jacobian(FUN, coef(model)[names(coef(model)) %in% c("(Intercept)", colnames(vcov))], weights = weights, eps = eps)

# sandwich
vc <- jacobian %*% vcov %*% t(jacobian)
}
Expand All @@ -73,16 +81,29 @@ function(data,
tmpmodel <- model
tmpmodel[["model"]] <- NULL # remove data from model for memory

# check that vcov() only contains coefficients from model
if (nrow(vcov) != length(coef(model))) {
vcov <- vcov[intersect(rownames(vcov), names(coef(model))), intersect(rownames(vcov), names(coef(model)))]
}

# simulate from multivariate normal
coefmat <- MASS::mvrnorm(iterations, coef(model), vcov)

# estimate AME from from each simulated coefficient vector
effectmat <- apply(coefmat, 1, function(coefrow) {
tmpmodel[["coefficients"]] <- coefrow
if (is.null(weights)) {
means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...), na.rm = TRUE)
if (is.null(type)) {
means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, eps = eps, varslist = varslist, ...), na.rm = TRUE)
} else {
means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...), na.rm = TRUE)
}
} else {
me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...)
if (is.null(type)) {
me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, eps = eps, varslist = varslist, ...)
} else {
me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...)
}
means <- unlist(stats::setNames(lapply(me_tmp, stats::weighted.mean, w = weights, na.rm = TRUE), names(me_tmp)))
}
if (!is.matrix(means)) {
Expand All @@ -104,15 +125,28 @@ function(data,
tmpmodel[["call"]][["data"]] <- data[samp,]
tmpmodel <- eval(tmpmodel[["call"]])
if (is.null(weights)) {
means <- colMeans(marginal_effects(model = tmpmodel,
data = data[samp,],
variables = variables,
type = type,
eps = eps,
varslist = varslist,
...), na.rm = TRUE)
if (is.null(type)) {
means <- colMeans(marginal_effects(model = tmpmodel,
data = data[samp,],
variables = variables,
eps = eps,
varslist = varslist,
...), na.rm = TRUE)
} else {
means <- colMeans(marginal_effects(model = tmpmodel,
data = data[samp,],
variables = variables,
type = type,
eps = eps,
varslist = varslist,
...), na.rm = TRUE)
}
} else {
me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, type = type, eps = eps, varslist = varslist, ...)
if (is.null(type)) {
me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, eps = eps, varslist = varslist, ...)
} else {
me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, type = type, eps = eps, varslist = varslist, ...)
}
means <- unlist(stats::setNames(lapply(me_tmp, stats::weighted.mean, w = weights, na.rm = TRUE), names(me_tmp)))
}
means
Expand Down
12 changes: 10 additions & 2 deletions R/gradient_factory.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,20 @@ gradient_factory.default <- function(data, model, variables = NULL, type = "resp
model <- reset_coefs(model, coefs)
if (is.null(weights)) {
# build matrix of unit-specific marginal effects
me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
if (is.null(type)) {
me_tmp <- marginal_effects(model = model, data = data, variables = variables, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
} else {
me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
}
# apply colMeans to get average marginal effects
means <- stats::setNames(.colMeans(me_tmp, nrow(me_tmp), ncol(me_tmp), na.rm = TRUE), colnames(me_tmp))
} else {
# build matrix of unit-specific marginal effects
me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
if (is.null(type)) {
me_tmp <- marginal_effects(model = model, data = data, variables = variables, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
} else {
me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
}
# apply colMeans to get average marginal effects
means <- apply(me_tmp, 2L, stats::weighted.mean, w = weights, na.rm = TRUE)
}
Expand Down
1 change: 1 addition & 0 deletions R/marginal_effects.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#' \item \dQuote{lm}, see \code{\link[stats]{lm}}
#' \item \dQuote{loess}, see \code{\link[stats]{loess}}
#' \item \dQuote{merMod}, see \code{\link[lme4]{lmer}}, \code{\link[lme4]{glmer}}
#' \item \dQuote{multinom}, see \code{\link[nnet]{multinom}}
#' \item \dQuote{nnet}, see \code{\link[nnet]{nnet}}
#' \item \dQuote{polr}, see \code{\link[MASS]{polr}}
#' \item \dQuote{svyglm}, see \code{\link[survey]{svyglm}}
Expand Down
37 changes: 37 additions & 0 deletions R/marginal_effects_multinom.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#' @rdname marginal_effects
#' @importFrom prediction find_data
#' @export
marginal_effects.multinom <-
function(model,
data = find_data(model, parent.frame()),
variables = NULL,
type = NULL,
eps = 1e-7,
varslist = NULL,
as.data.frame = TRUE,
...) {

# identify classes of terms in `model`
if (is.null(varslist)) {
varslist <- find_terms_in_model(model, variables = variables)
}

# estimate numerical derivatives with respect to each variable (for numeric terms in the model)
# add discrete differences for logical terms
out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...)

# add discrete differences for factor terms
## exact number depends on number of factor levels
out2 <- list()
for (i in seq_along(varslist$fnames)) {
out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...)
}

out <- c(out1, out2)
if (isTRUE(as.data.frame)) {
out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
} else {
out <- do.call("cbind", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
}
return(out)
}
1 change: 1 addition & 0 deletions R/marginal_effects_nnet.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ marginal_effects.nnet <-
function(model,
data = find_data(model, parent.frame()),
variables = NULL,
type = NULL,
eps = 1e-7,
varslist = NULL,
as.data.frame = TRUE,
Expand Down
35 changes: 34 additions & 1 deletion R/marginal_effects_polr.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,37 @@
#' @rdname marginal_effects
#' @importFrom prediction find_data
#' @export
marginal_effects.polr <- marginal_effects.nnet
marginal_effects.polr <-
function(model,
data = find_data(model, parent.frame()),
variables = NULL,
type = NULL,
eps = 1e-7,
varslist = NULL,
as.data.frame = TRUE,
...) {

# identify classes of terms in `model`
if (is.null(varslist)) {
varslist <- find_terms_in_model(model, variables = variables)
}

# estimate numerical derivatives with respect to each variable (for numeric terms in the model)
# add discrete differences for logical terms
out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...)

# add discrete differences for factor terms
## exact number depends on number of factor levels
out2 <- list()
for (i in seq_along(varslist$fnames)) {
out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...)
}

out <- c(out1, out2)
if (isTRUE(as.data.frame)) {
out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
} else {
out <- do.call("cbind", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
}
return(out)
}
78 changes: 78 additions & 0 deletions R/margins_multinom.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#' @rdname margins
#' @importFrom prediction find_data
#' @export
margins.multinom <-
function(model,
data = find_data(model, parent.frame()),
variables = NULL,
at = NULL,
type = NULL,
vcov = stats::vcov(model),
vce = c("delta", "simulation", "bootstrap", "none"),
iterations = 50L, # if vce == "bootstrap" or "simulation"
unit_ses = FALSE,
eps = 1e-7,
...) {

# match.arg()
vce <- match.arg(vce)

# setup data
data_list <- build_datalist(data, at = at)
if (is.null(names(data_list))) {
names(data_list) <- NA_character_
}
at_specification <- attr(data_list, "at_specification")

# identify classes of terms in `model`
varslist <- find_terms_in_model(model, variables = variables)

# reduce memory profile
model[["model"]] <- NULL
attr(model[["terms"]], ".Environment") <- NULL

# calculate marginal effects
out <- list()
for (i in seq_along(data_list)) {
out[[i]] <- build_margins(model = model,
data = data_list[[i]],
variables = variables,
type = NULL,
vcov = vcov,
vce = vce,
iterations = iterations,
unit_ses = unit_ses,
eps = eps,
varslist = varslist,
...)
out[[i]][["_at_number"]] <- i
}
if (vce == "delta") {
jac <- do.call("rbind", lapply(out, attr, "jacobian"))
rownames(jac) <- paste0(rownames(jac), ".", rep(seq_len(length(out)), each = length(unique(rownames(jac)))))

# check that vcov() only contains coefficients from model
if (nrow(vcov) != length(coef(model))) {
vcov <- vcov[colnames(coef(model)), colnames(coef(model))]
}

# sandwich
vc <- jac %*% vcov %*% t(jac)
} else {
jac <- NULL
vc <- NULL
}

# return value
structure(do.call("rbind", out),
class = c("margins", "data.frame"),
at = if (is.null(at)) NULL else at_specification,
type = type,
call = if ("call" %in% names(model)) model[["call"]] else NULL,
model_class = class(model),
vce = vce,
vcov = vc,
jacobian = jac,
weighted = FALSE,
iterations = if (vce == "bootstrap") iterations else NULL)
}
Loading

0 comments on commit 248bae0

Please sign in to comment.