work on variances for multi-category outcomes (#60, #98)

leeper · Jul 22, 2018 · 248bae0 · 248bae0
1 parent 3f32640
commit 248bae0
Show file tree

Hide file tree

Showing 15 changed files with 400 additions and 56 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -22,6 +22,7 @@ S3method(marginal_effects,lmerMod)
 S3method(marginal_effects,loess)
 S3method(marginal_effects,margins)
 S3method(marginal_effects,merMod)
+S3method(marginal_effects,multinom)
 S3method(marginal_effects,nnet)
 S3method(marginal_effects,polr)
 S3method(margins,betareg)
@@ -32,6 +33,7 @@ S3method(margins,lm)
 S3method(margins,lmerMod)
 S3method(margins,loess)
 S3method(margins,merMod)
+S3method(margins,multinom)
 S3method(margins,nnet)
 S3method(margins,polr)
 S3method(margins,svyglm)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # margins 0.3.24
 
 * Added new function `margins_summary()` which provides a single-function expression of `summary(margins(...))`. (#94, h/t Mike DeCrescenzo)
+* Added variances of marginal effects to "polr" objects from **MASS**. (#98, @eijoac)
 * Fix a bug in `persp()` related to attempting to take the mean of a factor variable. (#93, h/t Jared Knowles)
 
 # margins 0.3.23

diff --git a/R/find_terms_in_model.R b/R/find_terms_in_model.R
@@ -31,10 +31,8 @@ find_terms_in_model.default <- function(model, variables = NULL) {
     }
 
     # drop specially named "(weights)" variables
-    wts <- weights(model)
-    if (!is.null(wts)) {
-        classes <- classes[!names(classes) %in% "(weights)"]
-    }
+    classes <- classes[!names(classes) %in% "(weights)"]
+
     # handle character variables as factors
     classes[classes == "character"] <- "factor"
     ## cleanup names of terms

diff --git a/R/get_effect_variances.R b/R/get_effect_variances.R
@@ -13,9 +13,16 @@ function(data,
          ...) {
 
     # march.arg() for arguments
-    type <- match.arg(type)
+    if (!is.null(type)) {
+        type <- match.arg(type)
+    }
     vce <- match.arg(vce)
 
+    # deploy appropriate vce procedure
+    if (vce == "none") {
+        return(list(variances = NULL, vcov = NULL, jacobian = NULL))
+    }
+
     # setup vcov
     if (is.function(vcov)) {
         vcov <- vcov(model)
@@ -26,12 +33,7 @@ function(data,
         varslist <- find_terms_in_model(model, variables = variables)
     }
 
-    # deploy appropriate vce procedure
-    if (vce == "none") {
-
-        return(list(variances = NULL, vcov = NULL, jacobian = NULL))
-
-    } else if (vce == "delta") {
+    if (vce == "delta") {
 
         # default method
 
@@ -60,7 +62,13 @@ function(data,
             # sandwich
             vc <- as.matrix(jacobian %*% vcov %*% t(jacobian))
         } else {
+            # check that vcov() only contains coefficients from model
+            if (nrow(vcov) != length(coef(model))) {
+                vcov <- vcov[intersect(rownames(vcov), names(coef(model))), intersect(rownames(vcov), names(coef(model)))]
+            }
+
             jacobian <- jacobian(FUN, coef(model)[names(coef(model)) %in% c("(Intercept)", colnames(vcov))], weights = weights, eps = eps)
+
             # sandwich
             vc <- jacobian %*% vcov %*% t(jacobian)
         }
@@ -73,16 +81,29 @@ function(data,
         tmpmodel <- model
         tmpmodel[["model"]] <- NULL # remove data from model for memory
 
+        # check that vcov() only contains coefficients from model
+        if (nrow(vcov) != length(coef(model))) {
+            vcov <- vcov[intersect(rownames(vcov), names(coef(model))), intersect(rownames(vcov), names(coef(model)))]
+        }
+
         # simulate from multivariate normal
         coefmat <- MASS::mvrnorm(iterations, coef(model), vcov)
 
         # estimate AME from from each simulated coefficient vector
         effectmat <- apply(coefmat, 1, function(coefrow) {
             tmpmodel[["coefficients"]] <- coefrow
             if (is.null(weights)) {
-                means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...), na.rm = TRUE)
+                if (is.null(type)) {
+                    means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, eps = eps, varslist = varslist, ...), na.rm = TRUE)
+                } else {
+                    means <- colMeans(marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...), na.rm = TRUE)
+                }
             } else {
-                me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...)
+                if (is.null(type)) {
+                    me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, eps = eps, varslist = varslist, ...)
+                } else {
+                    me_tmp <- marginal_effects(model = tmpmodel, data = data, variables = variables, type = type, eps = eps, varslist = varslist, ...)
+                }
                 means <- unlist(stats::setNames(lapply(me_tmp, stats::weighted.mean, w = weights, na.rm = TRUE), names(me_tmp)))
             }
             if (!is.matrix(means)) {
@@ -104,15 +125,28 @@ function(data,
             tmpmodel[["call"]][["data"]] <- data[samp,]
             tmpmodel <- eval(tmpmodel[["call"]])
             if (is.null(weights)) {
-                means <- colMeans(marginal_effects(model = tmpmodel,
-                                                   data = data[samp,],
-                                                   variables = variables,
-                                                   type = type,
-                                                   eps = eps,
-                                                   varslist = varslist,
-                                                   ...), na.rm = TRUE)
+                if (is.null(type)) {
+                    means <- colMeans(marginal_effects(model = tmpmodel,
+                                                       data = data[samp,],
+                                                       variables = variables,
+                                                       eps = eps,
+                                                       varslist = varslist,
+                                                       ...), na.rm = TRUE)
+                } else {
+                    means <- colMeans(marginal_effects(model = tmpmodel,
+                                                       data = data[samp,],
+                                                       variables = variables,
+                                                       type = type,
+                                                       eps = eps,
+                                                       varslist = varslist,
+                                                       ...), na.rm = TRUE)
+                }
             } else {
-                me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, type = type, eps = eps, varslist = varslist, ...)
+                if (is.null(type)) {
+                    me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, eps = eps, varslist = varslist, ...)
+                } else {
+                    me_tmp <- marginal_effects(model = tmpmodel, data = data[samp,], variables = variables, type = type, eps = eps, varslist = varslist, ...)
+                }
                 means <- unlist(stats::setNames(lapply(me_tmp, stats::weighted.mean, w = weights, na.rm = TRUE), names(me_tmp)))
             }
             means

diff --git a/R/gradient_factory.R b/R/gradient_factory.R
@@ -16,12 +16,20 @@ gradient_factory.default <- function(data, model, variables = NULL, type = "resp
         model <- reset_coefs(model, coefs)
         if (is.null(weights)) {
             # build matrix of unit-specific marginal effects
-            me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            if (is.null(type)) {
+                me_tmp <- marginal_effects(model = model, data = data, variables = variables, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            } else {
+                me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            }
             # apply colMeans to get average marginal effects
             means <- stats::setNames(.colMeans(me_tmp, nrow(me_tmp), ncol(me_tmp), na.rm = TRUE), colnames(me_tmp))
         } else {
             # build matrix of unit-specific marginal effects
-            me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            if (is.null(type)) {
+                me_tmp <- marginal_effects(model = model, data = data, variables = variables, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            } else {
+                me_tmp <- marginal_effects(model = model, data = data, variables = variables, type = type, eps = eps, as.data.frame = FALSE, varslist = varslist, ...)
+            }
             # apply colMeans to get average marginal effects
             means <- apply(me_tmp, 2L, stats::weighted.mean, w = weights, na.rm = TRUE)
         }

diff --git a/R/marginal_effects.R b/R/marginal_effects.R
@@ -19,6 +19,7 @@
 #'   \item \dQuote{lm}, see \code{\link[stats]{lm}}
 #'   \item \dQuote{loess}, see \code{\link[stats]{loess}}
 #'   \item \dQuote{merMod}, see \code{\link[lme4]{lmer}}, \code{\link[lme4]{glmer}}
+#'   \item \dQuote{multinom}, see \code{\link[nnet]{multinom}}
 #'   \item \dQuote{nnet}, see \code{\link[nnet]{nnet}}
 #'   \item \dQuote{polr}, see \code{\link[MASS]{polr}}
 #'   \item \dQuote{svyglm}, see \code{\link[survey]{svyglm}}

diff --git a/R/marginal_effects_multinom.R b/R/marginal_effects_multinom.R
@@ -0,0 +1,37 @@
+#' @rdname marginal_effects
+#' @importFrom prediction find_data
+#' @export
+marginal_effects.multinom <- 
+function(model, 
+         data = find_data(model, parent.frame()), 
+         variables = NULL,
+         type = NULL,
+         eps = 1e-7, 
+         varslist = NULL,
+         as.data.frame = TRUE,
+         ...) {
+
+    # identify classes of terms in `model`
+    if (is.null(varslist)) {
+        varslist <- find_terms_in_model(model, variables = variables)
+    }
+
+    # estimate numerical derivatives with respect to each variable (for numeric terms in the model)
+    # add discrete differences for logical terms
+    out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...)
+
+    # add discrete differences for factor terms
+    ## exact number depends on number of factor levels
+    out2 <- list()
+    for (i in seq_along(varslist$fnames)) {
+        out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...)
+    }
+
+    out <- c(out1, out2)
+    if (isTRUE(as.data.frame)) {
+        out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
+    } else {
+        out <- do.call("cbind", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
+    }
+    return(out)
+}
diff --git a/R/marginal_effects_nnet.R b/R/marginal_effects_nnet.R
@@ -5,6 +5,7 @@ marginal_effects.nnet <-
 function(model, 
          data = find_data(model, parent.frame()), 
          variables = NULL,
+         type = NULL,
          eps = 1e-7, 
          varslist = NULL,
          as.data.frame = TRUE,

diff --git a/R/marginal_effects_polr.R b/R/marginal_effects_polr.R
@@ -1,4 +1,37 @@
 #' @rdname marginal_effects
 #' @importFrom prediction find_data
 #' @export
-marginal_effects.polr <- marginal_effects.nnet
+marginal_effects.polr <- 
+function(model, 
+         data = find_data(model, parent.frame()), 
+         variables = NULL,
+         type = NULL,
+         eps = 1e-7, 
+         varslist = NULL,
+         as.data.frame = TRUE,
+         ...) {
+
+    # identify classes of terms in `model`
+    if (is.null(varslist)) {
+        varslist <- find_terms_in_model(model, variables = variables)
+    }
+
+    # estimate numerical derivatives with respect to each variable (for numeric terms in the model)
+    # add discrete differences for logical terms
+    out1 <- lapply(c(varslist$nnames, varslist$lnames), dydx, data = data, model = model, type = NULL, eps = eps, as.data.frame = as.data.frame, ...)
+
+    # add discrete differences for factor terms
+    ## exact number depends on number of factor levels
+    out2 <- list()
+    for (i in seq_along(varslist$fnames)) {
+        out2[[i]] <- dydx.factor(data = data, model = model, varslist$fnames[i], fwrap = FALSE, type = NULL, as.data.frame = as.data.frame, ...)
+    }
+
+    out <- c(out1, out2)
+    if (isTRUE(as.data.frame)) {
+        out <- do.call("cbind.data.frame", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
+    } else {
+        out <- do.call("cbind", out[vapply(out, function(x) length(x) > 0, FUN.VALUE = logical(1))])
+    }
+    return(out)
+}
diff --git a/R/margins_multinom.R b/R/margins_multinom.R
@@ -0,0 +1,78 @@
+#' @rdname margins
+#' @importFrom prediction find_data
+#' @export
+margins.multinom <- 
+function(model, 
+         data = find_data(model, parent.frame()), 
+         variables = NULL,
+         at = NULL, 
+         type = NULL,
+         vcov = stats::vcov(model),
+         vce = c("delta", "simulation", "bootstrap", "none"),
+         iterations = 50L, # if vce == "bootstrap" or "simulation"
+         unit_ses = FALSE,
+         eps = 1e-7,
+         ...) {
+
+    # match.arg()
+    vce <- match.arg(vce)
+
+    # setup data
+    data_list <- build_datalist(data, at = at)
+    if (is.null(names(data_list))) {
+        names(data_list) <- NA_character_
+    }
+    at_specification <- attr(data_list, "at_specification")
+
+    # identify classes of terms in `model`
+    varslist <- find_terms_in_model(model, variables = variables)
+
+    # reduce memory profile
+    model[["model"]] <- NULL
+    attr(model[["terms"]], ".Environment") <- NULL
+
+    # calculate marginal effects
+    out <- list()
+    for (i in seq_along(data_list)) {
+        out[[i]] <- build_margins(model = model,
+                                  data = data_list[[i]],
+                                  variables = variables,
+                                  type = NULL,
+                                  vcov = vcov,
+                                  vce = vce,
+                                  iterations = iterations,
+                                  unit_ses = unit_ses,
+                                  eps = eps,
+                                  varslist = varslist,
+                                  ...)
+        out[[i]][["_at_number"]] <- i
+    }
+    if (vce == "delta") {
+        jac <- do.call("rbind", lapply(out, attr, "jacobian"))
+        rownames(jac) <- paste0(rownames(jac), ".", rep(seq_len(length(out)), each = length(unique(rownames(jac)))))
+
+        # check that vcov() only contains coefficients from model
+        if (nrow(vcov) != length(coef(model))) {
+            vcov <- vcov[colnames(coef(model)), colnames(coef(model))]
+        }
+
+        # sandwich
+        vc <- jac %*% vcov %*% t(jac)
+    } else {
+        jac <- NULL
+        vc <- NULL
+    }
+
+    # return value
+    structure(do.call("rbind", out), 
+              class = c("margins", "data.frame"), 
+              at = if (is.null(at)) NULL else at_specification,
+              type = type,
+              call = if ("call" %in% names(model)) model[["call"]] else NULL,
+              model_class = class(model),
+              vce = vce, 
+              vcov = vc,
+              jacobian = jac,
+              weighted = FALSE,
+              iterations = if (vce == "bootstrap") iterations else NULL)
+}