Add ordinal logistic regression. Close #130.

mayoverse · Aug 29, 2018 · 39eee61 · 39eee61
1 parent 0976193
commit 39eee61
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 21 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -11,6 +11,8 @@
 
 * `modelsum()`: Fix formatting of error about unsupported families.
 
+* `modelsum()`: Add `family="ordinal"` to do ordinal logistic regression using `MASS::polr()`. (#130)
+
 # arsenal v1.3.0
 
 **This is a mostly backwards-compatible update.**

diff --git a/R/as.data.frame.modelsum.R b/R/as.data.frame.modelsum.R
@@ -4,18 +4,20 @@ get_the_estimate <- function(fitList, cntrl)
   # even if the model doesn't have an intercept, that's okay
   labs <- c("(Intercept)", fitList$label, fitList$adjlabels)
   names(labs) <- c("(Intercept)", fitList$xterms, fitList$adjterms)
+  trms <- fitList$coeff$term
 
   out <- data.frame(
-    term = fitList$coeff$term,
-    label = labs[fitList$coeff$term],
-    term.type = ifelse(fitList$coeff$term == "(Intercept)", "Intercept", ifelse(fitList$coeff$term %in% fitList$adjterms, "Adjuster", "Term")),
+    term = trms,
+    label = ifelse(trms %in% names(labs), labs[trms], trms),
+    term.type = ifelse(trms %in% fitList$adjterms, "Adjuster",
+                       ifelse(trms %in% fitList$xterms, "Term", "Intercept")),
     stringsAsFactors = FALSE
   )
 
   statFields <- switch(fitList$family,
                        quasibinomial = cntrl$binomial.stats, binomial = cntrl$binomial.stats,
                        quasipoisson = cntrl$poisson.stats, poisson = cntrl$poisson.stats,
-                       survival = cntrl$survival.stats,
+                       survival = cntrl$survival.stats, ordinal = cntrl$ordinal.stats,
                        cntrl$gaussian.stats)
 
   if(any(names(fitList$coeff) %in% statFields)) out <- cbind(out, fitList$coeff[, intersect(statFields, names(fitList$coeff)), drop = FALSE])

diff --git a/R/modelsum.R b/R/modelsum.R
@@ -74,9 +74,17 @@ modelsum <- function(formula,  family="gaussian", data, adjust=NULL, na.action =
   ## Allow family parameter to passed with or without quotes
   ##    exception is survival, would require public function named survival.
   ## Here, we force quotes to simplify in for loop below
-  if (is.function(family)) family <- family()$family
+  if(is.function(family) || is.character(family))
+  {
+    family.list <- match.fun(family)()
+    family <- family.list$family
+  } else
+  {
+    family.list <- family
+    family <- family$family
+  }
 
-  if(family %nin% c("survival", "gaussian", "binomial", "poisson", "quasibinomial", "quasipoisson"))
+  if(family %nin% c("survival", "gaussian", "binomial", "poisson", "quasibinomial", "quasipoisson", "ordinal"))
     stop("Family ", family, " not supported.\n")
 
   if(family != "survival" && any(grepl("Surv\\(", formula))) {
@@ -146,8 +154,18 @@ modelsum <- function(formula,  family="gaussian", data, adjust=NULL, na.action =
 
     ## placeholder for ordered, don't do any fitting
     ## y is ordered factor
-    if (family == "ordered") {
-      stop("family == 'ordered' isn't implemented yet")
+    if (family == "ordinal") {
+      temp.call[[1]] <- quote(MASS::polr)
+      temp.call$Hess <- TRUE
+      temp.call$method <- family.list$method
+      fit <- eval(temp.call, parent.frame())
+      coeffORTidy <- broom::tidy(fit, exponentiate=TRUE, conf.int=TRUE, conf.level=control$conf.level)
+      coeffORTidy[coeffORTidy$coefficient_type == "zeta", names(coeffORTidy) %nin% c("term", "coefficient_type")] <- NA
+      coeffTidy <- broom::tidy(fit, exponentiate=FALSE, conf.int=TRUE, conf.level=control$conf.level)
+      coeffTidy$p.value <- 2*pnorm(abs(coeffTidy$statistic), lower.tail = FALSE)
+      coeffTidy <- cbind(coeffTidy, OR=coeffORTidy$estimate, CI.lower.OR=coeffORTidy$conf.low, CI.upper.OR=coeffORTidy$conf.high)
+      modelGlance <- broom::glance(fit)
+
     } else if (family == "gaussian") {
       # ## issue warning if appears categorical
       if(length(unique(maindf[[1]])) <= 5) {
@@ -263,6 +281,12 @@ modelsum <- function(formula,  family="gaussian", data, adjust=NULL, na.action =
 ##   keep as private function
 survival <- function() list(family="survival")
 
+ordinal <- function(method = c("logistic", "probit", "loglog", "cloglog", "cauchit"))
+{
+  list(family = "ordinal", method = match.arg(method))
+}
+
+
 #' @rdname modelsum
 #' @export
 print.modelsum <- function(x, ...) {

diff --git a/R/modelsum.control.R b/R/modelsum.control.R
@@ -13,7 +13,7 @@
 #' @param show.adjust Logical, denoting whether to show adjustment terms.
 #' @param show.intercept Logical, denoting whether to show intercept terms.
 #' @param conf.level Numeric, giving the confidence level.
-#' @param binomial.stats,survival.stats,gaussian.stats,poisson.stats
+#' @param ordinal.stats,binomial.stats,survival.stats,gaussian.stats,poisson.stats
 #'   Character vectors denoting which stats to show for the various model types.
 #' @param stat.labels A named list of labels for all the stats used above.
 #' @param ... Other arguments (not in use at this time).
@@ -25,14 +25,16 @@
 #'   is less than the resulting number of places, it will be formatted to show so.
 #' @seealso \code{\link{modelsum}}, \code{\link{summary.modelsum}}
 #' @export
-modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, format.p = TRUE,
-            show.adjust = TRUE, show.intercept = TRUE, conf.level = 0.95,
-            binomial.stats=c("OR","CI.lower.OR","CI.upper.OR","p.value", "concordance","Nmiss"),
-            gaussian.stats=c("estimate","std.error","p.value","adj.r.squared","Nmiss"),
-            poisson.stats=c("RR","CI.lower.RR", "CI.upper.RR","p.value","concordance","Nmiss"),
-            survival.stats=c("HR","CI.lower.HR","CI.upper.HR","p.value","concordance","Nmiss"),
-            stat.labels = list(), ...)
-{
+modelsum.control <- function(
+  digits = 3L, digits.ratio = 3L, digits.p = 3L, format.p = TRUE,
+  show.adjust = TRUE, show.intercept = TRUE, conf.level = 0.95,
+  ordinal.stats=c("OR","CI.lower.OR","CI.upper.OR", "p.value","Nmiss"),
+  binomial.stats=c("OR","CI.lower.OR","CI.upper.OR","p.value", "concordance","Nmiss"),
+  gaussian.stats=c("estimate","std.error","p.value","adj.r.squared","Nmiss"),
+  poisson.stats=c("RR","CI.lower.RR", "CI.upper.RR","p.value","concordance","Nmiss"),
+  survival.stats=c("HR","CI.lower.HR","CI.upper.HR","p.value","concordance","Nmiss"),
+  stat.labels = list(), ...
+) {
 
   if("nsmall" %in% names(list(...))) .Deprecated(msg = "Using 'nsmall = ' is deprecated. Use 'digits = ' instead.")
   if("nsmall.ratio" %in% names(list(...))) .Deprecated(msg = "Using 'nsmall.ratio = ' is deprecated. Use 'digits.ratio = ' instead.")
@@ -60,6 +62,27 @@ modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, form
     conf.level <- 0.95
   }
 
+  ##########################
+  ## Ordinal stats:
+  ##########################
+
+  ordinal.stats.valid <- c(
+    "Nmiss", "OR", "CI.lower.OR", "CI.upper.OR", "p.value", # default
+    "estimate", "CI.OR", "CI.estimate", "CI.lower.estimate", "CI.upper.estimate", "N", "Nmiss2", "endpoint", "std.error", "statistic",
+    "logLik", "AIC", "BIC", "edf", "deviance", "df.residual"
+  )
+
+  if(any(ordinal.stats %nin% ordinal.stats.valid)) {
+    stop("Invalid binomial stats: ",
+         paste(ordinal.stats[ordinal.stats %nin% ordinal.stats.valid],collapse=","), "\n")
+  }
+  ## let CI.OR decode to CI.lower.OR and CI.upper.OR
+  if(any(ordinal.stats == "CI.OR")) {
+    ordinal.stats <- unique(c(ordinal.stats[ordinal.stats != "CI.OR"], "CI.lower.OR", "CI.upper.OR"))
+  }
+  if(any(ordinal.stats == "CI.estimate")) {
+    ordinal.stats <- unique(c(ordinal.stats[ordinal.stats != "CI.estimate"], "CI.lower.estimate", "CI.upper.estimate"))
+  }
 
   ##########################
   ## Binomial stats:
@@ -155,6 +178,6 @@ modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, form
   }
   list(digits=digits, digits.ratio=digits.ratio, digits.p = digits.p, format.p = format.p,
        show.adjust=show.adjust, show.intercept=show.intercept, conf.level=conf.level,
-       binomial.stats=binomial.stats, gaussian.stats=gaussian.stats,
+       ordinal.stats=ordinal.stats, binomial.stats=binomial.stats, gaussian.stats=gaussian.stats,
        poisson.stats=poisson.stats, survival.stats=survival.stats, stat.labels = stat.labels)
 }
diff --git a/R/summary.modelsum.R b/R/summary.modelsum.R
@@ -51,7 +51,7 @@ as.data.frame.summary.modelsum <- function(x, ..., text = x$text, term.name = x$
   # non-integers, one-per-model
   use.digits1 <- c("logLik", "AIC", "BIC", "null.deviance", "deviance",
                    "statistic.F", "dispersion", "statistic.sc", "concordance", "std.error.concordance",
-                   "adj.r.squared", "r.squared")
+                   "adj.r.squared", "r.squared", "edf")
 
   # non-integers, many-per-model
   use.digits2 <- c("estimate", "CI.lower.estimate", "CI.upper.estimate", "std.error", "statistic", "standard.estimate")

diff --git a/man/modelsum.control.Rd b/man/modelsum.control.Rd
diff --git a/tests/testthat/test_modelsum.R b/tests/testthat/test_modelsum.R
@@ -151,6 +151,52 @@ test_that("'weights=' works", {
 })
 
 
+test_that("OLR works", {
+  if(require(MASS))
+  {
+    data(housing)
+    expect_identical(
+      capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal"))),
+      c("|                   |OR    |CI.lower.OR |CI.upper.OR |p.value |",
+        "|:------------------|:-----|:-----------|:-----------|:-------|",
+        "|**Cont High**      |1.434 |1.189       |1.730       |< 0.001 |",
+        "|**Infl High**      |3.628 |2.832       |4.663       |< 0.001 |",
+        "|**Infl Medium**    |1.762 |1.436       |2.164       |< 0.001 |",
+        "|Low&#124;Medium    |NA    |NA          |NA          |< 0.001 |",
+        "|Medium&#124;High   |NA    |NA          |NA          |< 0.001 |",
+        "|**Type Apartment** |0.564 |0.446       |0.712       |< 0.001 |",
+        "|**Type Atrium**    |0.693 |0.511       |0.940       |0.018   |",
+        "|**Type Terrace**   |0.336 |0.249       |0.451       |< 0.001 |"
+      )
+    )
+    expect_identical(
+      capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal",
+                                     ordinal.stats = c("estimate", "statistic", "p.value")), text = TRUE)),
+      c("|                 |estimate |statistic |p.value |",
+        "|:----------------|:--------|:---------|:-------|",
+        "|Cont High        |0.360    |3.771     |< 0.001 |",
+        "|Infl High        |1.289    |10.136    |< 0.001 |",
+        "|Infl Medium      |0.566    |5.412     |< 0.001 |",
+        "|Low&#124;Medium  |-0.496   |-3.974    |< 0.001 |",
+        "|Medium&#124;High |0.691    |5.505     |< 0.001 |",
+        "|Type Apartment   |-0.572   |-4.800    |< 0.001 |",
+        "|Type Atrium      |-0.366   |-2.360    |0.018   |",
+        "|Type Terrace     |-1.091   |-7.202    |< 0.001 |"
+      )
+    )
+    expect_identical(
+      capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal",
+                                     show.adjust = FALSE, show.intercept = FALSE), text = TRUE)),
+      c("|            |OR    |CI.lower.OR |CI.upper.OR |p.value |",
+        "|:-----------|:-----|:-----------|:-----------|:-------|",
+        "|Infl High   |3.628 |2.832       |4.663       |< 0.001 |",
+        "|Infl Medium |1.762 |1.436       |2.164       |< 0.001 |"
+      )
+    )
+  } else skip("'MASS' is not available")
+})
+
+
 ###########################################################################################################
 #### Reported bugs for modelsum
 ###########################################################################################################