tieders for ordinal models

cf. tidymodels#292 and tidymodels#156
larmarange · Jun 8, 2018 · 1c93de4 · 1c93de4
1 parent 2ed373a
commit 1c93de4
Show file tree

Hide file tree

Showing 7 changed files with 429 additions and 80 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -86,6 +86,7 @@ Suggests:
     network,
     nnet,
     orcutt,
+    ordinal,
     plm,
     poLCA,
     psych,

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,7 @@
 S3method(augment,"NULL")
 S3method(augment,Mclust)
 S3method(augment,betareg)
+S3method(augment,clm)
 S3method(augment,coxph)
 S3method(augment,data.frame)
 S3method(augment,decomposed.ts)
@@ -23,6 +24,7 @@ S3method(augment,nlrq)
 S3method(augment,nls)
 S3method(augment,plm)
 S3method(augment,poLCA)
+S3method(augment,polr)
 S3method(augment,prcomp)
 S3method(augment,rowwise_df)
 S3method(augment,rq)
@@ -42,6 +44,8 @@ S3method(glance,betareg)
 S3method(glance,biglm)
 S3method(glance,binDesign)
 S3method(glance,cch)
+S3method(glance,clm)
+S3method(glance,clmm)
 S3method(glance,coxph)
 S3method(glance,cv.glmnet)
 S3method(glance,data.frame)
@@ -73,6 +77,7 @@ S3method(glance,nls)
 S3method(glance,orcutt)
 S3method(glance,plm)
 S3method(glance,poLCA)
+S3method(glance,polr)
 S3method(glance,pyears)
 S3method(glance,ridgelm)
 S3method(glance,rlm)
@@ -88,6 +93,7 @@ S3method(glance,survdiff)
 S3method(glance,survexp)
 S3method(glance,survfit)
 S3method(glance,survreg)
+S3method(glance,svyolr)
 S3method(glance,tbl_df)
 S3method(glance_,rowwise_df)
 S3method(tidy,"NULL")
@@ -117,6 +123,8 @@ S3method(tidy,btergm)
 S3method(tidy,cch)
 S3method(tidy,character)
 S3method(tidy,cld)
+S3method(tidy,clm)
+S3method(tidy,clmm)
 S3method(tidy,coeftest)
 S3method(tidy,confint.glht)
 S3method(tidy,coxph)
@@ -166,6 +174,7 @@ S3method(tidy,orcutt)
 S3method(tidy,pairwise.htest)
 S3method(tidy,plm)
 S3method(tidy,poLCA)
+S3method(tidy,polr)
 S3method(tidy,power.htest)
 S3method(tidy,prcomp)
 S3method(tidy,pyears)
@@ -189,6 +198,7 @@ S3method(tidy,survdiff)
 S3method(tidy,survexp)
 S3method(tidy,survfit)
 S3method(tidy,survreg)
+S3method(tidy,svyolr)
 S3method(tidy,table)
 S3method(tidy,tbl_df)
 S3method(tidy,ts)

diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,7 @@ broom 0.4.4.9000
 * Updated old vignettes to use `map/unnest` workflow rather than `rowwise/do`
 * Bump version number
 * Added `augment` method for chi-squared tests
+* Added tieders for ordinal models: `clm` and `clmm` (ordinal), `polr` (MASS), `svyolr` (survey)
 
 broom 0.4.4
 -----------

diff --git a/R/ordinal_tieders.R b/R/ordinal_tieders.R
@@ -0,0 +1,231 @@
+#' Tidying methods for ordinal logistic regression models
+#'
+#' These methods tidy the coefficients of ordinal logistic regression
+#' models generated by \code{\link[ordinal]{clm}} or \code{\link[ordinal]{clmm}}
+#' of the \code{ordinal} package, \code{\link[MASS]{polr}} of the \code{MASS}
+#' packge, or \code{\link[survey]{svyolr}} of the \code{survey} package.
+#'
+#' @param x a model of class \code{clm}, \code{clmm}, \code{polr} or \code{svyolr}
+#' @param conf.int whether to include a confidence interval
+#' @param conf.level confidence level of the interval, used only if
+#' \code{conf.int=TRUE}
+#' @param exponentiate whether to exponentiate the coefficient estimates
+#' and confidence intervals (typical for ordinal logistic regression)
+#' @param quick	whether to compute a smaller and faster version, containing only
+#' the term, estimate and coefficient_type columns
+#' @param conf.type the type of confidence interval
+#' (see \code{\link[ordinal]{confint.clm}})
+#' @param data original data, defaults to the extracting it from the model
+#' @param newdata if provided, performs predictions on the new data
+#' @param type.predict type of prediction to compute for a CLM; passed on to
+#' \code{\link[ordinal]{predict.clm}} or \code{predict.polr}
+#' @param ... extra arguments
+#' @return
+#' \code{tidy.clm}, \code{tidy.clmm}, \code{tidy.polr} and \code{tidy.svyolr}
+#' return one row for each coefficient at each level of the response variable,
+#' with six columns:
+#'   \item{term}{term in the model}
+#'   \item{estimate}{estimated coefficient}
+#'   \item{std.error}{standard error}
+#'   \item{statistic}{z-statistic}
+#'   \item{p.value}{two-sided p-value}
+#'   \item{coefficient_type}{type of coefficient, see \code{\link[ordinal]{clm}}}
+#'
+#' If \code{conf.int=TRUE}, it also includes columns for \code{conf.low} and
+#'
+#' \code{glance.clm}, \code{glance.clmm}, \code{glance.polr} and \code{glance.svyolr}
+#' return a one-row data.frame with the columns:
+#'   \item{edf}{the effective degrees of freedom}
+#'   \item{logLik}{the data's log-likelihood under the model}
+#'   \item{AIC}{the Akaike Information Criterion}
+#'   \item{BIC}{the Bayesian Information Criterion}
+#'   \item{df.residual}{residual degrees of freedom}
+#'
+#' \code{augment.clm} and \code{augment.polr} returns
+#' one row for each observation, with additional columns added to
+#' the original data:
+#'   \item{.fitted}{fitted values of model}
+#'   \item{.se.fit}{standard errors of fitted values}
+#'
+#' \code{augment} is not supportted for \code{\link[ordinal]{clmm}}
+#' and \code{\link[survey]{svyolr}} models.
+#'
+#' All tidying methods return a \code{data.frame} without rownames.
+#' The structure depends on the method chosen.
+#'
+#' @name ordinal_tidiers
+#'
+#' @examples
+#' if (require(ordinal)){
+#'   clm_mod <- clm(rating ~ temp * contact, data = wine)
+#'   tidy(clm_mod)
+#'   tidy(clm_mod, conf.int = TRUE)
+#'   tidy(clm_mod, conf.int = TRUE, conf.type = "Wald", exponentiate = TRUE)
+#'   glance(clm_mod)
+#'   head(augment(clm_mod))
+#'
+#'   clm_mod2 <- clm(rating ~ temp, nominal = ~ contact, data = wine)
+#'   tidy(clm_mod2)
+#'
+#'   clmm_mod <- clmm(rating ~ temp + contact + (1 | judge), data = wine)
+#'   tidy(clmm_mod)
+#'   glance(clmm_mod)
+#' }
+#' if (require(MASS)) {
+#'   polr_mod <- polr(Sat ~ Infl + Type + Cont, weights = Freq, data = housing)
+#'   tidy(polr_mod, exponentiate = TRUE, conf.int = TRUE)
+#'   glance(polr_mod)
+#'   head(augment(polr_mod, type.predict = "class"))
+#' }
+NULL
+
+#' @rdname ordinal_tidiers
+#' @export
+tidy.clm <- function(x, conf.int = FALSE, conf.level = .95,
+                     exponentiate = FALSE, quick = FALSE,
+                     conf.type = c("profile", "Wald"), ...) {
+  if (quick) {
+    co <- coef(x)
+    ret <- data.frame(
+      term = names(co), estimate = unname(co),
+      stringsAsFactors = FALSE
+    )
+    return(process_clm(ret, x, conf.int = FALSE, exponentiate = exponentiate))
+  }
+  conf.type <- match.arg(conf.type)
+  co <- coef(summary(x))
+  nn <- c("estimate", "std.error", "statistic", "p.value")
+  ret <- fix_data_frame(co, nn[seq_len(ncol(co))])
+  process_clm(
+    ret, x,
+    conf.int = conf.int, conf.level = conf.level,
+    exponentiate = exponentiate, conf.type = conf.type
+  )
+}
+
+
+process_clm <- function(ret, x, conf.int = FALSE, conf.level = .95, 
+                        exponentiate = FALSE, conf.type = "profile") {
+  if (exponentiate) {
+    trans <- exp
+  } else {
+    trans <- identity
+  }
+
+  if (conf.int) {
+    CI <- suppressMessages(
+      trans(stats::confint(x, level = conf.level, type = conf.type))
+    )
+    colnames(CI) <- c("conf.low", "conf.high")
+    CI <- as.data.frame(CI)
+    CI$term <- rownames(CI)
+    ret <- merge(ret, unrowname(CI), by = "term", all.x = TRUE)
+  }
+
+  ret$estimate <- trans(ret$estimate)
+  ret$coefficient_type <- ""
+  ret[ret$term %in% names(x$alpha), "coefficient_type"] <- "alpha"
+  ret[ret$term %in% names(x$beta), "coefficient_type"] <- "beta"
+  ret[ret$term %in% names(x$zeta), "coefficient_type"] <- "zeta"
+  ret
+}
+
+#' @rdname ordinal_tidiers
+#' @export
+tidy.clmm <- function(x, conf.int = FALSE, conf.level = .95,
+                      exponentiate = FALSE, quick = FALSE,
+                      conf.type = c("profile", "Wald"), ...) {
+  tidy.clm(x, conf.int, conf.level, exponentiate, quick, ...)
+}
+
+
+#' @rdname ordinal_tidiers
+#' @export
+tidy.polr <- function(x, conf.int = FALSE, conf.level = .95,
+                      exponentiate = FALSE, quick = FALSE, ...) {
+  if (quick) {
+    co <- coef(x)
+    ret <- data.frame(
+      term = names(co), estimate = unname(co),
+      stringsAsFactors = FALSE
+    )
+    return(process_polr(ret, x, conf.int = FALSE, exponentiate = exponentiate))
+  }
+  co <- suppressMessages(coef(summary(x)))
+  nn <- c("estimate", "std.error", "statistic", "p.value")
+  ret <- fix_data_frame(co, nn[seq_len(ncol(co))])
+  process_polr(
+    ret, x,
+    conf.int = conf.int, conf.level = conf.level,
+    exponentiate = exponentiate
+  )
+}
+
+
+process_polr <- function(ret, x, conf.int = FALSE, conf.level = .95, 
+                         exponentiate = FALSE) {
+  if (exponentiate) {
+    trans <- exp
+  } else {
+    trans <- identity
+  }
+
+  if (conf.int) {
+    CI <- suppressMessages(trans(stats::confint(x, level = conf.level)))
+    colnames(CI) <- c("conf.low", "conf.high")
+    CI <- as.data.frame(CI)
+    CI$term <- rownames(CI)
+    ret <- merge(ret, unrowname(CI), by = "term", all.x = TRUE)
+  }
+
+  ret$estimate <- trans(ret$estimate)
+  ret$coefficient_type <- ""
+  ret[ret$term %in% names(x$coefficients), "coefficient_type"] <- "coefficient"
+  ret[ret$term %in% names(x$zeta), "coefficient_type"] <- "zeta"
+  ret
+}
+
+#' @rdname ordinal_tidiers
+#' @export
+tidy.svyolr <- tidy.polr
+
+
+#' @rdname ordinal_tidiers
+#' @export
+glance.clm <- function(x, ...) {
+  ret <- with(
+    x,
+    data.frame(
+      edf = edf
+    )
+  )
+  finish_glance(ret, x)
+}
+
+#' @rdname ordinal_tidiers
+#' @export
+glance.clmm <- glance.clm
+
+#' @rdname ordinal_tidiers
+#' @export
+glance.polr <- glance.clm
+
+#' @rdname ordinal_tidiers
+#' @export
+glance.svyolr <- glance.clm
+
+#' @rdname ordinal_tidiers
+#' @export
+augment.clm <- function(x, data = stats::model.frame(x),
+                        newdata, type.predict = c("prob", "class"), ...) {
+  type.predict <- match.arg(type.predict)
+  augment.lm(x, data, newdata, type.predict, ...)
+}
+
+#' @rdname ordinal_tidiers
+#' @export
+augment.polr <- function(x, data = stats::model.frame(x),
+                         newdata, type.predict = c("probs", "class"), ...) {
+  type.predict <- match.arg(type.predict)
+  augment.lm(x, data, newdata, type.predict, ...)
+}
diff --git a/man/garch_tidiers.Rd b/man/garch_tidiers.Rd