From 607fe01f5f4736c78d947423dac9c2f533e36a12 Mon Sep 17 00:00:00 2001 From: Lukas Wallrich Date: Thu, 16 Apr 2020 20:58:42 +0100 Subject: [PATCH 1/3] Add option to return p.values to tidy.polr --- R/mass-polr-tidiers.R | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/R/mass-polr-tidiers.R b/R/mass-polr-tidiers.R index 2de7e8318..94b6e92ab 100644 --- a/R/mass-polr-tidiers.R +++ b/R/mass-polr-tidiers.R @@ -4,6 +4,8 @@ #' @param x A `polr` object returned from [MASS::polr()]. #' @template param_confint #' @template param_exponentiate +#' @param p.values Logical. Should p-values be returned, +#' based on chi-squared tests from [MASS::dropterm()]. Defaults to FAlSE #' @template param_unused_dots #' #' @examples @@ -17,18 +19,30 @@ #' glance(fit) #' augment(fit, type.predict = "class") #' +#' fit2 <- polr(factor(gear) ~ am + mpg + qsec, data = mtcars) +#' tidy(fit, p.values = TRUE) +#' #' @evalRd return_tidy(regression = TRUE) #' #' @details In `broom 0.7.0` the `coefficient_type` column was renamed to #' `coef.type`, and the contents were changed as well. Now the contents #' are `coefficient` and `scale`, rather than `coefficient` and `zeta`. +#' +#' Calculating p.values with the `dropterm()` function is the approach +#' suggested by the MASS package author +#' (https://r.789695.n4.nabble.com/p-values-of-plor-td4668100.html). This +#' approach is computationally intensive, so that p.values are only +#' returned if requested explicitly. Additionally, it only works for +#' models containing no variables with more than two categories. If this +#' condition is not met, a message is shown and NA is returned instead of +#' p-values. #' #' @aliases polr_tidiers #' @export #' @seealso [tidy], [MASS::polr()] #' @family ordinal tidiers tidy.polr <- function(x, conf.int = FALSE, conf.level = 0.95, - exponentiate = FALSE, ...) { + exponentiate = FALSE, p.values = FALSE, ...) { ret <- as_tibble(coef(summary(x)), rownames = "term") colnames(ret) <- c("term", "estimate", "std.error", "statistic") @@ -41,6 +55,20 @@ tidy.polr <- function(x, conf.int = FALSE, conf.level = 0.95, if (exponentiate) ret <- exponentiate(ret) + if (p.values) { + sig <- MASS::dropterm(x, test = "Chisq") + p <- sig %>% dplyr::select(`Pr(Chi)`) %>% dplyr::pull() %>% .[-1] + terms <- purrr::map(rownames(sig)[-1], function(x) + ret$term[stringr::str_detect(ret$term, stringr::fixed(x))]) %>% unlist() + if (length(p) == length(terms)) { + ret <- dplyr::left_join(ret, tibble::tibble(term = terms, p.value = p), by = "term") + } else { + message("p-values can presently only be returned for models that contain + no categorical variables with more than two levels") + ret$p.value <- NA + } + } + mutate( ret, coef.type = if_else(term %in% names(x$zeta), "scale", "coefficient") From 0428c7dc519d56249b6f2e7a65b83d910ccb84f4 Mon Sep 17 00:00:00 2001 From: Lukas Wallrich Date: Fri, 24 Apr 2020 13:16:03 +0100 Subject: [PATCH 2/3] Update NEWS.md --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 700781c7d..95cb8de03 100644 --- a/NEWS.md +++ b/NEWS.md @@ -150,6 +150,8 @@ pending getting `safepredict()` going urgh) - `tidy.lsmobj()` gained a `conf.int` argument for consistency with other tidiers. +- `tidy.polr()` now returns p-values if `p.values` is set to TRUE and the model does not contain factors with more than two levels. + - `tidy.zoo()` now doesn't change column names that have spaces or other special characters (previously they were converted to data.frame friendly column names by `make.names`) From 1b5d17854bad4ccf3972a316b5cb754d97adbab8 Mon Sep 17 00:00:00 2001 From: Lukas Wallrich Date: Fri, 24 Apr 2020 13:18:51 +0100 Subject: [PATCH 3/3] Update DESCRIPTION --- DESCRIPTION | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 73542cd37..9787ee397 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -275,7 +275,12 @@ Authors@R: family = "Kuriwaki", role = "ctb", email = "shirokuriwaki@gmail.com", - comment = c(ORCID = "0000-0002-5687-2647"))) + comment = c(ORCID = "0000-0002-5687-2647")), + person(given = "Lukas", + family = "Wallrich", + role = "ctb", + email = "lukas.wallrich@gmail.com", + comment = c(ORCID = "0000-0003-2121-5177"))) Description: Summarizes key information about statistical objects in tidy tibbles. This makes it easy to report results, create plots and consistently work with large numbers of models at once.