Skip to content

Commit

Permalink
Add ordinal logistic regression. Close #130.
Browse files Browse the repository at this point in the history
  • Loading branch information
eheinzen committed Aug 29, 2018
1 parent 0976193 commit 39eee61
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 21 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Expand Up @@ -11,6 +11,8 @@

* `modelsum()`: Fix formatting of error about unsupported families.

* `modelsum()`: Add `family="ordinal"` to do ordinal logistic regression using `MASS::polr()`. (#130)

# arsenal v1.3.0

**This is a mostly backwards-compatible update.**
Expand Down
10 changes: 6 additions & 4 deletions R/as.data.frame.modelsum.R
Expand Up @@ -4,18 +4,20 @@ get_the_estimate <- function(fitList, cntrl)
# even if the model doesn't have an intercept, that's okay
labs <- c("(Intercept)", fitList$label, fitList$adjlabels)
names(labs) <- c("(Intercept)", fitList$xterms, fitList$adjterms)
trms <- fitList$coeff$term

out <- data.frame(
term = fitList$coeff$term,
label = labs[fitList$coeff$term],
term.type = ifelse(fitList$coeff$term == "(Intercept)", "Intercept", ifelse(fitList$coeff$term %in% fitList$adjterms, "Adjuster", "Term")),
term = trms,
label = ifelse(trms %in% names(labs), labs[trms], trms),
term.type = ifelse(trms %in% fitList$adjterms, "Adjuster",
ifelse(trms %in% fitList$xterms, "Term", "Intercept")),
stringsAsFactors = FALSE
)

statFields <- switch(fitList$family,
quasibinomial = cntrl$binomial.stats, binomial = cntrl$binomial.stats,
quasipoisson = cntrl$poisson.stats, poisson = cntrl$poisson.stats,
survival = cntrl$survival.stats,
survival = cntrl$survival.stats, ordinal = cntrl$ordinal.stats,
cntrl$gaussian.stats)

if(any(names(fitList$coeff) %in% statFields)) out <- cbind(out, fitList$coeff[, intersect(statFields, names(fitList$coeff)), drop = FALSE])
Expand Down
32 changes: 28 additions & 4 deletions R/modelsum.R
Expand Up @@ -74,9 +74,17 @@ modelsum <- function(formula, family="gaussian", data, adjust=NULL, na.action =
## Allow family parameter to passed with or without quotes
## exception is survival, would require public function named survival.
## Here, we force quotes to simplify in for loop below
if (is.function(family)) family <- family()$family
if(is.function(family) || is.character(family))
{
family.list <- match.fun(family)()
family <- family.list$family
} else
{
family.list <- family
family <- family$family
}

if(family %nin% c("survival", "gaussian", "binomial", "poisson", "quasibinomial", "quasipoisson"))
if(family %nin% c("survival", "gaussian", "binomial", "poisson", "quasibinomial", "quasipoisson", "ordinal"))
stop("Family ", family, " not supported.\n")

if(family != "survival" && any(grepl("Surv\\(", formula))) {
Expand Down Expand Up @@ -146,8 +154,18 @@ modelsum <- function(formula, family="gaussian", data, adjust=NULL, na.action =

## placeholder for ordered, don't do any fitting
## y is ordered factor
if (family == "ordered") {
stop("family == 'ordered' isn't implemented yet")
if (family == "ordinal") {
temp.call[[1]] <- quote(MASS::polr)
temp.call$Hess <- TRUE
temp.call$method <- family.list$method
fit <- eval(temp.call, parent.frame())
coeffORTidy <- broom::tidy(fit, exponentiate=TRUE, conf.int=TRUE, conf.level=control$conf.level)
coeffORTidy[coeffORTidy$coefficient_type == "zeta", names(coeffORTidy) %nin% c("term", "coefficient_type")] <- NA
coeffTidy <- broom::tidy(fit, exponentiate=FALSE, conf.int=TRUE, conf.level=control$conf.level)
coeffTidy$p.value <- 2*pnorm(abs(coeffTidy$statistic), lower.tail = FALSE)
coeffTidy <- cbind(coeffTidy, OR=coeffORTidy$estimate, CI.lower.OR=coeffORTidy$conf.low, CI.upper.OR=coeffORTidy$conf.high)
modelGlance <- broom::glance(fit)

} else if (family == "gaussian") {
# ## issue warning if appears categorical
if(length(unique(maindf[[1]])) <= 5) {
Expand Down Expand Up @@ -263,6 +281,12 @@ modelsum <- function(formula, family="gaussian", data, adjust=NULL, na.action =
## keep as private function
survival <- function() list(family="survival")

ordinal <- function(method = c("logistic", "probit", "loglog", "cloglog", "cauchit"))
{
list(family = "ordinal", method = match.arg(method))
}


#' @rdname modelsum
#' @export
print.modelsum <- function(x, ...) {
Expand Down
43 changes: 33 additions & 10 deletions R/modelsum.control.R
Expand Up @@ -13,7 +13,7 @@
#' @param show.adjust Logical, denoting whether to show adjustment terms.
#' @param show.intercept Logical, denoting whether to show intercept terms.
#' @param conf.level Numeric, giving the confidence level.
#' @param binomial.stats,survival.stats,gaussian.stats,poisson.stats
#' @param ordinal.stats,binomial.stats,survival.stats,gaussian.stats,poisson.stats
#' Character vectors denoting which stats to show for the various model types.
#' @param stat.labels A named list of labels for all the stats used above.
#' @param ... Other arguments (not in use at this time).
Expand All @@ -25,14 +25,16 @@
#' is less than the resulting number of places, it will be formatted to show so.
#' @seealso \code{\link{modelsum}}, \code{\link{summary.modelsum}}
#' @export
modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, format.p = TRUE,
show.adjust = TRUE, show.intercept = TRUE, conf.level = 0.95,
binomial.stats=c("OR","CI.lower.OR","CI.upper.OR","p.value", "concordance","Nmiss"),
gaussian.stats=c("estimate","std.error","p.value","adj.r.squared","Nmiss"),
poisson.stats=c("RR","CI.lower.RR", "CI.upper.RR","p.value","concordance","Nmiss"),
survival.stats=c("HR","CI.lower.HR","CI.upper.HR","p.value","concordance","Nmiss"),
stat.labels = list(), ...)
{
modelsum.control <- function(
digits = 3L, digits.ratio = 3L, digits.p = 3L, format.p = TRUE,
show.adjust = TRUE, show.intercept = TRUE, conf.level = 0.95,
ordinal.stats=c("OR","CI.lower.OR","CI.upper.OR", "p.value","Nmiss"),
binomial.stats=c("OR","CI.lower.OR","CI.upper.OR","p.value", "concordance","Nmiss"),
gaussian.stats=c("estimate","std.error","p.value","adj.r.squared","Nmiss"),
poisson.stats=c("RR","CI.lower.RR", "CI.upper.RR","p.value","concordance","Nmiss"),
survival.stats=c("HR","CI.lower.HR","CI.upper.HR","p.value","concordance","Nmiss"),
stat.labels = list(), ...
) {

if("nsmall" %in% names(list(...))) .Deprecated(msg = "Using 'nsmall = ' is deprecated. Use 'digits = ' instead.")
if("nsmall.ratio" %in% names(list(...))) .Deprecated(msg = "Using 'nsmall.ratio = ' is deprecated. Use 'digits.ratio = ' instead.")
Expand Down Expand Up @@ -60,6 +62,27 @@ modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, form
conf.level <- 0.95
}

##########################
## Ordinal stats:
##########################

ordinal.stats.valid <- c(
"Nmiss", "OR", "CI.lower.OR", "CI.upper.OR", "p.value", # default
"estimate", "CI.OR", "CI.estimate", "CI.lower.estimate", "CI.upper.estimate", "N", "Nmiss2", "endpoint", "std.error", "statistic",
"logLik", "AIC", "BIC", "edf", "deviance", "df.residual"
)

if(any(ordinal.stats %nin% ordinal.stats.valid)) {
stop("Invalid binomial stats: ",
paste(ordinal.stats[ordinal.stats %nin% ordinal.stats.valid],collapse=","), "\n")
}
## let CI.OR decode to CI.lower.OR and CI.upper.OR
if(any(ordinal.stats == "CI.OR")) {
ordinal.stats <- unique(c(ordinal.stats[ordinal.stats != "CI.OR"], "CI.lower.OR", "CI.upper.OR"))
}
if(any(ordinal.stats == "CI.estimate")) {
ordinal.stats <- unique(c(ordinal.stats[ordinal.stats != "CI.estimate"], "CI.lower.estimate", "CI.upper.estimate"))
}

##########################
## Binomial stats:
Expand Down Expand Up @@ -155,6 +178,6 @@ modelsum.control <- function(digits = 3L, digits.ratio = 3L, digits.p = 3L, form
}
list(digits=digits, digits.ratio=digits.ratio, digits.p = digits.p, format.p = format.p,
show.adjust=show.adjust, show.intercept=show.intercept, conf.level=conf.level,
binomial.stats=binomial.stats, gaussian.stats=gaussian.stats,
ordinal.stats=ordinal.stats, binomial.stats=binomial.stats, gaussian.stats=gaussian.stats,
poisson.stats=poisson.stats, survival.stats=survival.stats, stat.labels = stat.labels)
}
2 changes: 1 addition & 1 deletion R/summary.modelsum.R
Expand Up @@ -51,7 +51,7 @@ as.data.frame.summary.modelsum <- function(x, ..., text = x$text, term.name = x$
# non-integers, one-per-model
use.digits1 <- c("logLik", "AIC", "BIC", "null.deviance", "deviance",
"statistic.F", "dispersion", "statistic.sc", "concordance", "std.error.concordance",
"adj.r.squared", "r.squared")
"adj.r.squared", "r.squared", "edf")

# non-integers, many-per-model
use.digits2 <- c("estimate", "CI.lower.estimate", "CI.upper.estimate", "std.error", "statistic", "standard.estimate")
Expand Down
5 changes: 3 additions & 2 deletions man/modelsum.control.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions tests/testthat/test_modelsum.R
Expand Up @@ -151,6 +151,52 @@ test_that("'weights=' works", {
})


test_that("OLR works", {
if(require(MASS))
{
data(housing)
expect_identical(
capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal"))),
c("| |OR |CI.lower.OR |CI.upper.OR |p.value |",
"|:------------------|:-----|:-----------|:-----------|:-------|",
"|**Cont High** |1.434 |1.189 |1.730 |< 0.001 |",
"|**Infl High** |3.628 |2.832 |4.663 |< 0.001 |",
"|**Infl Medium** |1.762 |1.436 |2.164 |< 0.001 |",
"|Low&#124;Medium |NA |NA |NA |< 0.001 |",
"|Medium&#124;High |NA |NA |NA |< 0.001 |",
"|**Type Apartment** |0.564 |0.446 |0.712 |< 0.001 |",
"|**Type Atrium** |0.693 |0.511 |0.940 |0.018 |",
"|**Type Terrace** |0.336 |0.249 |0.451 |< 0.001 |"
)
)
expect_identical(
capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal",
ordinal.stats = c("estimate", "statistic", "p.value")), text = TRUE)),
c("| |estimate |statistic |p.value |",
"|:----------------|:--------|:---------|:-------|",
"|Cont High |0.360 |3.771 |< 0.001 |",
"|Infl High |1.289 |10.136 |< 0.001 |",
"|Infl Medium |0.566 |5.412 |< 0.001 |",
"|Low&#124;Medium |-0.496 |-3.974 |< 0.001 |",
"|Medium&#124;High |0.691 |5.505 |< 0.001 |",
"|Type Apartment |-0.572 |-4.800 |< 0.001 |",
"|Type Atrium |-0.366 |-2.360 |0.018 |",
"|Type Terrace |-1.091 |-7.202 |< 0.001 |"
)
)
expect_identical(
capture.kable(summary(modelsum(Sat ~ Infl, adjust = ~ Type + Cont, weights = Freq, data = housing, family = "ordinal",
show.adjust = FALSE, show.intercept = FALSE), text = TRUE)),
c("| |OR |CI.lower.OR |CI.upper.OR |p.value |",
"|:-----------|:-----|:-----------|:-----------|:-------|",
"|Infl High |3.628 |2.832 |4.663 |< 0.001 |",
"|Infl Medium |1.762 |1.436 |2.164 |< 0.001 |"
)
)
} else skip("'MASS' is not available")
})


###########################################################################################################
#### Reported bugs for modelsum
###########################################################################################################
Expand Down

0 comments on commit 39eee61

Please sign in to comment.