diff --git a/DESCRIPTION b/DESCRIPTION index 511b1299a..2dc4faf6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -221,7 +221,11 @@ Authors@R: family = "Gegzna", email = "GegznaV@gmail.com", role = "ctb", - comment = c(ORCID = "0000-0002-9500-5167"))) + comment = c(ORCID = "0000-0002-9500-5167")), + person(given = "Eduard", + family = "Szoecs", + email = "eduardszoecs@gmail.com", + role = "ctb")) Description: Summarizes key information about statistical objects in tidy tibbles. This makes it easy to report results, create plots and consistently work with large numbers of models at once. @@ -327,6 +331,10 @@ Suggests: tseries, xergm, zoo, + modeltests, + leaps, + lm.beta, + drc metafor VignetteBuilder: knitr @@ -353,6 +361,7 @@ Collate: 'car-tidiers.R' 'caret-tidiers.R' 'data-frame-tidiers.R' + 'drc-tidiers.R' 'emmeans-tidiers.R' 'ergm-tidiers.R' 'gam-tidiers.R' diff --git a/NAMESPACE b/NAMESPACE index 4dc755453..96fe2b258 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ S3method(augment,clm) S3method(augment,coxph) S3method(augment,decomposed.ts) S3method(augment,default) +S3method(augment,drc) S3method(augment,factanal) S3method(augment,felm) S3method(augment,glm) @@ -49,6 +50,7 @@ S3method(glance,coxph) S3method(glance,cv.glmnet) S3method(glance,data.frame) S3method(glance,default) +S3method(glance,drc) S3method(glance,durbinWatsonTest) S3method(glance,ergm) S3method(glance,factanal) @@ -131,6 +133,7 @@ S3method(tidy,cv.glmnet) S3method(tidy,default) S3method(tidy,density) S3method(tidy,dist) +S3method(tidy,drc) S3method(tidy,durbinWatsonTest) S3method(tidy,emmGrid) S3method(tidy,ergm) diff --git a/NEWS.md b/NEWS.md index a42e8eddc..9687c337d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -86,6 +86,13 @@ TODO: sort out what happens to `glance.aov()` - Added `tidy.regsubsets()` for best subsets linear regression from the `leaps` package - Added method `tidy.lm.beta()` to tidy `lm.beta` class models (#545 by @mattle24) + +- Add feature for glance.biglm to return df.residual + +- Patch bug in glance.lavaan (#577) + +- Added tidiers for `drc::drm` models (#574 by @edild) + - `tidy.prcomp()` parameter `matrix` gained new options `"scores"`, `"loadings"`, and `"eigenvalues"` (#557 by @GegznaV) - `tidy.kmeans()` now uses the names of the input variables in the output by diff --git a/R/drc-tidiers.R b/R/drc-tidiers.R new file mode 100644 index 000000000..f73ae752c --- /dev/null +++ b/R/drc-tidiers.R @@ -0,0 +1,186 @@ +#' @templateVar class drc +#' @template title_desc_tidy +#' +#' @param x A `drc` object produced by a call to [drc::drm()]. +#' @template param_confint +#' @template param_unused_dots +#' @param quick whether to compute a smaller and faster version, containing +#' only the \code{term}, \code{curveid} and \code{estimate} columns. +#' @evalRd return_tidy( +#' curveid = "Id of the curve", +#' "term", +#' "estimate", +#' "std.error", +#' "statistic", +#' "p.value", +#' "conf.low", +#' "conf.high" +#' ) +#' @details The tibble has one row for each curve and term in the regression. The +#' `curveid` column indicates the curve. +#' @author Eduard Szoecs, \email{eduardszoecs@@gmail.com} +#' @examples +#' library(drc) +#' mod <- drm(dead/total~conc, type, +#' weights = total, data = selenium, fct = LL.2(), type = "binomial") +#' mod +#' +#' tidy(mod) +#' tidy(mod, conf.int = TRUE) +#' tidy(mod, quick = TRUE) + +#' glance(mod) + +#' # augment(mod) +#' @export +#' @seealso [tidy()], [drc::drm()] +#' @family drc tidiers +#' @aliases drc_tidiers +tidy.drc <- function(x, conf.int = FALSE, conf.level = 0.95, quick = FALSE, ...) { + if (quick) { + co <- coef(x) + nam <- names(co) + term <- gsub("^(.*):(.*)$", "\\1", nam) + curves <- x[["dataList"]][["curveid"]] + if (length(unique(curves)) > 1) { + curveid <- gsub("^(.*):(.*)$", "\\2", nam) + } else { + curveid <- unique(curves) + } + ret <- tibble(term = term, + curveid = curveid, + estimate = unname(co)) + return(ret) + } + + co <- coef(summary(x)) + + nam <- rownames(co) + term <- gsub("^(.*):(.*)$", "\\1", nam) + curves <- x[["dataList"]][["curveid"]] + if (length(unique(curves)) > 1) { + curveid <- gsub("^(.*):(.*)$", "\\2", nam) + } else { + curveid <- unique(curves) + } + ret <- data.frame(term = term, + curveid = curveid, + co, stringsAsFactors = FALSE) + names(ret) <- c("term", "curveid", "estimate", "std.error", "statistic", + "p.value") + rownames(ret) <- NULL + + if (conf.int) { + conf <- confint(x, level = conf.level) + colnames(conf) <- c("conf.low", "conf.high") + rownames(conf) <- NULL + ret <- cbind(ret, conf) + } + + return(as_tibble(ret)) +} + +#' @templateVar class drc +#' @template title_desc_glance +#' +#' @inherit tidy.drc params examples +#' @template param_unused_dots +#' +#' @evalRd return_glance( +#' "logLik", +#' "AIC", +#' "AICc" = "AIC corrected for small samples", +#' "BIC", +#' "df.residual" +#' ) +#' @seealso [glance()], [drc::drm()] +#' @export +#' @family drc tidiers +glance.drc <- function(x, ...) { + ret <- data.frame(AIC = AIC(x), + BIC = BIC(x), + logLik = logLik(x), + df.residual = x$df.residual) + return(as_tibble(ret)) +} + +#' @templateVar class drc +#' @template title_desc_augment + +#' @inherit tidy.drc params examples +#' @template param_data +#' @template param_newdata +#' @template param_confint +#' @template param_se_fit +#' @template param_unused_dots +#' +#' @evalRd return_augment(".conf.low" = "Lower Confidence Interval", +#' ".conf.high" = "Upper Confidence Interval", +#' ".se.fit", +#' ".fitted", +#' ".resid", +#' ".cooksd") +#' +#' @seealso [augment()], [drc::drm()] +#' @export +#' @author Eduard Szoecs, \email{eduardszoecs@@gmail.com} +#' @family drc tidiers +augment.drc <- function(x, data = NULL, newdata = NULL, + se_fit = FALSE, conf.int = FALSE, conf.level = 0.95, ...) { + + if (is.null(data) && is.null(newdata)) { + stop("Must specify either `data` or `newdata` argument.", call. = FALSE) + } + + # drc doesn't like tibbles + if (inherits(newdata, "tbl")) { + newdata <- data.frame(newdata) + } + + # drc doesn't like NA in the type + if (!missing(newdata) || is.null(newdata)) { + original <- newdata + original$.rownames <- rownames(original) + } + + if (!missing(newdata) && x$curveVarNam %in% names(newdata) && + any(is.na(newdata[[x$curveVarNam]]))) { + newdata <- newdata[!is.na(newdata[[x$curveVarNam]]), ] + } + + ret <- augment_columns(x, data, newdata, se.fit = FALSE) + + if (!is.null(newdata)) { + if (conf.int) { + preds <- data.frame(predict(x, newdata = newdata, interval = "confidence", + level = conf.level)) + ret[[".conf.low"]] <- preds[["Lower"]] + ret[[".conf.high"]] <- preds[["Upper"]] + } + if (se_fit) { + preds <- data.frame(predict(x, newdata = newdata, se.fit = TRUE)) + ret[[".se.fit"]] <- preds[["SE"]] + } + } + + # join back removed rows + if (!".rownames" %in% names(ret)) { + ret$.rownames <- rownames(ret) + } + + if (!is.null(original)) { + reto <- ret %>% select(starts_with(".")) + ret <- merge(reto, original, by = ".rownames", all.y = TRUE) + } + + # reorder to line up with original + ret <- ret[order(match(ret$.rownames, rownames(original))), ] + rownames(ret) <- NULL + + # if rownames are just the original 1...n, they can be removed + if (all(ret$.rownames == seq_along(ret$.rownames))) { + ret$.rownames <- NULL + } + + as_tibble(ret) +} \ No newline at end of file diff --git a/man/augment.drc.Rd b/man/augment.drc.Rd new file mode 100644 index 000000000..03ad6f25e --- /dev/null +++ b/man/augment.drc.Rd @@ -0,0 +1,117 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drc-tidiers.R +\name{augment.drc} +\alias{augment.drc} +\title{Augment data with information from a(n) drc object} +\usage{ +\method{augment}{drc}(x, data = NULL, newdata = NULL, se_fit = FALSE, + conf.int = FALSE, conf.level = 0.95, ...) +} +\arguments{ +\item{x}{A \code{drc} object produced by a call to \code{\link[drc:drm]{drc::drm()}}.} + +\item{data}{A \code{\link[=data.frame]{data.frame()}} or \code{\link[tibble:tibble]{tibble::tibble()}} containing the original +data that was used to produce the object \code{x}. Defaults to +\code{stats::model.frame(x)} so that \code{augment(my_fit)} returns the augmented +original data. \strong{Do not} pass new data to the \code{data} argument. +Augment will report information such as influence and cooks distance for +data passed to the \code{data} argument. These measures are only defined for +the original training data.} + +\item{newdata}{A \code{\link[=data.frame]{data.frame()}} or \code{\link[tibble:tibble]{tibble::tibble()}} containing all +the original predictors used to create \code{x}. Defaults to \code{NULL}, indicating +that nothing has been passed to \code{newdata}. If \code{newdata} is specified, +the \code{data} argument will be ignored.} + +\item{se_fit}{Logical indicating whether or not a \code{.se.fit} column should be +added to the augmented output. For some models, this calculation can be +somwhat time-consuming. Defaults to \code{FALSE}.} + +\item{conf.int}{Logical indicating whether or not to include a confidence +interval in the tidied output. Defaults to \code{FALSE}.} + +\item{conf.level}{The confidence level to use for the confidence interval +if \code{conf.int = TRUE}. Must be strictly greater than 0 and less than 1. +Defaults to 0.95, which corresponds to a 95 percent confidence interval.} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Augment accepts a model object and a dataset and adds +information about each observation in the dataset. Most commonly, this +includes predicted values in the \code{.fitted} column, residuals in the +\code{.resid} column, and standard errors for the fitted values in a \code{.se.fit} +column. New columns always begin with a \code{.} prefix to avoid overwriting +columns in the original dataset. + +Users may pass data to augment via either the \code{data} argument or the +\code{newdata} argument. If the user passes data to the \code{data} argument, +it \strong{must} be exactly the data that was used to fit the model +object. Pass datasets to \code{newdata} to augment data that was not used +during model fitting. This still requires that all columns used to fit +the model are present. + +Augment will often behavior different depending on whether \code{data} or +\code{newdata} is specified. This is because there is often information +associated with training observations (such as influences or related) +measures that is not meaningfully defined for new observations. + +For convenience, many augment methods provide default \code{data} arguments, +so that \code{augment(fit)} will return the augmented training data. In these +cases augment tries to reconstruct the original data based on the model +object, with some varying degrees of success. + +The augmented dataset is always returned as a \link[tibble:tibble]{tibble::tibble} with the +\strong{same number of rows} as the passed dataset. This means that the +passed data must be coercible to a tibble. At this time, tibbles do not +support matrix-columns. This means you should not specify a matrix +of covariates in a model formula during the original model fitting +process, and that \code{\link[splines:ns]{splines::ns()}}, \code{\link[stats:poly]{stats::poly()}} and +\code{\link[survival:Surv]{survival::Surv()}} objects are not supported in input data. If you +encounter errors, try explicitly passing a tibble, or fitting the original +model on data in a tibble. + +We are in the process of defining behaviors for models fit with various +\link{na.action} arguments, but make no guarantees about behavior when data is +missing at this time. +} +\examples{ +library(drc) +mod <- drm(dead/total~conc, type, + weights = total, data = selenium, fct = LL.2(), type = "binomial") +mod + +tidy(mod) +tidy(mod, conf.int = TRUE) +tidy(mod, quick = TRUE) +glance(mod) +# augment(mod) +} +\seealso{ +\code{\link[=augment]{augment()}}, \code{\link[drc:drm]{drc::drm()}} + +Other drc tidiers: \code{\link{glance.drc}}, + \code{\link{tidy.drc}} +} +\author{ +Eduard Szoecs, \email{eduardszoecs@gmail.com} +} +\concept{drc tidiers} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with columns: + \item{.cooksd}{Cooks distance.} + \item{.fitted}{Fitted or predicted value.} + \item{.resid}{The difference between fitted and observed values.} + \item{.se.fit}{Standard errors of fitted values.} + \item{.conf.low}{Lower Confidence Interval} + \item{.conf.high}{Upper Confidence Interval} + +} diff --git a/man/broom.Rd b/man/broom.Rd index aea95d46f..61c985c7f 100644 --- a/man/broom.Rd +++ b/man/broom.Rd @@ -79,6 +79,7 @@ Other contributors: \item Jason Muhlenkamp \email{jason.muhlenkamp@gmail.com} [contributor] \item Matt Lehman [contributor] \item Bill Denney \email{wdenney@humanpredictions.com} (0000-0002-5759-428X) [contributor] + \item Eduard Szoecs \email{eduardszoecs@gmail.com} [contributor] \item Nic Crane [contributor] \item Andrew Bates [contributor] \item Vincent Arel-Bundock \email{vincent.arel-bundock@umontreal.ca} (0000-0003-2042-7063) [contributor] diff --git a/man/glance.drc.Rd b/man/glance.drc.Rd new file mode 100644 index 000000000..a1d541e3c --- /dev/null +++ b/man/glance.drc.Rd @@ -0,0 +1,64 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drc-tidiers.R +\name{glance.drc} +\alias{glance.drc} +\title{Glance at a(n) drc object} +\usage{ +\method{glance}{drc}(x, ...) +} +\arguments{ +\item{x}{A \code{drc} object produced by a call to \code{\link[drc:drm]{drc::drm()}}.} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Glance accepts a model object and returns a \code{\link[tibble:tibble]{tibble::tibble()}} +with exactly one row of model summaries. The summaries are typically +goodness of fit measures, p-values for hypothesis tests on residuals, +or model convergence information. + +Glance never returns information from the original call to the modelling +function. This includes the name of the modelling function or any +arguments passed to the modelling function. + +Glance does not calculate summary measures. Rather, it farms out these +computations to appropriate methods and gathers the results together. +Sometimes a goodness of fit measure will be undefined. In these cases +the measure will be reported as \code{NA}. +} +\examples{ +library(drc) +mod <- drm(dead/total~conc, type, + weights = total, data = selenium, fct = LL.2(), type = "binomial") +mod + +tidy(mod) +tidy(mod, conf.int = TRUE) +tidy(mod, quick = TRUE) +glance(mod) +# augment(mod) +} +\seealso{ +\code{\link[=glance]{glance()}}, \code{\link[drc:drm]{drc::drm()}} + +Other drc tidiers: \code{\link{augment.drc}}, + \code{\link{tidy.drc}} +} +\concept{drc tidiers} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with exactly one row and columns: + \item{AIC}{Akaike's Information Criterion for the model.} + \item{BIC}{Bayesian Information Criterion for the model.} + \item{df.residual}{Residual degrees of freedom.} + \item{logLik}{The log-likelihood of the model. [stats::logLik()] may be a useful reference.} + \item{AICc}{AIC corrected for small samples} + +} diff --git a/man/tidy.drc.Rd b/man/tidy.drc.Rd new file mode 100644 index 000000000..1d067924f --- /dev/null +++ b/man/tidy.drc.Rd @@ -0,0 +1,79 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/drc-tidiers.R +\name{tidy.drc} +\alias{tidy.drc} +\alias{drc_tidiers} +\title{Tidy a(n) drc object} +\usage{ +\method{tidy}{drc}(x, conf.int = FALSE, conf.level = 0.95, + quick = FALSE, ...) +} +\arguments{ +\item{x}{A \code{drc} object produced by a call to \code{\link[drc:drm]{drc::drm()}}.} + +\item{conf.int}{Logical indicating whether or not to include a confidence +interval in the tidied output. Defaults to \code{FALSE}.} + +\item{conf.level}{The confidence level to use for the confidence interval +if \code{conf.int = TRUE}. Must be strictly greater than 0 and less than 1. +Defaults to 0.95, which corresponds to a 95 percent confidence interval.} + +\item{quick}{whether to compute a smaller and faster version, containing +only the \code{term}, \code{curveid} and \code{estimate} columns.} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Tidy summarizes information about the components of a model. +A model component might be a single term in a regression, a single +hypothesis, a cluster, or a class. Exactly what tidy considers to be a +model component varies cross models but is usually self-evident. +If a model has several distinct types of components, you will need to +specify which components to return. +} +\details{ +The tibble has one row for each curve and term in the regression. The +\code{curveid} column indicates the curve. +} +\examples{ +library(drc) +mod <- drm(dead/total~conc, type, + weights = total, data = selenium, fct = LL.2(), type = "binomial") +mod + +tidy(mod) +tidy(mod, conf.int = TRUE) +tidy(mod, quick = TRUE) +glance(mod) +# augment(mod) +} +\seealso{ +\code{\link[=tidy]{tidy()}}, \code{\link[drc:drm]{drc::drm()}} + +Other drc tidiers: \code{\link{augment.drc}}, + \code{\link{glance.drc}} +} +\author{ +Eduard Szoecs, \email{eduardszoecs@gmail.com} +} +\concept{drc tidiers} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with columns: + \item{conf.high}{Upper bound on the confidence interval for the estimate.} + \item{conf.low}{Lower bound on the confidence interval for the estimate.} + \item{estimate}{The estimated value of the regression term.} + \item{p.value}{The two-sided p-value associated with the observed statistic.} + \item{statistic}{The value of a T-statistic to use in a hypothesis that the regression term is non-zero.} + \item{std.error}{The standard error of the regression term.} + \item{term}{The name of the regression term.} + \item{curveid}{Id of the curve} + +} diff --git a/tests/testthat/test-drc.R b/tests/testthat/test-drc.R new file mode 100644 index 000000000..e5cc42432 --- /dev/null +++ b/tests/testthat/test-drc.R @@ -0,0 +1,84 @@ +context("drc") + +skip_if_not_installed("modeltests") +library(modeltests) + +skip_if_not_installed("drc") +library(drc) + +mod <- drm(dead/total~conc, type, + weights = total, data = selenium, fct = LL.2(), type = "binomial") + +test_that("drc tidier arguments", { + check_arguments(tidy.drc) + check_arguments(glance.drc) + check_arguments(augment.drc, strict = FALSE) + # Arguments conf.int, conf.level to `augment.drc` must be listed in the argument glossary. +}) + +test_that("tidy.drc", { + + td1 <- tidy(mod) + td2 <- tidy(mod, quick = TRUE) + td3 <- tidy(mod, robust = TRUE) + + check_tidy_output(td1, strict = FALSE) + check_tidy_output(td2, strict = FALSE) + check_tidy_output(td3, strict = FALSE) +}) + +test_that("glance.drc", { + + gl1 <- glance(mod) + + check_glance_outputs(gl1, strict = FALSE) +}) + +test_that("augment.drc", { + + expect_error( + augment(mod), + regexp = "Must specify either `data` or `newdata` argument." + ) + + check_augment_function( + augment.drc, + mod, + data = selenium, + newdata = selenium) +}) + + +mod2 <- drm(rootl ~ conc, data = ryegrass, fct = W2.4()) + +test_that("tidy.drc", { + + td1 <- tidy(mod2) + td2 <- tidy(mod2, quick = TRUE) + td3 <- tidy(mod2, robust = TRUE) + + check_tidy_output(td1, strict = FALSE) + check_tidy_output(td2, strict = FALSE) + check_tidy_output(td3, strict = FALSE) +}) + +test_that("glance.drc", { + + gl2 <- glance(mod2) + + check_glance_outputs(gl2, strict = FALSE) +}) + +test_that("augment.drc", { + + expect_error( + augment(mod2), + regexp = "Must specify either `data` or `newdata` argument." + ) + + check_augment_function( + augment.drc, + mod2, + data = ryegrass, + newdata = ryegrass) +}) \ No newline at end of file