diff --git a/DESCRIPTION b/DESCRIPTION index bb17379be..97495a992 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,7 +23,8 @@ Authors@R: c( person("Lukasz", "Komsta", email = "lukasz.komsta@umlub.pl", role = "ctb"), person("Frederick", "Novometsky", role = "ctb"), person("Wilson", "Freitas", role = "ctb"), - person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb")) + person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb"), + person("Simon", "Jackson", email = "drsimonjackson@gmail.com", role = "ctb")) Maintainer: David Robinson Description: Convert statistical analysis objects from R into tidy data frames, so that they can more easily be combined, reshaped and otherwise processed diff --git a/NAMESPACE b/NAMESPACE index 69e5dd34e..52b712f79 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,6 +8,7 @@ S3method(augment,coxph) S3method(augment,data.frame) S3method(augment,decomposed.ts) S3method(augment,default) +S3method(augment,factanal) S3method(augment,felm) S3method(augment,garch) S3method(augment,glmRob) @@ -51,6 +52,7 @@ S3method(glance,cv.glmnet) S3method(glance,data.frame) S3method(glance,default) S3method(glance,ergm) +S3method(glance,factanal) S3method(glance,felm) S3method(glance,fitdistr) S3method(glance,gam) @@ -137,6 +139,7 @@ S3method(tidy,dgTMatrix) S3method(tidy,dist) S3method(tidy,emmGrid) S3method(tidy,ergm) +S3method(tidy,factanal) S3method(tidy,felm) S3method(tidy,fitdistr) S3method(tidy,ftable) diff --git a/R/factanal_tidiers.R b/R/factanal_tidiers.R new file mode 100644 index 000000000..a1ccaa29f --- /dev/null +++ b/R/factanal_tidiers.R @@ -0,0 +1,131 @@ +#' Tidying methods for a factor analysis +#' +#' These methods tidy the factor loadings of a factor analysis, conducted via +#' \code{\link{factanal}}, into a summary, augment the original data with factor +#' scores, and construct a one-row glance of the model's statistics. +#' +#' @return All tidying methods return a \code{\link[tibble]{tibble}} without +#' rownames. +#' +#' @name factanal_tidiers +#' +#' @param x \code{\link{factanal}} object +#' @param data Original data +#' @param ... Additional arguments, not used +#' +#' @examples +#' +#' mod <- factanal(mtcars, 3, scores = "regression") +#' +#' glance(mod) +#' tidy(mod) +#' augment(mod) +#' augment(mod, mtcars) # Must include original data if desired +#' +NULL + +#' @rdname factanal_tidiers +#' +#' @return \code{tidy.factanal} returns one row for each variable used in the +#' analysis and the following columns: +#' \item{variable}{The variable being estimated in the factor analysis} +#' \item{uniqueness}{Proportion of residual, or unexplained variance} +#' \item{flX}{Factor loading of term on factor X. There will be as many columns +#' of this format as there were factors fitted.} +#' +#' @export +tidy.factanal <- function(x, ...) { + # Convert to format that we can work with + loadings <- stats::loadings(x) + class(loadings) <- "matrix" + + # Place relevant values into a tidy data frame + tidy_df <- data.frame(variable = rownames(loadings), + uniqueness = x$uniquenesses, + data.frame(loadings)) %>% + as_tibble() + + tidy_df$variable <- as.character(tidy_df$variable) + + # Remove row names and clean column names + rownames(tidy_df) <- NULL + colnames(tidy_df) <- gsub("Factor", "fl", colnames(tidy_df)) + + tidy_df +} + +#' @rdname factanal_tidiers +#' +#' @return When \code{data} is not supplied \code{augment.factanal} returns one +#' row for each observation, with a factor score column added for each factor +#' X, (\code{.fsX}). This is because \code{\link{factanal}}, unlike other +#' stats methods like \code{\link{lm}}, does not retain the original data. +#' +#' When \code{data} is supplied, \code{augment.factanal} returns one row for +#' each observation, with a factor score column added for each factor X, +#' (\code{.fsX}). +#' +#' @export +augment.factanal <- function(x, data, ...) { + scores <- x$scores + + # Check scores were computed + if (is.null(scores)) { + stop("Factor scores were not computed. Change the `scores` argument in factanal().") + } + + # Place relevant values into a tidy data frame + tidy_df <- data.frame(.rowname = rownames(scores), data.frame(scores)) %>% as_tibble() + tidy_df$.rowname <- as.character(tidy_df$.rowname) + + # Remove row names and clean column names + rownames(tidy_df) <- NULL + colnames(tidy_df) <- gsub("Factor", ".fs", colnames(tidy_df)) + + # Check if original data provided + if (missing(data)) { + return(tidy_df) + } + + # Bind to data + data$.rowname <- rownames(data) + tidy_df <- tidy_df %>% right_join(data, by = ".rowname") + + tidy_df %>% select(.rowname, everything(), + -matches("\\.fs[0-9]*"), matches("\\.fs[0-9]*")) +} + +#' @rdname factanal_tidiers +#' +#' @return \code{glance.factanal} returns a one-row data.frame with the columns: +#' \item{n.factors}{The number of fitted factors} +#' \item{total.variance}{Total cumulative proportion of variance accounted for by all factors} +#' \item{statistic}{Significance-test statistic} +#' \item{p.value}{p-value from the significance test, describing whether the +#' covariance matrix estimated from the factors is significantly different +#' from the observed covariance matrix} +#' \item{df}{Degrees of freedom used by the factor analysis} +#' \item{n}{Sample size used in the analysis} +#' \item{method}{The estimation method; always Maximum Likelihood, "mle"} +#' \item{converged}{Whether the factor analysis converged} +#' +#' @export +glance.factanal <- function(x, ...) { + + # Compute total variance accounted for by all factors + loadings <- stats::loadings(x) + class(loadings) <- "matrix" + total.variance <- sum(apply(loadings, 2, function(i) sum(i^2) / length(i))) + + # Results as single-row data frame + data_frame( + n.factors = x$factors, + total.variance = total.variance, + statistic = unname(x$STATISTIC), + p.value = unname(x$PVAL), + df = x$dof, + n = x$n.obs, + method = x$method, + converged = x$converged + ) +} diff --git a/man/factanal_tidiers.Rd b/man/factanal_tidiers.Rd new file mode 100644 index 000000000..3f067ad9d --- /dev/null +++ b/man/factanal_tidiers.Rd @@ -0,0 +1,69 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/factanal_tidiers.R +\name{factanal_tidiers} +\alias{factanal_tidiers} +\alias{tidy.factanal} +\alias{augment.factanal} +\alias{glance.factanal} +\title{Tidying methods for a factor analysis} +\usage{ +\method{tidy}{factanal}(x, ...) + +\method{augment}{factanal}(x, data, ...) + +\method{glance}{factanal}(x, ...) +} +\arguments{ +\item{x}{\code{\link{factanal}} object} + +\item{...}{Additional arguments, not used} + +\item{data}{Original data} +} +\value{ +All tidying methods return a \code{\link[tibble]{tibble}} without + rownames. + +\code{tidy.factanal} returns one row for each variable used in the + analysis and the following columns: + \item{variable}{The variable being estimated in the factor analysis} + \item{uniqueness}{Proportion of residual, or unexplained variance} + \item{flX}{Factor loading of term on factor X. There will be as many columns + of this format as there were factors fitted.} + +When \code{data} is not supplied \code{augment.factanal} returns one + row for each observation, with a factor score column added for each factor + X, (\code{.fsX}). This is because \code{\link{factanal}}, unlike other + stats methods like \code{\link{lm}}, does not retain the original data. + +When \code{data} is supplied, \code{augment.factanal} returns one row for +each observation, with a factor score column added for each factor X, +(\code{.fsX}). + +\code{glance.factanal} returns a one-row data.frame with the columns: + \item{n.factors}{The number of fitted factors} + \item{total.variance}{Total cumulative proportion of variance accounted for by all factors} + \item{statistic}{Significance-test statistic} + \item{p.value}{p-value from the significance test, describing whether the + covariance matrix estimated from the factors is significantly different + from the observed covariance matrix} + \item{df}{Degrees of freedom used by the factor analysis} + \item{n}{Sample size used in the analysis} + \item{method}{The estimation method; always Maximum Likelihood, "mle"} + \item{converged}{Whether the factor analysis converged} +} +\description{ +These methods tidy the factor loadings of a factor analysis, conducted via +\code{\link{factanal}}, into a summary, augment the original data with factor +scores, and construct a one-row glance of the model's statistics. +} +\examples{ + +mod <- factanal(mtcars, 3, scores = "regression") + +glance(mod) +tidy(mod) +augment(mod) +augment(mod, mtcars) # Must include original data if desired + +} diff --git a/tests/testthat/test-factanal.R b/tests/testthat/test-factanal.R new file mode 100644 index 000000000..3130e0d4c --- /dev/null +++ b/tests/testthat/test-factanal.R @@ -0,0 +1,43 @@ +# test tidy, augment, glance from factanal objects + +context("factanal tidiers") + +test_that("tidy.factanal works", { + n_factors <- 3 + fit <- factanal(mtcars, n_factors) + td <- tidy(fit) + check_tidy(td, exp.row = ncol(mtcars), exp.col = 2 + n_factors) + expect_equal(td$variable, colnames(mtcars)) + + n_factors2 <- 3 + fit2 <- factanal(mtcars, n_factors2) + td2 <- tidy(fit2) + expect_equal(ncol(td2), 2 + n_factors2) +}) + +test_that("glance.factanal works", { + n_factors <- 3 + fit <- factanal(mtcars, n_factors) + td <- glance(fit) + check_tidy(td, exp.row = 1, exp.col = 8) + expect_equal(td$n.factors, n_factors) +}) + +test_that("augment.factanal works", { + n_factors <- 3 + fit <- factanal(mtcars, n_factors, scores = "regression") + td <- augment(fit) + check_tidy(td, exp.row = nrow(mtcars), exp.col = 1 + n_factors) + expect_equal(td$.rowname, rownames(mtcars)) + + fit2 <- factanal(mtcars, n_factors, scores = "Bartlett") + td2 <- augment(fit2, mtcars) + check_tidy(td2, exp.row = nrow(mtcars), exp.col = 1 + n_factors + ncol(mtcars)) +}) + +test_that("augment.factanal does not support none scores", { + n_factors <- 3 + fit <- factanal(mtcars, n_factors, scores = "none") + expect_error(augment(fit)) + expect_error(augment(fit, mtcars)) +})