Add factanal_tidiers (#171)

tidymodels · Jun 9, 2018 · 188c095 · 188c095
1 parent 6cc6398
commit 188c095
Show file tree

Hide file tree

Showing 5 changed files with 248 additions and 1 deletion.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -23,7 +23,8 @@ Authors@R: c(
     person("Lukasz", "Komsta", email = "lukasz.komsta@umlub.pl", role = "ctb"),
     person("Frederick", "Novometsky", role = "ctb"),
     person("Wilson", "Freitas", role = "ctb"),
-    person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb"))  
+    person("Jason Cory", "Brunson", email = "cornelioid@gmail.com", role = "ctb"),
+    person("Simon", "Jackson", email = "drsimonjackson@gmail.com", role = "ctb"))  
 Maintainer: David Robinson <admiral.david@gmail.com>
 Description: Convert statistical analysis objects from R into tidy data frames,
     so that they can more easily be combined, reshaped and otherwise processed

diff --git a/NAMESPACE b/NAMESPACE
@@ -8,6 +8,7 @@ S3method(augment,coxph)
 S3method(augment,data.frame)
 S3method(augment,decomposed.ts)
 S3method(augment,default)
+S3method(augment,factanal)
 S3method(augment,felm)
 S3method(augment,garch)
 S3method(augment,glmRob)
@@ -51,6 +52,7 @@ S3method(glance,cv.glmnet)
 S3method(glance,data.frame)
 S3method(glance,default)
 S3method(glance,ergm)
+S3method(glance,factanal)
 S3method(glance,felm)
 S3method(glance,fitdistr)
 S3method(glance,gam)
@@ -137,6 +139,7 @@ S3method(tidy,dgTMatrix)
 S3method(tidy,dist)
 S3method(tidy,emmGrid)
 S3method(tidy,ergm)
+S3method(tidy,factanal)
 S3method(tidy,felm)
 S3method(tidy,fitdistr)
 S3method(tidy,ftable)

diff --git a/R/factanal_tidiers.R b/R/factanal_tidiers.R
@@ -0,0 +1,131 @@
+#' Tidying methods for a factor analysis
+#' 
+#' These methods tidy the factor loadings of a factor analysis, conducted via 
+#' \code{\link{factanal}}, into a summary, augment the original data with factor
+#' scores, and construct a one-row glance of the model's statistics.
+#' 
+#' @return All tidying methods return a \code{\link[tibble]{tibble}} without
+#'   rownames.
+#' 
+#' @name factanal_tidiers
+#' 
+#' @param x \code{\link{factanal}} object
+#' @param data Original data
+#' @param ... Additional arguments, not used
+#' 
+#' @examples 
+#' 
+#' mod <- factanal(mtcars, 3, scores = "regression")
+#' 
+#' glance(mod)
+#' tidy(mod)
+#' augment(mod)
+#' augment(mod, mtcars)  # Must include original data if desired
+#' 
+NULL
+
+#' @rdname factanal_tidiers
+#' 
+#' @return \code{tidy.factanal} returns one row for each variable used in the 
+#'   analysis and the following columns:
+#'   \item{variable}{The variable being estimated in the factor analysis}
+#'   \item{uniqueness}{Proportion of residual, or unexplained variance}
+#'   \item{flX}{Factor loading of term on factor X. There will be as many columns
+#'   of this format as there were factors fitted.}
+#' 
+#' @export
+tidy.factanal <- function(x, ...) {
+    # Convert to format that we can work with
+    loadings <- stats::loadings(x)
+    class(loadings) <- "matrix"
+
+    # Place relevant values into a tidy data frame
+    tidy_df <- data.frame(variable = rownames(loadings),
+                          uniqueness = x$uniquenesses,
+                          data.frame(loadings)) %>% 
+      as_tibble()
+
+    tidy_df$variable <- as.character(tidy_df$variable)
+
+    # Remove row names and clean column names
+    rownames(tidy_df) <- NULL
+    colnames(tidy_df) <- gsub("Factor", "fl", colnames(tidy_df))
+
+    tidy_df
+}
+
+#' @rdname factanal_tidiers
+#' 
+#' @return When \code{data} is not supplied \code{augment.factanal} returns one 
+#'   row for each observation, with a factor score column added for each factor 
+#'   X, (\code{.fsX}). This is because \code{\link{factanal}}, unlike other 
+#'   stats methods like \code{\link{lm}}, does not retain the original data.
+#' 
+#' When \code{data} is supplied, \code{augment.factanal} returns one row for
+#' each observation, with a factor score column added for each factor X,
+#' (\code{.fsX}).
+#' 
+#' @export
+augment.factanal <- function(x, data, ...) {
+    scores <- x$scores
+
+    # Check scores were computed
+    if (is.null(scores)) {
+        stop("Factor scores were not computed. Change the `scores` argument in factanal().")
+    }
+
+    # Place relevant values into a tidy data frame
+    tidy_df <- data.frame(.rowname = rownames(scores), data.frame(scores)) %>% as_tibble()
+    tidy_df$.rowname <- as.character(tidy_df$.rowname)
+
+    # Remove row names and clean column names
+    rownames(tidy_df) <- NULL
+    colnames(tidy_df) <- gsub("Factor", ".fs", colnames(tidy_df))
+
+    # Check if original data provided
+    if (missing(data)) {
+        return(tidy_df)    
+    }
+
+    # Bind to data
+    data$.rowname <- rownames(data)
+    tidy_df <- tidy_df %>% right_join(data, by = ".rowname")
+
+    tidy_df %>% select(.rowname, everything(),
+                       -matches("\\.fs[0-9]*"), matches("\\.fs[0-9]*"))
+}
+
+#' @rdname factanal_tidiers
+#' 
+#' @return \code{glance.factanal} returns a one-row data.frame with the columns:
+#'   \item{n.factors}{The number of fitted factors}
+#'   \item{total.variance}{Total cumulative proportion of variance accounted for by all factors}
+#'   \item{statistic}{Significance-test statistic}
+#'   \item{p.value}{p-value from the significance test, describing whether the
+#'   covariance matrix estimated from the factors is significantly different
+#'   from the observed covariance matrix}
+#'   \item{df}{Degrees of freedom used by the factor analysis}
+#'   \item{n}{Sample size used in the analysis}
+#'   \item{method}{The estimation method; always Maximum Likelihood, "mle"}
+#'   \item{converged}{Whether the factor analysis converged}
+#' 
+#' @export
+glance.factanal <- function(x, ...) {
+
+    # Compute total variance accounted for by all factors
+    loadings <- stats::loadings(x)
+    class(loadings) <- "matrix"
+    total.variance <- sum(apply(loadings, 2, function(i) sum(i^2) / length(i)))
+
+    # Results as single-row data frame
+    data_frame(
+        n.factors = x$factors,
+        total.variance = total.variance,
+        statistic = unname(x$STATISTIC),
+        p.value = unname(x$PVAL),
+        df = x$dof,
+        n = x$n.obs,
+        method = x$method,
+        converged = x$converged
+    )
+}
diff --git a/man/factanal_tidiers.Rd b/man/factanal_tidiers.Rd
diff --git a/tests/testthat/test-factanal.R b/tests/testthat/test-factanal.R
@@ -0,0 +1,43 @@
+# test tidy, augment, glance from factanal objects
+
+context("factanal tidiers")
+
+test_that("tidy.factanal works", {
+  n_factors <- 3
+  fit <- factanal(mtcars, n_factors)
+  td <- tidy(fit)
+  check_tidy(td, exp.row = ncol(mtcars), exp.col = 2 + n_factors)
+  expect_equal(td$variable, colnames(mtcars))
+
+  n_factors2 <- 3
+  fit2 <- factanal(mtcars, n_factors2)
+  td2 <- tidy(fit2)
+  expect_equal(ncol(td2), 2 + n_factors2)
+})
+
+test_that("glance.factanal works", {
+  n_factors <- 3
+  fit <- factanal(mtcars, n_factors)
+  td <- glance(fit)
+  check_tidy(td, exp.row = 1, exp.col = 8)
+  expect_equal(td$n.factors, n_factors)
+})
+
+test_that("augment.factanal works", {
+  n_factors <- 3
+  fit <- factanal(mtcars, n_factors, scores = "regression")
+  td <- augment(fit)
+  check_tidy(td, exp.row = nrow(mtcars), exp.col = 1 + n_factors)
+  expect_equal(td$.rowname, rownames(mtcars))
+
+  fit2 <- factanal(mtcars, n_factors, scores = "Bartlett")
+  td2 <- augment(fit2, mtcars)
+  check_tidy(td2, exp.row = nrow(mtcars), exp.col = 1 + n_factors + ncol(mtcars))
+})
+
+test_that("augment.factanal does not support none scores", {
+  n_factors <- 3
+  fit <- factanal(mtcars, n_factors, scores = "none")
+  expect_error(augment(fit))
+  expect_error(augment(fit, mtcars))
+})