From 3c0f545b4214bf207ae941dbe41715d776020f57 Mon Sep 17 00:00:00 2001 From: "Simon P. Couch" Date: Mon, 26 Jul 2021 13:05:27 -0500 Subject: [PATCH] revert 5256dd6 --- DESCRIPTION | 2 + NAMESPACE | 3 + NEWS.md | 3 +- R/lfe-tidiers.R | 243 ++++++++++++++++++++++++++++++++++++++ man/augment.felm.Rd | 115 ++++++++++++++++++ man/glance.felm.Rd | 85 +++++++++++++ man/tidy.felm.Rd | 105 ++++++++++++++++ tests/testthat/test-lfe.R | 130 ++++++++++++++++++++ 8 files changed, 684 insertions(+), 2 deletions(-) create mode 100644 R/lfe-tidiers.R create mode 100644 man/augment.felm.Rd create mode 100644 man/glance.felm.Rd create mode 100644 man/tidy.felm.Rd create mode 100644 tests/testthat/test-lfe.R diff --git a/DESCRIPTION b/DESCRIPTION index 93ef6d17c..47aec2dfd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -573,6 +573,7 @@ Suggests: Lahman, lavaan, leaps, + lfe, lm.beta, lme4, lmodel2, @@ -660,6 +661,7 @@ Collate: 'ks-tidiers.R' 'lavaan-tidiers.R' 'leaps.R' + 'lfe-tidiers.R' 'list-irlba.R' 'list-optim-tidiers.R' 'list-svd-tidiers.R' diff --git a/NAMESPACE b/NAMESPACE index 7e7da7218..289a0cc49 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -12,6 +12,7 @@ S3method(augment,decomposed.ts) S3method(augment,default) S3method(augment,drc) S3method(augment,factanal) +S3method(augment,felm) S3method(augment,fixest) S3method(augment,gam) S3method(augment,glm) @@ -73,6 +74,7 @@ S3method(glance,drc) S3method(glance,durbinWatsonTest) S3method(glance,ergm) S3method(glance,factanal) +S3method(glance,felm) S3method(glance,fitdistr) S3method(glance,fixest) S3method(glance,gam) @@ -178,6 +180,7 @@ S3method(tidy,emmGrid) S3method(tidy,epi.2by2) S3method(tidy,ergm) S3method(tidy,factanal) +S3method(tidy,felm) S3method(tidy,fitdistr) S3method(tidy,fixest) S3method(tidy,ftable) diff --git a/NEWS.md b/NEWS.md index 012b102cf..1c0a4fbcf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -2,8 +2,7 @@ To be released as broom 0.7.9. 
-* Fixed confidence intervals in `tidy.crr()`, which were previously exponentiated when `exponentiate = FALSE` (`#1023` by `@leejasme`) -* Deprecate tidiers for `felm` objects from the `lfe` package, which was again archived from CRAN. +* Fixes confidence intervals in `tidy.crr()`, which were previously exponentiated when `exponentiate = FALSE` (`#1023` by `@leejasme`) * Deprecates `Rchoice` tidiers, as the newest 0.3-3 release requires R 4.0+ and does not re-export needed generics. * Updates to `ergm` tidiers in anticipation of changes in later releases. (`#1034` by `@krivit`) diff --git a/R/lfe-tidiers.R b/R/lfe-tidiers.R new file mode 100644 index 000000000..af5ecc1c3 --- /dev/null +++ b/R/lfe-tidiers.R @@ -0,0 +1,243 @@ +#' @templateVar class felm +#' @template title_desc_tidy +#' +#' @param x A `felm` object returned from [lfe::felm()]. +#' @template param_confint +#' @param fe Logical indicating whether or not to include estimates of +#' fixed effects. Defaults to `FALSE`. +#' @param se.type Character indicating the type of standard errors. Defaults to +#' using those of the underlying felm() model object, e.g. clustered errors +#' for models that were provided a cluster specification. Users can override +#' these defaults by specifying an appropriate alternative: "iid" (for +#' homoskedastic errors), "robust" (for Eicker-Huber-White robust errors), or +#' "cluster" (for clustered standard errors; if the model object supports it). 
+#' @template param_unused_dots +#' +#' @evalRd return_tidy(regression = TRUE) +#' +#' @examples +#' +#' library(lfe) +#' +#' # Use built-in "airquality" dataset +#' head(airquality) +#' +#' # No FEs; same as lm() +#' est0 <- felm(Ozone ~ Temp + Wind + Solar.R, airquality) +#' tidy(est0) +#' augment(est0) +#' +#' # Add month fixed effects +#' est1 <- felm(Ozone ~ Temp + Wind + Solar.R | Month, airquality) +#' tidy(est1) +#' tidy(est1, fe = TRUE) +#' augment(est1) +#' glance(est1) +#' +#' # The "se.type" argument can be used to switch out different standard errors +#' # types on the fly. In turn, this can be useful exploring the effect of +#' # different error structures on model inference. +#' tidy(est1, se.type = "iid") +#' tidy(est1, se.type = "robust") +#' +#' # Add clustered SEs (also by month) +#' est2 <- felm(Ozone ~ Temp + Wind + Solar.R | Month | 0 | Month, airquality) +#' tidy(est2, conf.int = TRUE) +#' tidy(est2, conf.int = TRUE, se.type = "cluster") +#' tidy(est2, conf.int = TRUE, se.type = "robust") +#' tidy(est2, conf.int = TRUE, se.type = "iid") +#' @export +#' @aliases felm_tidiers lfe_tidiers +#' @family felm tidiers +#' @seealso [tidy()], [lfe::felm()] +tidy.felm <- function(x, conf.int = FALSE, conf.level = .95, fe = FALSE, se.type = c("default", "iid", "robust", "cluster"), ...) { + has_multi_response <- length(x$lhs) > 1 + + # warn users about deprecated "robust" argument + dots <- list(...) + if (!is.null(dots$robust)) { + warning('\nThe "robust" argument has been deprecated in tidy.felm and will be ignored. 
Please use the "se.type" argument instead.\n') + } + + # match SE args + se.type <- match.arg(se.type) + if (se.type == "default") { + se.type <- NULL + } + + # get "robust" logical to pass on to summary.lfe + if (is.null(se.type)) { + robust <- !is.null(x$clustervar) + } else if (se.type == "iid") { + robust <- FALSE + } else { + # catch potential user error, asking for clusters where none exist + if (se.type == "cluster" && is.null(x$clustervar)) { + warning("Clustered SEs requested, but weren't calculated in underlying model object. Reverting to default SEs.\n") + se.type <- NULL + } + + robust <- TRUE + } + + nn <- c("estimate", "std.error", "statistic", "p.value") + if (has_multi_response) { + ret <- map_df(x$lhs, function(y) { + stats::coef(summary(x, lhs = y, robust = robust)) %>% + as_tidy_tibble(new_names = nn) %>% + mutate(response = y) + }) %>% + select(response, dplyr::everything()) + } else { + ret <- as_tidy_tibble( + stats::coef(summary(x, robust = robust)), + new_names = nn + ) + } + + # Catch edge case where users specify "robust" SEs on felm() object that + # contains clusters. Reason: Somewhat confusingly, summary.felm(robust = TRUE) + # reports clustered SEs even though robust SEs are available. In contrast, + # confint.felm distinguishes between robust and clustered SEs regardless + # of the underlying model. 
See also: https://github.com/sgaure/lfe/pull/17/files + if (!is.null(se.type)) { + if (se.type == "robust" && !is.null(x$clustervar)) { + ret$std.error <- x$rse + ret$statistic <- x$rtval + ret$p.value <- x$rpval + } + } + + + if (conf.int) { + if (has_multi_response) { + ci <- map_df(x$lhs, function(y) { + broom_confint_terms(x, level = conf.level, type = NULL, lhs = y) %>% + mutate(response = y) + }) + ret <- dplyr::left_join(ret, ci, by = c("response", "term")) + } else { + ci <- broom_confint_terms(x, level = conf.level, type = se.type) + ret <- dplyr::left_join(ret, ci, by = "term") + } + } + + if (fe) { + ret <- mutate(ret, N = NA, comp = NA) + + nn <- c("estimate", "std.error", "N", "comp") + ret_fe_prep <- lfe::getfe(x, se = TRUE, bN = 100) %>% + tibble::rownames_to_column(var = "term") %>% + # effect and se are multiple if multiple y + select(term, contains("effect"), contains("se"), obs, comp) %>% + rename(N = obs) + + if (has_multi_response) { + ret_fe_prep <- ret_fe_prep %>% + tidyr::pivot_longer( + cols = c( + starts_with("effect."), + starts_with("se.") + ), + names_to = "stat_resp", + values_to = "value" + ) %>% + tidyr::separate( + col = "stat_resp", + c("stat", "response"), + sep = "\\." 
+ ) %>% + tidyr::pivot_wider( + id_cols = c(term, N, comp, response), + names_from = stat, + values_from = value + ) %>% + dplyr::arrange(term) %>% + as.data.frame() + } + ret_fe <- ret_fe_prep %>% + rename(estimate = effect, std.error = se) %>% + select(contains("response"), dplyr::everything()) %>% + mutate(statistic = estimate / std.error) %>% + mutate(p.value = 2 * stats::pt(abs(statistic), df = N, lower.tail = FALSE)) + + if (conf.int) { + crit_val_low <- stats::qnorm(1 - (1 - conf.level) / 2) + crit_val_high <- stats::qnorm(1 - (1 - conf.level) / 2) + + ret_fe <- ret_fe %>% + mutate( + conf.low = estimate - crit_val_low * std.error, + conf.high = estimate + crit_val_high * std.error + ) + } + ret <- rbind(ret, ret_fe) + } + as_tibble(ret) +} + +#' @templateVar class felm +#' @template title_desc_augment +#' +#' @inherit tidy.felm params examples +#' @template param_data +#' +#' @evalRd return_augment() +#' +#' @export +#' @family felm tidiers +#' @seealso [augment()], [lfe::felm()] +augment.felm <- function(x, data = model.frame(x), ...) { + has_multi_response <- length(x$lhs) > 1 + + if (has_multi_response) { + stop( + "Augment does not support linear models with multiple responses.", + call. = FALSE + ) + } + df <- as_augment_tibble(data) + mutate(df, .fitted = as.vector(x$fitted.values), .resid = as.vector(x$residuals)) +} + +#' @templateVar class felm +#' @template title_desc_glance +#' +#' @inherit tidy.felm params examples +#' +#' @evalRd return_glance( +#' "r.squared", +#' "adj.r.squared", +#' "sigma", +#' "statistic", +#' "p.value", +#' "df", +#' "df.residual", +#' "nobs" +#' ) +#' +#' @export +glance.felm <- function(x, ...) { + has_multi_response <- length(x$lhs) > 1 + + if (has_multi_response) { + stop( + "Glance does not support linear models with multiple responses.", + call. 
= FALSE + ) + } + + s <- summary(x) + + as_glance_tibble( + r.squared = s$r2, + adj.r.squared = s$r2adj, + sigma = s$rse, + statistic = s$fstat, + p.value = unname(s$pval), + df = s$df[1], + df.residual = s$rdf, + nobs = stats::nobs(x), + na_types = "rrrrriii" + ) +} diff --git a/man/augment.felm.Rd b/man/augment.felm.Rd new file mode 100644 index 000000000..5bae74e60 --- /dev/null +++ b/man/augment.felm.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lfe-tidiers.R +\name{augment.felm} +\alias{augment.felm} +\title{Augment data with information from a(n) felm object} +\usage{ +\method{augment}{felm}(x, data = model.frame(x), ...) +} +\arguments{ +\item{x}{A \code{felm} object returned from \code{\link[lfe:felm]{lfe::felm()}}.} + +\item{data}{A \link[base:data.frame]{base::data.frame} or \code{\link[tibble:tibble]{tibble::tibble()}} containing the original +data that was used to produce the object \code{x}. Defaults to +\code{stats::model.frame(x)} so that \code{augment(my_fit)} returns the augmented +original data. \strong{Do not} pass new data to the \code{data} argument. +Augment will report information such as influence and cooks distance for +data passed to the \code{data} argument. These measures are only defined for +the original training data.} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. 
Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Augment accepts a model object and a dataset and adds +information about each observation in the dataset. Most commonly, this +includes predicted values in the \code{.fitted} column, residuals in the +\code{.resid} column, and standard errors for the fitted values in a \code{.se.fit} +column. New columns always begin with a \code{.} prefix to avoid overwriting +columns in the original dataset. + +Users may pass data to augment via either the \code{data} argument or the +\code{newdata} argument. If the user passes data to the \code{data} argument, +it \strong{must} be exactly the data that was used to fit the model +object. Pass datasets to \code{newdata} to augment data that was not used +during model fitting. This still requires that at least all predictor +variable columns used to fit the model are present. If the original outcome +variable used to fit the model is not included in \code{newdata}, then no +\code{.resid} column will be included in the output. + +Augment will often behave differently depending on whether \code{data} or +\code{newdata} is given. This is because there is often information +associated with training observations (such as influences or related) +measures that is not meaningfully defined for new observations. + +For convenience, many augment methods provide default \code{data} arguments, +so that \code{augment(fit)} will return the augmented training data. In these +cases, augment tries to reconstruct the original data based on the model +object with varying degrees of success. + +The augmented dataset is always returned as a \link[tibble:tibble]{tibble::tibble} with the +\strong{same number of rows} as the passed dataset. This means that the +passed data must be coercible to a tibble. 
At this time, tibbles do not +support matrix-columns. This means you should not specify a matrix +of covariates in a model formula during the original model fitting +process, and that \code{\link[splines:ns]{splines::ns()}}, \code{\link[stats:poly]{stats::poly()}} and +\code{\link[survival:Surv]{survival::Surv()}} objects are not supported in input data. If you +encounter errors, try explicitly passing a tibble, or fitting the original +model on data in a tibble. + +We are in the process of defining behaviors for models fit with various +\code{na.action} arguments, but make no guarantees about behavior when data is +missing at this time. +} +\examples{ + +library(lfe) + +# Use built-in "airquality" dataset +head(airquality) + +# No FEs; same as lm() +est0 <- felm(Ozone ~ Temp + Wind + Solar.R, airquality) +tidy(est0) +augment(est0) + +# Add month fixed effects +est1 <- felm(Ozone ~ Temp + Wind + Solar.R | Month, airquality) +tidy(est1) +tidy(est1, fe = TRUE) +augment(est1) +glance(est1) + +# The "se.type" argument can be used to switch out different standard errors +# types on the fly. In turn, this can be useful exploring the effect of +# different error structures on model inference. 
+tidy(est1, se.type = "iid") +tidy(est1, se.type = "robust") + +# Add clustered SEs (also by month) +est2 <- felm(Ozone ~ Temp + Wind + Solar.R | Month | 0 | Month, airquality) +tidy(est2, conf.int = TRUE) +tidy(est2, conf.int = TRUE, se.type = "cluster") +tidy(est2, conf.int = TRUE, se.type = "robust") +tidy(est2, conf.int = TRUE, se.type = "iid") +} +\seealso{ +\code{\link[=augment]{augment()}}, \code{\link[lfe:felm]{lfe::felm()}} + +Other felm tidiers: +\code{\link{tidy.felm}()} +} +\concept{felm tidiers} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with columns: + \item{.fitted}{Fitted or predicted value.} + \item{.resid}{The difference between observed and fitted values.} + +} diff --git a/man/glance.felm.Rd b/man/glance.felm.Rd new file mode 100644 index 000000000..6522423e4 --- /dev/null +++ b/man/glance.felm.Rd @@ -0,0 +1,85 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lfe-tidiers.R +\name{glance.felm} +\alias{glance.felm} +\title{Glance at a(n) felm object} +\usage{ +\method{glance}{felm}(x, ...) +} +\arguments{ +\item{x}{A \code{felm} object returned from \code{\link[lfe:felm]{lfe::felm()}}.} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Glance accepts a model object and returns a \code{\link[tibble:tibble]{tibble::tibble()}} +with exactly one row of model summaries. 
The summaries are typically +goodness of fit measures, p-values for hypothesis tests on residuals, +or model convergence information. + +Glance never returns information from the original call to the modeling +function. This includes the name of the modeling function or any +arguments passed to the modeling function. + +Glance does not calculate summary measures. Rather, it farms out these +computations to appropriate methods and gathers the results together. +Sometimes a goodness of fit measure will be undefined. In these cases +the measure will be reported as \code{NA}. + +Glance returns the same number of columns regardless of whether the +model matrix is rank-deficient or not. If so, entries in columns +that no longer have a well-defined value are filled in with an \code{NA} +of the appropriate type. +} +\examples{ + +library(lfe) + +# Use built-in "airquality" dataset +head(airquality) + +# No FEs; same as lm() +est0 <- felm(Ozone ~ Temp + Wind + Solar.R, airquality) +tidy(est0) +augment(est0) + +# Add month fixed effects +est1 <- felm(Ozone ~ Temp + Wind + Solar.R | Month, airquality) +tidy(est1) +tidy(est1, fe = TRUE) +augment(est1) +glance(est1) + +# The "se.type" argument can be used to switch out different standard errors +# types on the fly. In turn, this can be useful exploring the effect of +# different error structures on model inference. 
+tidy(est1, se.type = "iid") +tidy(est1, se.type = "robust") + +# Add clustered SEs (also by month) +est2 <- felm(Ozone ~ Temp + Wind + Solar.R | Month | 0 | Month, airquality) +tidy(est2, conf.int = TRUE) +tidy(est2, conf.int = TRUE, se.type = "cluster") +tidy(est2, conf.int = TRUE, se.type = "robust") +tidy(est2, conf.int = TRUE, se.type = "iid") +} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with exactly one row and columns: + \item{adj.r.squared}{Adjusted R squared statistic, which is like the R squared statistic except taking degrees of freedom into account.} + \item{df}{Degrees of freedom used by the model.} + \item{df.residual}{Residual degrees of freedom.} + \item{nobs}{Number of observations used.} + \item{p.value}{P-value corresponding to the test statistic.} + \item{r.squared}{R squared statistic, or the percent of variation explained by the model. Also known as the coefficient of determination.} + \item{sigma}{Estimated standard error of the residuals.} + \item{statistic}{Test statistic.} + +} diff --git a/man/tidy.felm.Rd b/man/tidy.felm.Rd new file mode 100644 index 000000000..503ae1013 --- /dev/null +++ b/man/tidy.felm.Rd @@ -0,0 +1,105 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lfe-tidiers.R +\name{tidy.felm} +\alias{tidy.felm} +\alias{felm_tidiers} +\alias{lfe_tidiers} +\title{Tidy a(n) felm object} +\usage{ +\method{tidy}{felm}( + x, + conf.int = FALSE, + conf.level = 0.95, + fe = FALSE, + se.type = c("default", "iid", "robust", "cluster"), + ... +) +} +\arguments{ +\item{x}{A \code{felm} object returned from \code{\link[lfe:felm]{lfe::felm()}}.} + +\item{conf.int}{Logical indicating whether or not to include a confidence +interval in the tidied output. Defaults to \code{FALSE}.} + +\item{conf.level}{The confidence level to use for the confidence interval +if \code{conf.int = TRUE}. Must be strictly greater than 0 and less than 1. 
+Defaults to 0.95, which corresponds to a 95 percent confidence interval.} + +\item{fe}{Logical indicating whether or not to include estimates of +fixed effects. Defaults to \code{FALSE}.} + +\item{se.type}{Character indicating the type of standard errors. Defaults to +using those of the underlying felm() model object, e.g. clustered errors +for models that were provided a cluster specification. Users can override +these defaults by specifying an appropriate alternative: "iid" (for +homoskedastic errors), "robust" (for Eicker-Huber-White robust errors), or +"cluster" (for clustered standard errors; if the model object supports it).} + +\item{...}{Additional arguments. Not used. Needed to match generic +signature only. \strong{Cautionary note:} Misspelled arguments will be +absorbed in \code{...}, where they will be ignored. If the misspelled +argument has a default value, the default value will be used. +For example, if you pass \code{conf.lvel = 0.9}, all computation will +proceed using \code{conf.level = 0.95}. Additionally, if you pass +\code{newdata = my_tibble} to an \code{\link[=augment]{augment()}} method that does not +accept a \code{newdata} argument, it will use the default value for +the \code{data} argument.} +} +\description{ +Tidy summarizes information about the components of a model. +A model component might be a single term in a regression, a single +hypothesis, a cluster, or a class. Exactly what tidy considers to be a +model component varies across models but is usually self-evident. +If a model has several distinct types of components, you will need to +specify which components to return. 
+} +\examples{ + +library(lfe) + +# Use built-in "airquality" dataset +head(airquality) + +# No FEs; same as lm() +est0 <- felm(Ozone ~ Temp + Wind + Solar.R, airquality) +tidy(est0) +augment(est0) + +# Add month fixed effects +est1 <- felm(Ozone ~ Temp + Wind + Solar.R | Month, airquality) +tidy(est1) +tidy(est1, fe = TRUE) +augment(est1) +glance(est1) + +# The "se.type" argument can be used to switch out different standard errors +# types on the fly. In turn, this can be useful exploring the effect of +# different error structures on model inference. +tidy(est1, se.type = "iid") +tidy(est1, se.type = "robust") + +# Add clustered SEs (also by month) +est2 <- felm(Ozone ~ Temp + Wind + Solar.R | Month | 0 | Month, airquality) +tidy(est2, conf.int = TRUE) +tidy(est2, conf.int = TRUE, se.type = "cluster") +tidy(est2, conf.int = TRUE, se.type = "robust") +tidy(est2, conf.int = TRUE, se.type = "iid") +} +\seealso{ +\code{\link[=tidy]{tidy()}}, \code{\link[lfe:felm]{lfe::felm()}} + +Other felm tidiers: +\code{\link{augment.felm}()} +} +\concept{felm tidiers} +\value{ +A \code{\link[tibble:tibble]{tibble::tibble()}} with columns: + \item{conf.high}{Upper bound on the confidence interval for the estimate.} + \item{conf.low}{Lower bound on the confidence interval for the estimate.} + \item{estimate}{The estimated value of the regression term.} + \item{p.value}{The two-sided p-value associated with the observed statistic.} + \item{statistic}{The value of a T-statistic to use in a hypothesis that the regression term is non-zero.} + \item{std.error}{The standard error of the regression term.} + \item{term}{The name of the regression term.} + +} diff --git a/tests/testthat/test-lfe.R b/tests/testthat/test-lfe.R new file mode 100644 index 000000000..5f07dabd8 --- /dev/null +++ b/tests/testthat/test-lfe.R @@ -0,0 +1,130 @@ +context("lfe") + +skip_on_cran() + +skip_if_not_installed("modeltests") +library(modeltests) + +skip_if_not_installed("lfe") + +set.seed(27) +n <- 100 +df <- 
data.frame( + id = sample(5, n, TRUE), + v1 = sample(5, n, TRUE), + v2 = sample(1e6, n, TRUE), + v3 = sample(round(runif(100, max = 100), 4), n, TRUE), + v4 = sample(round(runif(100, max = 100), 4), n, TRUE) +) + +# no FE or clus +fit <- lfe::felm(v2 ~ v3, df) +# with FE +fit2 <- lfe::felm(v2 ~ v3 | id + v1, df, na.action = na.exclude) + +# with clus +fit3 <- lfe::felm(v2 ~ v3 | 0 | 0 | id + v1, df, na.action = na.exclude) + +## with multiple outcomes +fit_multi <- lfe::felm(v1 + v2 ~ v3 , df) +fit_Y2 <- lfe::felm(v1 ~ v3 , df) + +form <- v2 ~ v4 +fit_form <- lfe::felm(form, df) # part of a regression test + +test_that("felm tidier arguments", { + check_arguments(tidy.felm) + check_arguments(glance.felm) + check_arguments(augment.felm) +}) + +test_that("tidy.felm", { + td1 <- tidy(fit) + td2 <- tidy(fit2, conf.int = TRUE, fe = TRUE, fe.error = FALSE) + td3 <- tidy(fit2, conf.int = TRUE, fe = TRUE) + td4 <- tidy(fit_form) + td5 <- tidy(fit, se.type = "robust") + td6 <- tidy(fit2, se.type = "robust") + td7 <- tidy(fit2, se.type = "robust", fe = TRUE) + td8 <- tidy(fit3) + td9 <- tidy(fit3, se.type = "iid") + + + td_multi <- tidy(fit_multi) + td_multi_CI <- tidy(fit_multi, conf.int = TRUE) + + check_tidy_output(td1) + check_tidy_output(td2) + check_tidy_output(td3) + check_tidy_output(td4) + check_tidy_output(td5) + check_tidy_output(td6) + check_tidy_output(td7) + check_tidy_output(td8) + check_tidy_output(td9) + check_tidy_output(td_multi) + check_tidy_output(td_multi_CI) + + check_dims(td1, 2, 5) + check_dims(td_multi_CI, 4, 8) + + expect_equal(tidy(fit_multi)[3:4, -1], + tidy(fit)) + expect_equal(tidy(fit_multi, conf.int = TRUE)[3:4, -1], + tidy(fit, conf.int = TRUE)) + expect_equal(tidy(fit_multi, conf.int = TRUE)[1:2, -1], + tidy(fit_Y2, conf.int = TRUE)) + + expect_equal(dplyr::pull(td5, std.error), + as.numeric(lfe:::summary.felm(fit, robust = TRUE)$coef[, "Robust s.e"])) + expect_equal(dplyr::pull(td6, std.error), + as.numeric(lfe:::summary.felm(fit2, 
robust = TRUE)$coef[, 
"Robust s.e"])) + expect_equal(dplyr::pull(td8, std.error), + as.numeric(lfe:::summary.felm(fit3)$coef[, "Cluster s.e."])) + expect_equal(dplyr::pull(td9, std.error), + as.numeric(lfe:::summary.felm(fit3, robust = FALSE)$coef[, "Std. Error"])) + + # check for deprecation warning from 0.7.0.9001 + expect_warning( + tidy(fit, robust = TRUE), + '"robust" argument has been deprecated' + ) +}) + +test_that("glance.felm", { + gl <- glance(fit) + gl2 <- glance(fit2) + + check_glance_outputs(gl, gl2) + check_dims(gl, expected_cols = 8) + + expect_error(glance(fit_multi), "Glance does not support linear models with multiple responses.") +}) + +test_that("augment.felm", { + check_augment_function( + aug = augment.felm, + model = fit, + data = df + ) + + check_augment_function( + aug = augment.felm, + model = fit2, + data = df + ) + + check_augment_function( + aug = augment.felm, + model = fit_form, + data = df + ) + expect_error(augment(fit_multi), + "Augment does not support linear models with multiple responses.") + + # Ensure that the .resid and .fitted columns are basic columns, not matrix + aug <- augment(fit) + expect_false(inherits(aug$.resid, "matrix")) + expect_false(inherits(aug$.fitted, "matrix")) + expect_null(c(colnames(aug$.resid), colnames(aug$.fitted))) +})