From b2f62c5712701a8567ecb582a4f14806285581bf Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 14 Jan 2024 11:50:37 +0100 Subject: [PATCH 1/2] fix #10' --- DESCRIPTION | 4 ++-- NEWS.md | 6 ++++++ R/rwolf.R | 6 +++++- tests/testthat/test_misc.R | 20 ++++++++++++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/test_misc.R diff --git a/DESCRIPTION b/DESCRIPTION index 4f63715..ac26944 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: wildrwolf Type: Package Title: Fast Computation of Romano-Wolf Corrected p-Values for Linear Regression Models -Version: 0.6.1 +Version: 0.7.0 Authors@R: c( person(given = "Alexander", @@ -19,7 +19,7 @@ Description: Fast Routines to Compute Romano-Wolf corrected p-Values License: GPL (>= 3) Encoding: UTF-8 LazyData: true -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.3 Imports: fixest, fwildclusterboot, diff --git a/NEWS.md b/NEWS.md index aa6bd8e..0a329c5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# wildrwolf 0.7.0 + +* Fixes a bug when the cluster in `feols()` was specified as character. + See [#10](https://github.com/s3alfisc/wildrwolf/issues/10) for details. + Thanks to [@SebKrantz](https://github.com/SebKrantz) for reporting! + # wildrwolf 0.6.1 * Hot-Fix release to address ATLAS test failures. Failing diff --git a/R/rwolf.R b/R/rwolf.R index a45dc37..e7b4902 100644 --- a/R/rwolf.R +++ b/R/rwolf.R @@ -200,7 +200,11 @@ rwolf <- function( ) if(!is.null(clustid)){ - boottest_quote$clustid <- formula(clustid) + if(is.character(clustid)){ + boottest_quote$clustid <- formula(paste0("~", clustid)) + } else { + boottest_quote$clustid <- formula(clustid) + } } if(!is.null(bootstrap_type)){ diff --git a/tests/testthat/test_misc.R b/tests/testthat/test_misc.R new file mode 100644 index 0000000..18c5b10 --- /dev/null +++ b/tests/testthat/test_misc.R @@ -0,0 +1,20 @@ +test_that("test cluster input", { + + # test that wildrwolf accepts formulas and characters + # as fixest cluster input + + library(fixest) + library(wildrwolf) + + dqrng::dqset.seed(123) + models1 = feols(c(vs, am) ~ mpg | cyl, mtcars, cluster = "carb") + rwolf1 = rwolf(models = models, param = "mpg", B = 9999) + + dqrng::dqset.seed(123) + models2 = feols(c(vs, am) ~ mpg | cyl, mtcars, cluster = ~carb) + rwolf2 = rwolf(models = models, param = "mpg", B = 9999) + + expect_equal(rwolf1, rwolf2) + + +}) From 076d9fae32184e6373ba67dee74147cfc183b6e8 Mon Sep 17 00:00:00 2001 From: Alexander Fischer Date: Sun, 14 Jan 2024 12:16:07 +0100 Subject: [PATCH 2/2] fix error in tests --- R/rwolf.R | 175 ++++++++++++++++++------------------- tests/testthat/test_misc.R | 8 +- 2 files changed, 91 insertions(+), 92 deletions(-) diff --git a/R/rwolf.R b/R/rwolf.R index e7b4902..1782fb3 100644 --- a/R/rwolf.R +++ b/R/rwolf.R @@ -1,95 +1,95 @@ -#' Romano-Wolf multiple hypotheses adjusted p-values -#' +#' Romano-Wolf multiple hypotheses adjusted p-values +#' #' Function implements the Romano-Wolf multiple hypothesis correction procedure -#' for objects of type `fixest_multi` (`fixest_multi` are objects created by -#' `fixest::feols()` that use `feols()` multiple-estimation interface). +#' for objects of type `fixest_multi` (`fixest_multi` are objects created by +#' `fixest::feols()` that use `feols()` multiple-estimation interface). #' The null hypothesis is always imposed on the bootstrap dgp. -#' -#' @param models An object of type `fixest_multi` or a list of objects of +#' +#' @param models An object of type `fixest_multi` or a list of objects of #' type `fixest`, estimated via ordinary least squares (OLS) #' @param param The regression parameter to be tested #' @param R Hypothesis Vector giving linear combinations of coefficients. -#' Must be either NULL or a vector of the same length as `param`. +#' Must be either NULL or a vector of the same length as `param`. #' If NULL, a vector of ones of length param. -#' @param r A numeric. Shifts the null hypothesis -#' H0: `param.` = r vs H1: `param.` != r +#' @param r A numeric. Shifts the null hypothesis +#' H0: `param.` = r vs H1: `param.` != r #' @param B The number of bootstrap iterations -#' @param p_val_type Character vector of length 1. Type of hypothesis test +#' @param p_val_type Character vector of length 1. Type of hypothesis test #' By default "two-tailed". Other options include "equal-tailed" -#' (for one-sided tests), ">" and "<" (for two-sided tests). -#' @param weights_type character or function. The character string specifies +#' (for one-sided tests), ">" and "<" (for two-sided tests). +#' @param weights_type character or function. The character string specifies #' the type of bootstrap to use: One of "rademacher", "mammen", "norm" -#' and "webb". Alternatively, type can be a function(n) for drawing -#' wild bootstrap factors. "rademacher" by default. For the Rademacher +#' and "webb". Alternatively, type can be a function(n) for drawing +#' wild bootstrap factors. "rademacher" by default. For the Rademacher #' distribution, if the number of replications B exceeds the number of possible -#' draw ombinations, 2^(#number of clusters), then `boottest()` will use each -#' possible combination once (enumeration). -#' @param bootstrap_type Either "11", "13", "31", "33", or "fnw11". -#' "fnw11" by default. See `?fwildclusterboot::boottest` for more details +#' draw ombinations, 2^(#number of clusters), then `boottest()` will use each +#' possible combination once (enumeration). +#' @param bootstrap_type Either "11", "13", "31", "33", or "fnw11". +#' "fnw11" by default. See `?fwildclusterboot::boottest` for more details #' @param engine Should the wild cluster bootstrap run via `fwildclusterboot's` -#' R implementation or via `WildBootTests.jl`? 'R' by default. -#' The other option is `WildBootTests.jl`. Running the bootstrap through -#' `WildBootTests.jl` might significantly reduce the runtime of `rwolf()` +#' R implementation or via `WildBootTests.jl`? 'R' by default. +#' The other option is `WildBootTests.jl`. Running the bootstrap through +#' `WildBootTests.jl` might significantly reduce the runtime of `rwolf()` #' for complex problems (e.g. problems with more than 500 clusters). -#' @param nthreads Integer. The number of threads to use when running the +#' @param nthreads Integer. The number of threads to use when running the #' bootstrap. -#' @param ... additional function values passed to the bootstrap function. -#' +#' @param ... additional function values passed to the bootstrap function. +#' #' @importFrom fwildclusterboot boottest #' @importFrom fixest coeftable #' @importFrom dreamerr check_arg #' @importFrom stats terms formula #' @importFrom utils txtProgressBar setTxtProgressBar #' @export -#' -#' @return -#' -#' A data.frame containing the following columns: +#' +#' @return +#' +#' A data.frame containing the following columns: #' \item{model}{Index of Models} #' \item{Estimate}{The estimated coefficient of `param` in the respective model.} #' \item{Std. Error}{The estimated standard error of `param` in the respective model.} #' \item{t value}{The t statistic of `param` in the respective model.} #' \item{Pr(>|t|)}{The uncorrected pvalue for `param` in the respective model.} #' \item{RW Pr(>|t|)}{The Romano-Wolf corrected pvalue of hypothesis test for `param` in the respective model.} -#' +#' #' @section Setting Seeds and Random Number Generation: -#' +#' #' To guarantee reproducibility, please set a global random seeds via #' `set.seed()`. -#' +#' #' @examples -#' +#' #' library(fixest) #' library(wildrwolf) -#' +#' #' set.seed(12345) -#' +#' #' N <- 1000 #' X1 <- rnorm(N) #' Y1 <- 1 + 1 * X1 + rnorm(N) #' Y2 <- 1 + 0.01 * X1 + rnorm(N) #' Y3 <- 1 + 0.01 * X1 + rnorm(N) #' Y4 <- 1 + 0.01 * X1 + rnorm(N) -#' +#' #' B <- 999 #' # intra-cluster correlation of 0 for all clusters #' cluster <- rep(1:50, N / 50) -#' -#' data <- data.frame(Y1 = Y1, -#' Y2 = Y2, -#' Y3 = Y3, +#' +#' data <- data.frame(Y1 = Y1, +#' Y2 = Y2, +#' Y3 = Y3, #' Y4 = Y4, -#' X1 = X1, +#' X1 = X1, #' cluster = cluster) -#' +#' #' res <- feols(c(Y1, Y2, Y3) ~ X1, data = data, cluster = ~ cluster) #' res_rwolf <- rwolf(models = res, param = "X1", B = B) #' res_rwolf -#' -#' @references -#' Clarke, Romano & Wolf (2019), STATA Journal. +#' +#' @references +#' Clarke, Romano & Wolf (2019), STATA Journal. #' IZA working paper: https://ftp.iza.org/dp12845.pdf -#' +#' rwolf <- function( @@ -104,7 +104,7 @@ rwolf <- function( nthreads = 1, bootstrap_type = "fnw11", ...){ - + check_arg(param, "character vector | character scalar | formula") check_arg(R, "NULL | numeric vector") @@ -120,18 +120,17 @@ rwolf <- function( if (inherits(param, "formula")) { param <- attr(terms(param), "term.labels") } - + # Check if 'models' is of type fixest_multi if(!inherits(models, "fixest_multi")){ } else if(inherits(models, "list")){ fixest_list <- mean(sapply(models, class) == "fixest") == 1L if(!fixest_list){ - stop("The object models needs to be either of type + stop("The object models needs to be either of type 'fixest_multi' or a list of objects of type 'fixest'.") } } - # brute force objects of type 'fixest_multi' to list models <- as.list(models) S <- length(models) @@ -145,9 +144,9 @@ rwolf <- function( ] res } - - # and get coefs, t-stats and ses - # no absolute values for coefs, ses + + # and get coefs, t-stats and ses + # no absolute values for coefs, ses coefs <- unlist( lapply(1:S, function(x) get_stats_fixest(x, stat = "Estimate"))) ses <- unlist( @@ -155,35 +154,35 @@ rwolf <- function( # absolute value for t-stats # t_stats <- abs( # unlist(lapply(1:S, function(x) get_stats_fixest(x, stat = "t value")))) - - # repeat line: for multiway clustering, it is not clear how many bootstrap - # test statistics will be invalied - for oneway, + + # repeat line: for multiway clustering, it is not clear how many bootstrap + # test statistics will be invalied - for oneway, # all vectors of length(boot_coefs) \leq B - - boot_coefs <- boot_ses <- matrix(NA, B, S) + + boot_coefs <- boot_ses <- matrix(NA, B, S) t_stats <- rep(NA, S) boot_t_stats <- list() - + # boottest() over all models for param pb <- txtProgressBar(min = 0, max = S, style = 3) - + # reset global seed state once exciting the function global_seed <- .Random.seed on.exit(set.seed(global_seed)) - + internal_seed <- sample.int(.Machine$integer.max, 1L) - - res <- - lapply(seq_along(models), + + res <- + lapply(seq_along(models), function(x){ - - # set seed, to guarantee that all S calls to + + # set seed, to guarantee that all S calls to # boottest() generate the same weight matrices # affects global seed outside of 'rwolf()'! - + set.seed(internal_seed) clustid <- models[[x]]$call$cluster - + boottest_quote <- quote( boottest( @@ -194,11 +193,11 @@ rwolf <- function( r = r, engine = engine, p_val_type = p_val_type, - type = weights_type, + type = weights_type, sampling = "standard" ) ) - + if(!is.null(clustid)){ if(is.character(clustid)){ boottest_quote$clustid <- formula(paste0("~", clustid)) @@ -206,59 +205,59 @@ rwolf <- function( boottest_quote$clustid <- formula(clustid) } } - + if(!is.null(bootstrap_type)){ boottest_quote$bootstrap_type <- bootstrap_type } - + suppressMessages( - boottest_eval <- + boottest_eval <- eval(boottest_quote) ) - + setTxtProgressBar(pb, x) - + boottest_eval - + }) - + for(x in seq_along(models)){ # take absolute values of bootstrap t statistics t_stats[x] <- (res[[x]]$t_stat) - boot_t_stats[[x]] <- (res[[x]]$t_boot) + boot_t_stats[[x]] <- (res[[x]]$t_boot) } - + boot_t_stats <- Reduce(cbind, boot_t_stats) - + # after calculating all bootstrap t statistics, initiate the RW procedure # stepwise p-value calculation - pval <- get_rwolf_pval(t_stats = t_stats, + pval <- get_rwolf_pval(t_stats = t_stats, boot_t_stats= boot_t_stats) - + # summarize all results models_info <- lapply(1:S, function(x){ - + tmp <- coeftable(models[[x]]) tmp1 <- tmp[which(rownames(tmp) == param),] - + suppressWarnings(tmp1$depvar <- as.character( as.expression(models[[x]]$fml[[2]]) ) ) - + suppressWarnings(tmp1$covars <- as.character( as.expression(models[[x]]$fml[[3]]) ) ) - + tmp1$model <- x tmp1 }) - + models_info <- Reduce(rbind, models_info) - + # some reordering models_info <- models_info[, c(7, 1:4)] models_info <- as.data.frame(models_info) @@ -266,5 +265,5 @@ rwolf <- function( models_info[, "RW Pr(>|t|)"] <- pval models_info - + } diff --git a/tests/testthat/test_misc.R b/tests/testthat/test_misc.R index 18c5b10..64f28a2 100644 --- a/tests/testthat/test_misc.R +++ b/tests/testthat/test_misc.R @@ -6,13 +6,13 @@ test_that("test cluster input", { library(fixest) library(wildrwolf) - dqrng::dqset.seed(123) + set.seed(123) models1 = feols(c(vs, am) ~ mpg | cyl, mtcars, cluster = "carb") - rwolf1 = rwolf(models = models, param = "mpg", B = 9999) + rwolf1 = rwolf(models = models1, param = "mpg", B = 999) - dqrng::dqset.seed(123) + set.seed(123) models2 = feols(c(vs, am) ~ mpg | cyl, mtcars, cluster = ~carb) - rwolf2 = rwolf(models = models, param = "mpg", B = 9999) + rwolf2 = rwolf(models = models2, param = "mpg", B = 999) expect_equal(rwolf1, rwolf2)