Skip to content

Westfall-Young (1993) p-value adjustments for multiple hypotheses testing via the wild bootstrap for objects of type fixest and fixest_multi from the fixest package

License

Unknown, MIT licenses found

Licenses found

Unknown
LICENSE
MIT
LICENSE.md
Notifications You must be signed in to change notification settings

s3alfisc/wildwyoung

Repository files navigation

wildwyoung

Lifecycle: experimental

The wildwyoung package computes Westfall-Young multiple-hypothesis-adjusted p-values for objects of type fixest and fixest_multi from the fixest package via a wild (cluster) bootstrap. At its current stage, the package is experimental and it is not thoroughly tested.

Because the bootstrap-resampling is based on the fwildclusterboot package, wildwyoung is usually really fast.

The package is complementary to wildwrolf, which implements the multiple hypothesis adjustment method following Romano and Wolf (2005).

Installation

You can install the development version from GitHub with:

# install.packages("devtools")
devtools::install_github("s3alfisc/wyoung")

# from r-universe (windows & mac, compiled R > 4.0 required)
install.packages('wyoung', repos ='https://s3alfisc.r-universe.dev')

Example

library(wildwyoung)
library(wildrwolf)
library(fixest)

set.seed(43)

N <- 1000
X1 <- rnorm(N)
X2 <- rnorm(N)
rho <- 0.5
sigma <- matrix(rho, 4, 4); diag(sigma) <- 1
u <- MASS::mvrnorm(n = N, mu = rep(0, 4), Sigma = sigma)
Y1 <- 1 + 1 * X1 + X2 
Y2 <- 1 + 0.01 * X1 + X2
Y3 <- 1 + 0.4 * X1 + X2
Y4 <- 1 + -0.02 * X1 + X2
for(x in 1:4){
  var_char <- paste0("Y", x)
  assign(var_char, get(var_char) + u[,x])
}

# intra-cluster correlation of 0 for all clusters
#numb_clusters <- N / 50
#group_id <- as.character(sample(1:numb_clusters, N, replace = TRUE))

data <- data.frame(Y1 = Y1,
                   Y2 = Y2,
                   Y3 = Y3,
                   Y4 = Y4,
                   X1 = X1,
                   X2 = X2,
                   #group_id = group_id,
                   splitvar = sample(1:2, N, TRUE))

fit <- feols(c(Y1, Y2, Y3, Y4) ~ csw(X1,X2),
             data = data,
             se = "hetero",
             ssc = ssc(cluster.adj = TRUE))

rm(list= ls()[!(ls() %in% c('fit','data'))])

res_wyoung <- wildwyoung::wyoung(
  models = fit,
  param = "X1", 
  B = 9999,
  seed = 23
)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=========                                                             |  12%  |                                                                              |==================                                                    |  25%  |                                                                              |==========================                                            |  38%  |                                                                              |===================================                                   |  50%  |                                                                              |============================================                          |  62%  |                                                                              |====================================================                  |  75%  |                                                                              |=============================================================         |  88%  |                                                                              |======================================================================| 100%
res_rwolf <- wildrwolf::rwolf(
  models = fit,
  param = "X1", 
  B = 9999, 
  seed = 23
)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=========                                                             |  12%  |                                                                              |==================                                                    |  25%  |                                                                              |==========================                                            |  38%  |                                                                              |===================================                                   |  50%  |                                                                              |============================================                          |  62%  |                                                                              |====================================================                  |  75%  |                                                                              |=============================================================         |  88%  |                                                                              |======================================================================| 100%
pvals <- lapply(fit, function(x) pvalue(x)["X1"]) |> unlist()

# Westfall-Young Corrected P-values
summary(res_wyoung)
#>   model    Estimate Std. Error    t value      Pr(>|t|) WY Pr(>|t|)
#> 1     1    1.068297 0.04609664   23.17516  2.065929e-95   0.0000000
#> 2     2    1.016816 0.03148187   32.29848 3.359662e-157   0.0000000
#> 3     3  0.02840066 0.04533561  0.6264537     0.5311606   0.8793879
#> 4     4 -0.02080291 0.03160324 -0.6582525     0.5105279   0.8793879
#> 5     5   0.4136845 0.04599385   8.994343  1.170761e-18   0.0000000
#> 6     6   0.3617969 0.03049297   11.86493  1.812883e-30   0.0000000
#> 7     7  0.05546035  0.0434969   1.275042     0.2025913   0.5940594
#> 8     8 0.006285337 0.03169741  0.1982918     0.8428572   0.8793879

# Romano-Wolf Corrected P-values
summary(res_rwolf)
#>   model    Estimate Std. Error    t value      Pr(>|t|) RW Pr(>|t|)
#> 1     1    1.068297 0.04609664   23.17516  2.065929e-95      0.0001
#> 2     2    1.016816 0.03148187   32.29848 3.359662e-157      0.0001
#> 3     3  0.02840066 0.04533561  0.6264537     0.5311606      0.8850
#> 4     4 -0.02080291 0.03160324 -0.6582525     0.5105279      0.8850
#> 5     5   0.4136845 0.04599385   8.994343  1.170761e-18      0.0001
#> 6     6   0.3617969 0.03049297   11.86493  1.812883e-30      0.0001
#> 7     7  0.05546035  0.0434969   1.275042     0.2025913      0.5943
#> 8     8 0.006285337 0.03169741  0.1982918     0.8428572      0.8850

# Holm Corrected P-values
p.adjust(pvals, method = "holm") |> round(4)
#>     X1     X1     X1     X1     X1     X1     X1     X1 
#> 0.0000 0.0000 1.0000 1.0000 0.0000 0.0000 0.8104 1.0000

Performance

The above procedures with S=8 hypotheses, N=5000 observations and k %in% (1,2) parameters finish each in around 3.5 seconds.

if(requireNamespace("microbenchmark")){
  
  microbenchmark::microbenchmark(
    "Westfall-Young" = wildwyoung::wyoung(
      models = fit,
      param = "X1", 
      B = 9999,
      seed = 23
    ),
    "Romano-Wolf" = wildrwolf::rwolf(
      models = fit,
      param = "X1", 
      B = 9999, 
      seed = 23
    ), 
    times = 1
  )
 
 # t: seconds
 #           expr      min       lq     mean   median       uq      max neval
 # Westfall-Young 3.625710 3.625710 3.625710 3.625710 3.625710 3.625710     1
 #    Romano-Wolf 3.382969 3.382969 3.382969 3.382969 3.382969 3.382969     1
   
}

About

Westfall-Young (1993) p-value adjustments for multiple hypotheses testing via the wild bootstrap for objects of type fixest and fixest_multi from the fixest package

Resources

License

Unknown, MIT licenses found

Licenses found

Unknown
LICENSE
MIT
LICENSE.md

Stars

Watchers

Forks

Releases

No releases published

Packages