diff --git a/.Rbuildignore b/.Rbuildignore index 351a5bc..aa392d1 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,3 +1,5 @@ ^LICENSE\.md$ -aux +^aux$ ^\.github$ +^README\.md$ +.gitignore diff --git a/DESCRIPTION b/DESCRIPTION index c1c0813..3ff058e 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: permChacko Title: Chacko Test for Order-Restriction with Permutation -Version: 0.0.0.9013 +Version: 0.1.0 Authors@R: c( person( @@ -16,8 +16,11 @@ Authors@R: email = "morten.fagerland@medisin.uio.no" ) ) -Description: Implements Ruxton's suggestion for a modified version of the Chacko - test for order-restruction which includes a permutation test. +Description: Implements an extension of the Chacko chi-square test for + ordered vectors (Chacko, 1966, ). + Our extension brings the Chacko test to the computer age by implementing + a permutation test to offer a numeric estimate of the p-value, which is + particularly useful when the analytic solution is not available. License: GPL (>= 3) Encoding: UTF-8 Roxygen: list(markdown = TRUE) @@ -25,3 +28,5 @@ RoxygenNote: 7.2.3 Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 +URL: https://ocbe-uio.github.io/permChacko/ +BugReports: https://github.com/ocbe-uio/permChacko/issues diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..896b445 --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# permChacko 0.1.0 + +* Added a `NEWS.md` file to track changes to the package. diff --git a/R/permChacko.R b/R/permChacko.R index 378432e..09654c1 100644 --- a/R/permChacko.R +++ b/R/permChacko.R @@ -2,6 +2,14 @@ #' @param x vector of numeric values #' @param n_perm number of permutations to calculate the p-value numerically #' @param verbose if \code{TRUE}, prints intermediate messages and output +#' @return A named vector with the following elements: +#' \describe{ +#' \item{chisq_bar}{the test statistic} +#' \item{analytic_p-value}{the p-value calculated analytically} +#' \item{numeric_p-value}{the p-value calculated numerically} +#' \item{tabular_p-value}{the tabular p-value from Chacko (1963)} +#' } +#' #' @references #' Chacko, V. J. (1963). Testing homogeneity against ordered alternatives. The #' Annals of Mathematical Statistics, 945-956. @@ -24,38 +32,7 @@ permChacko <- function(x, n_perm = 1000L, verbose = FALSE) { k <- length(x) chisq_bar <- chackoStatistic(x_t, n = sum(x), k) - # Notice that Chacko was entirely comfortable with this ordering process - # ending with a single value. If you look at their table on page 188 then he - # suggests that under the null hypothesis – if you start with a list of 5 - # values, then you have a 20% chance that this process that the order process - # results in a single value. - # - # Even if the outcome is a single value, the test statistic can be calculated - # from equation 5 on page 188. - # - # The question then becomes how do we obtain a p-value associated with this - # test statistic. - # - # I agree that just under equation 5, Chacko says that the test statistic is - # asymptotically chi-squared with m- 1 degrees of freedom (where m is the - # length of the final order list of values). However, he does not discuss what - # this means in the event on m = 1. However, as I discuss above, they clearly - # expect that this will happen sometimes. - # - # If you look at their final example on how they evaluate significance – they - # say that you reject the null hypothesis if the observed calculated value - # obtained from equation (5) is greater than the value c obtained from - # equation 6. This approach does work when m = 1 but seems very cumbersome – - # requiring calculation of the probability values given on the table on page - # 188, which requires consultation of Chacko (1963). - # - # I think with the computing power now we would simply obtain the p-value - # using a permutation test. - - # That is – imagine that the sum of our original K values is N, then by a - # permutation I mean a stochastic distribution of N objects (independently) - # across the k categories (with each category being equally likely under the - # null hypothesis). + # Calculating the mean of the current permutation perm_chisq_bar <- vapply( X = seq_len(n_perm), FUN = function(n, x, k) { @@ -70,6 +47,7 @@ permChacko <- function(x, n_perm = 1000L, verbose = FALSE) { FUN.VALUE = vector("double", 1L), x = x, k = k ) + # The p-value is simply the fraction of such permutations that yield a test # statistic equal to or greater than the one we originally observed. perm_p_value <- sum(perm_chisq_bar >= chisq_bar) / n_perm @@ -83,6 +61,8 @@ permChacko <- function(x, n_perm = 1000L, verbose = FALSE) { # Calculating table p-value if (k %in% seq(3L, 10L) && m <= 10L) { table_p_value <- tablePvalue(k, m, chisq_bar) + } else { + table_p_value <- NA } return( c( diff --git a/R/reduceVector.R b/R/reduceVector.R index ecfcddc..8f59766 100644 --- a/R/reduceVector.R +++ b/R/reduceVector.R @@ -1,3 +1,9 @@ +# Notice that Chacko was entirely comfortable with this ordering process +# ending with a single value. If you look at their table on page 188 then he +# suggests that under the null hypothesis – if you start with a list of 5 +# values, then you have a 20% chance that this process that the order process +# results in a single value. + reduceVector <- function(x, verbose = FALSE) { x_t <- cbind("x" = unname(x), "t" = unname(x) ^ 0L) while (nrow(x_t) > 1L && isMonotoneIncreasing(x_t[, "x"])) { diff --git a/man/permChacko.Rd b/man/permChacko.Rd index 47ba8ce..8d56313 100644 --- a/man/permChacko.Rd +++ b/man/permChacko.Rd @@ -13,6 +13,15 @@ permChacko(x, n_perm = 1000L, verbose = FALSE) \item{verbose}{if \code{TRUE}, prints intermediate messages and output} } +\value{ +A named vector with the following elements: +\describe{ +\item{chisq_bar}{the test statistic} +\item{analytic_p-value}{the p-value calculated analytically} +\item{numeric_p-value}{the p-value calculated numerically} +\item{tabular_p-value}{the tabular p-value from Chacko (1963)} +} +} \description{ The Chacko test for order-restriction with permutation test } diff --git a/tests/testthat/test-customExamples.R b/tests/testthat/test-customExamples.R index 8df47df..bae3a7c 100644 --- a/tests/testthat/test-customExamples.R +++ b/tests/testthat/test-customExamples.R @@ -11,7 +11,19 @@ test_that("Other from produce correct vectors", { }) test_that("Expected output is produced", { + # Fixed input set.seed(862255) vec <- rpois(5L, lambda = 100L) expect_output(suppressMessages(reduceVector(vec, verbose = TRUE))) + + # Random imput + for (i in seq_len(10L)) { + n <- sample(1:20, size = 1L) + mu <- sample(10:100, size = 1L) + x <- rpois(n, lambda = mu) + reps <- sample(c(0L, 10L, 100L, 1000L, 2000L), size = 1L) + y <- permChacko(x, n_perm = reps) + expect_length(y, 4L) + expect_type(y, "double") + } })