/
power.cmh.test.r
335 lines (298 loc) · 10.8 KB
/
power.cmh.test.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
#' @title Power and sample size calculation for the Cochran-Mantel-Haenszel test
#'
#' @description
#' Compute the post-hoc power or required number of subjects for the
#' Cochran-Mantel-Haenszel test for association in \emph{J} stratified 2 x 2
#' tables.
#'
#' @details
#' This sample size calculation is based on the derivations described in the
#' Woolson \emph{et al.} (1986). It is designed for case-control studies where
#' one margin is fixed. The method is "based on the Cochran-Mantel-Haenszel
#' statistic expressed as a weighted difference in binomial proportions."
#'
#' Continuity corrected sample size is described in Nam's 1992 paper. This uses
#' the weighted binomial sample size calculation described in Woolson \emph{et
#' al.} (1986) but is enhanced for use with the continuity corrected Cochran's
#' test.
#'
#' Power calculations are based on the writings of Wittes and Wallenstein
#' (1987). They are functionally equivalent to the derivations of the sample
#' size calculation described by Woolson and others and Nam, but have slightly
#' added precision.
#'
#' Terminology and symbolic conventions are borrowed from Woolson \emph{et al.}
#' (1986). The \code{p1} group is dubbed the \emph{Case} group and \code{p2}
#' group is called the \emph{Control} group.
#'
#' @section Arguments:
#'
#' To calculate \strong{power}, the \code{power} parameter must be set to
#' \code{NULL}. To calculate \strong{sample size}, the \code{N} parameter must
#' be set to \code{NULL}.
#'
#' The \code{J} number of groups will be inferred by the maximum length of
#' \code{p1}, \code{p2}, or \code{theta}.
#'
#' Effect size must be specified using one of the following combinations of
#' arguments.
#' \itemize{
#' \item Both case and control proportion vectors, ex.,
#' \itemize{\item \code{p1} and \code{p2} with \code{theta = NULL}.}
#' \item One proportion vector and an effect size, ex.,
#' \itemize{
#' \item \code{p1} and \code{theta} with \code{p2 = NULL}, \strong{or}
#' \item \code{p2} and \code{theta} with \code{p1 = NULL}.
#' }
#' }
#'
#' @param p1 Vector of proportions of the \emph{J} case groups.
#' @param p2 Vector of proportions of the \emph{J} control groups.
#' @param theta Vector of odds ratios relating to the \emph{J} 2 x 2 tables.
#' @param N Total number of subjects.
#' @param sig.level Significance level (Type I error probability).
#' @param power Power of test (1 minus Type II error probability).
#' @param alternative Two- or one-sided test. If one-sided, the direction of the
#' association must be defined (less than 1 or greater than 1). Can be
#' abbreviated.
#' @param s Proportion (weight) of case versus control in \emph{J} stratum.
#' @param t Proportion (weight) of total number of cases of \emph{J} stratum.
#' @param correct Logical indicating whether to apply continuity correction.
#'
#' @return
#' An object of class \code{"power.cmh"}: a list of the original arguments and
#' the calculated sample size or power. Also included are vectors of n's per
#' each group, an indicator or whether continuity correction was used, the
#' original function call, and \code{N.effective}.
#'
#' The vectors of n's per each group, \code{n1} and \code{n2}, are the
#' fractional n's required to achieve a final total N specified by the
#' calculation while satisfying the constraints of \code{s} and \code{t}.
#' However, the effective N, given the requirement of cell counts populated by
#' whole numbers is provided by \code{N.effective}. By default, the print method
#' is set to \code{n.frac = FALSE}, which will round each cell n up to the
#' nearest whole number.
#'
#' @seealso
#' \link[stats]{power.prop.test},
#' \link[stats]{mantelhaen.test},
#' \link[DescTools]{BreslowDayTest}
#'
#' @author Paul W. Egeler, M.S.
#'
#' @examples
#' # From "Sample size determination for case-control studies and the comparison
#' # of stratified and unstratified analyses", (Nam 1992). See references.
#'
#' # Uncorrected sample size estimate first introduced
#' # by Woolson and others in 1986
#' sample_size_uncorrected <- power.cmh.test(
#' p2 = c(0.75,0.70,0.65,0.60),
#' theta = 3,
#' power = 0.9,
#' t = c(0.10,0.40,0.35,0.15),
#' alternative = "greater",
#' correct = FALSE
#' )
#'
#' print(sample_size_uncorrected, detail = FALSE)
#'
#' # We see that the N is 171, the same as calculated by Nam
#' sample_size_uncorrected$N
#'
#'
#' # Continuity corrected sample size estimate added by Nam
#' sample_size_corrected <- power.cmh.test(
#' p2 = c(0.75,0.70,0.65,0.60),
#' theta = 3,
#' power = 0.9,
#' t = c(0.10,0.40,0.35,0.15),
#' alternative = "greater",
#' correct = TRUE
#' )
#'
#' print(sample_size_corrected, n.frac = TRUE)
#'
#' # We see that the N is indeed equal to that which is reported in the paper
#' sample_size_corrected$N
#'
#' @references
#' Gail, M. (1973). "The determination of sample sizes for trials involving
#' several 2 x 2 tables."
#' \emph{Journal of Chronic Disease} \strong{26}: 669-673.
#'
#' Munoz, A. and B. Rosner. (1984). "Power and sample size for a collection of 2
#' x 2 tables." \emph{Biometrics} \strong{40}: 995-1004.
#'
#' Nam, J. (1992). "Sample size determination for case-control studies and the
#' comparison of stratified and unstratified analyses."
#' \emph{Biometrics} \strong{48}: 389-395.
#'
#' Wittes, J. and S. Wallenstein. (1987). "The power of the Mantel-Haenszel
#' test." \emph{Journal of the American Statistical Association} \strong{82}:
#' 1104-1109.
#'
#' Woolson, R. F., Bean, J. A., and P. B. Rojas. (1986).
#' "Sample size for case-control studies using Cochran's statistic."
#' \emph{Biometrics} \strong{42}: 927-932.
#'
#' @export
power.cmh.test <- function(
p1 = NULL,
p2 = NULL,
theta = NULL,
N = NULL,
sig.level = 0.05,
power = 0.80,
alternative = c("two.sided","less","greater"),
s = 0.5,
t = 1 / J,
correct = TRUE
) {
# Determine if the correct number of arguments are set to NULL
if (sum(sapply(list(p1,p2,theta),is.null)) != 1L) {
stop("exactly one of 'p1', 'p2', or 'theta' must be NULL")
}
if (sum(sapply(list(N, power),is.null)) != 1L) {
stop("either 'N' or 'power' must be NULL")
}
# Determine upper, lower, or two-sided hypothesis
alternative <- match.arg(alternative)
tside <- switch(alternative, two.sided = 2, 1)
lower.tail <- switch(alternative, less = TRUE, FALSE)
# Infer J from vector lengths of first three args
J <- max(sapply(list(p1,p2,theta),length))
# Ensure that 's' and 't' are of correct lengths
s <- rep(as.vector(s), length.out = J)
t <- rep(as.vector(t), length.out = J)
# Check that 's' and 't' are reasonable
if (any(s < 0) || any(s > 1)) {
stop("The variable 's' must be a decimal fraction")
}
if (!isTRUE(all.equal(sum(t),1))) {
stop("The 't' levels must sum to 1")
}
# Determine missing 'p' or 'theta' vector
if (is.null(theta)) {
theta <- props2theta(p1,p2)
} else if (is.null(p1)) {
p1 <- effect.size(p2,theta)
} else {
p2 <- effect.size(p1,theta)
}
if (all(theta == 1)) {
stop(
"Effect size ('theta') is 1 across all strata.\n",
"Calculation cannot be performed.\n"
)
}
if (!all(theta >= 1) && !all(theta <= 1)) {
message("NOTE: Effect size ('theta') differs in direction across strata.")
}
if (!isTRUE(all.equal(min(as.vector(theta)), max(as.vector(theta))))) {
message("NOTE: Effect size ('theta') is not consistent between strata.")
}
# Calculate z value for alpha
z_a <- stats::qnorm(sig.level / tside, lower.tail = lower.tail)
# Determine if N or power is missing and then complete the calculation
if (is.null(N)) {
z_b <- stats::qnorm(1 - power, lower.tail = lower.tail)
pbar <- p1 * s + p2 * (1 - s)
X <- sum(t * s * (1 - s) * pbar * (1 - pbar))
Y <- sum(t * s * (1 - s) * ((1 - s) * p1 * (1 - p1) + s * p2 * (1 - p2)))
Z <- sum(t * s * (1 - s) * (p1 - p2))
# The calculation itself
N <- (z_a * sqrt(X) + z_b * sqrt(Y))^2 / Z^2
# Continuity correction
if (correct) {
if (!lower.tail)
N <- (1 + sqrt(1 + 2 / (abs(Z) * N) ))^2 * N / 4
else
N <- (1 + sqrt(1 - 2 / (Z * N) ))^2 * N / 4
}
# Splitting N into groups
n1 <- N*t*s
n2 <- N*t*(1 - s)
# Calculating effective N
N.effective <- sum(ceiling(c(n1,n2)))
} else {
std_dev <- sqrt(
sum(t * s * (1 - s) * ((1 - s) * p1 * (1 - p1) + s * p2 * (1 - p2)))
)
pbar <- p1 * s + p2 * (1 - s)
phi <- pbar * (1 - pbar) + (p1 - p2)^2 * s * (1 - s) / (N * t - 1)
Eg <- sum(N * t * s * (1 - s) * (p1 - p2))
numerator <- (
ifelse(alternative == "two.sided", abs(Eg), Eg) +
ifelse(!correct,0,ifelse(alternative == "less",0.5,-0.5))) / sqrt(N) -
z_a * sqrt(sum(t * s * (1 - s) * phi))
U <- numerator / std_dev
power <- stats::pnorm(U, lower.tail = !lower.tail)
# Splitting N into groups
n1 <- round(N*t*s,0)
n2 <- round(N*t*(1 - s),0)
# N.effective will just be N
N.effective <- N
}
# Return an object of class "power.cmh"
structure(
list(
p1 = p1, p2 = p2, theta = theta, N = N, sig.level = sig.level,
power = power, alternative = alternative, s = s, t = t, J = J,
n1 = n1, n2 = n2, N.effective = N.effective, call = match.call()
),
class = "power.cmh"
)
}
#' Print \code{"power.cmh"} object
#'
#' The S3 print method for the \code{"power.cmh"} object
#'
#' @param x A \code{"power.cmh"} object.
#' @param detail Logical to toggle detailed or simple output.
#' @param n.frac Logical indicating whether sample n's should be rounded to the
#' next whole number.
#' @param ... Ignored.
#' @export
print.power.cmh <- function(x, detail = TRUE, n.frac = FALSE, ...) {
with(x, {
cat(
"Power and sample size calculation for the Cochran Mantel Haenszel test\n\n",
" N = ",ifelse(n.frac,N,ceiling(N)),"\n",
" Effective N = ",N.effective,"\n",
"Significance level = ",sig.level,"\n",
" Power = ",power,"\n",
" Alternative = ",alternative,"\n\n",
sep = ""
)
if (detail) {
if (n.frac) {
cat(
"Number of subjects per each group:\n",
rep("_",10 + J * 9),"\n",
"Group |",sprintf(" %7i ",1:J),"\n",
rep("=",10 + J * 9),"\n",
"Case |",sprintf(" %7.2f ",n1),"\n",
"Control |",sprintf(" %7.2f ",n2),"\n\n",
sep = ""
)
} else {
cat(
"Number of subjects per each group:\n",
rep("_",10 + J * 7),"\n",
"Group |",sprintf(" %5i ",1:J),"\n",
rep("=",10 + J * 7),"\n",
"Case |",sprintf(" %5i ",ceiling(n1)),"\n",
"Control |",sprintf(" %5i ",ceiling(n2)),"\n\n",
sep = ""
)
}
}
cat(
"CALL: \n",
paste(deparse(x$call), sep = "\n", collapse = "\n"),
"\n", sep = ""
)
})
invisible(x)
}