-
Notifications
You must be signed in to change notification settings - Fork 0
/
referenceInterval.R
288 lines (262 loc) · 10.9 KB
/
referenceInterval.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
#' Robust Method in Calculation of Reference Interval
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' This robust method is used to calculate the reference interval on small sample
#' size (below to 120 observations).
#'
#' @param x (`numeric`)\cr numeric measurements from target population.
#' @param ind (`integer`)\cr integer vector for boot process, default is all elements
#' in `x`.
#' @param conf.level (`numeric`)\cr significance level for the internal t statistic.
#' @param tol (`numeric`)\cr tolerance for when the iterative process can be stopped.
#'
#' @return a vector of robust reference interval
#' @export
#'
#' @references This robust algorithm is referring to CLSI document EP28A3.
#'
#' @examples
#' # This example data is taken from EP28A3 Appendix B. to ensure the result is in accordance.
#' x <- c(8.9, 9.2, rep(9.4, 2), rep(9.5, 3), rep(9.6, 4), rep(9.7, 5), 9.8, rep(9.9, 2), 10.2)
#' robustRI(x)
robustRI <- function(x, ind = 1:length(x), conf.level = 0.95, tol = 1e-06) {
assert_numeric(x)
assert_vector(ind)
assert_number(conf.level, lower = 0.7, upper = 1)
x <- sort(x[ind])
n <- length(x)
median <- median(x)
Tbi <- median
c <- 3.7
MAD <- median(abs(x - median))
repeat {
ui <- (x - Tbi) / (c * MAD / 0.6745)
wi <- ifelse(ui > 1 | ui < -1, 0, (1 - ui^2)^2)
TbiNew <- sum(x * wi) / sum(wi)
if (!is.finite(TbiNew) | (abs(TbiNew - Tbi)) < tol) {
break
}
Tbi <- TbiNew
}
fm <- function(n, ui) {
ui <- ui[ui > -1 & ui < 1]
sqrt((n * sum(ui^2 * (1 - ui^2)^4))
/ (sum((1 - ui^2) * (1 - 5 * ui^2)) *
max(c(1, -1 + sum((1 - ui^2) * (1 - 5 * ui^2))))))
}
fm2 <- function(ui) {
ui <- ui[ui > -1 & ui < 1]
sqrt(sum(ui^2 * (1 - ui^2)^4)
/ (sum((1 - ui^2) * (1 - 5 * ui^2)) *
max(c(1, -1 + sum((1 - ui^2) * (1 - 5 * ui^2))))))
}
ui <- (x - median) / (205.6 * MAD / 0.6745)
sbi205_6 <- 205.6 * MAD / 0.6745 * fm(n, ui)
ui <- (x - median) / (3.7 * MAD / 0.6745)
sbi3_7 <- 3.7 * MAD / 0.6745 * fm(n, ui)
ui <- (x - Tbi) / (3.7 * sbi3_7)
St3_7 <- 3.7 * sbi3_7 * fm2(ui)
t <- qt(1 - ((1 - conf.level) / 2), (n - 1))
robust_lower <- Tbi - t * sqrt(sbi205_6^2 + St3_7^2)
robust_upper <- Tbi + t * sqrt(sbi205_6^2 + St3_7^2)
c(robust_lower, robust_upper)
}
#' Nonparametric Method in Calculation of Reference Interval
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' This nonparametric method is used to calculate the reference interval when the
#' distribution is skewed and the sample size is above to 120 observations.
#'
#' @param x (`numeric`)\cr numeric measurements from target population.
#' @param ind (`integer`)\cr integer vector for boot process, default is all
#' elements in `x`.
#' @param conf.level (`numeric`)\cr the percentile of reference limit.
#'
#' @return a vector of nonparametric reference interval
#' @export
#'
#' @examples
#' data("calcium")
#' x <- calcium$Value
#' nonparRI(x)
nonparRI <- function(x, ind = 1:length(x), conf.level = 0.95) {
assert_numeric(x)
assert_vector(ind)
assert_number(conf.level, lower = 0.7, upper = 1)
x <- x[ind]
nonpar_lower <- quantile(x, (1 - conf.level) / 2)
nonpar_upper <- quantile(x, 1 - ((1 - conf.level) / 2))
c(nonpar_lower, nonpar_upper)
}
#' Calculate Reference Interval and Corresponding Confidence Interval
#'
#' @description `r lifecycle::badge("experimental")`
#'
#' This function is used to establish the reference interval for target population
#' with parametric, non-parametric and robust methods that follows the CLSI-EP28A3
#' and NMPA guideline. In additional, it also provides the corresponding confidence
#' interval for lower/upper reference limit if needed. Given that outliers should be
#' identified beforehand, Tukey and Dixon methods can be applied depending on
#' distribution of the data.
#'
#' @note There are some conditions of use to be aware of:
#' - If parametric method is used to calculate reference interval, confidence
#' interval should be the same method as well.
#' - If non-parametric method is used to calculate the reference interval and
#' the sample size is up to 120 observations, the non-parametric is suggested for
#' confidence interval. Otherwise if the sample size is below to 120, the bootstrap
#' method is the better choice. Beside the non-parametric method for confidence
#' interval only allows the `refLevel=0.95` and `confLevel=0.9` arguments,
#' if not the bootstrap methods will be used automatically.
#' - If robust method is used to calculate the reference interval, the method for
#' confidence interval must be bootstrap.
#'
#' @param x (`numeric`)\cr numeric measurements from target population.
#' @param out_method (`string`)\cr string specifying the which outlier detection to use.
#' @param out_rm (`logical`)\cr whether the outliers is removed or not.
#' @param RI_method (`string`)\cr string specifying the which method for computing
#' reference interval to use. Default is `parametric`, options can be `nonparametric`
#' and `robust`.
#' @param CI_method (`string`)\cr string specifying the which method for computing
#' confidence interval of reference limit(lower or upper) to use. Default is
#' `parametric`, options can be `nonparametric` and `boot`.
#' @param refLevel (`numeric`)\cr reference range/interval, usual is 0.95.
#' @param bootCI (`string`)\cr string specifying the which bootstrap confidence
#' interval from `boot.ci()` function in `boot` package. Default is
#' `perc`(bootstrap percentile), options can be `norm`(normal approximation),
#' `boot`(basic bootstrap), `stud`(studentized bootstrap) and `bca`(adjusted
#' bootstrap percentile).
#' @param confLevel (`numeric`)\cr significance level for the confidence interval
#' of reference limit.
#' @param rng.seed (`integer`)\cr number of the random number generator seed
#' for bootstrap sampling. If set to NULL currently in the R session used RNG
#' setting will be used.
#' @param tol (`numeric`)\cr tolerance for when the iterative process can be
#' stopped in robust method.
#' @param R (`integer`)\cr number of bootstrap replicates, is used in `boot()` function.
#'
#' @return A `RefInt` object contains relevant results in establishing of reference interval.
#' @export
#'
#' @examples
#' data("calcium")
#' x <- calcium$Value
#' refInterval(x, RI_method = "parametric", CI_method = "parametric")
#' refInterval(x, RI_method = "nonparametric", CI_method = "nonparametric")
#' refInterval(x, RI_method = "robust", CI_method = "boot", R = 1000)
refInterval <- function(x, out_method = c("doxin", "tukey"),
out_rm = FALSE,
RI_method = c("parametric", "nonparametric", "robust"),
CI_method = c("parametric", "nonparametric", "boot"),
refLevel = 0.95,
bootCI = c("perc", "norm", "basic", "stud", "bca"),
confLevel = 0.9,
rng.seed = NULL, tol = 1e-06, R = 1e+04) {
assert_numeric(x, any.missing = FALSE)
assert_logical(out_rm)
assert_numeric(refLevel, lower = 0.7, upper = 1)
out_method <- match.arg(out_method, c("doxin", "tukey"), several.ok = FALSE)
RI_method <- match.arg(RI_method, c("parametric", "nonparametric", "robust"), several.ok = FALSE)
CI_method <- match.arg(CI_method, c("parametric", "nonparametric", "boot"), several.ok = FALSE)
bootCI <- match.arg(bootCI, c("perc", "norm", "basic", "stud", "bca"), several.ok = FALSE)
rd <- x
if (out_method == "tukey") {
outres <- tukey_outlier(x)
}
if (out_method == "doxin") {
outres <- dixon_outlier(x)
}
if (out_rm == TRUE) {
x <- outres$subset
}
n <- length(x)
if (RI_method == "parametric") {
sd <- sd(x)
z <- qnorm(1 - ((1 - refLevel) / 2))
refLimit_lower <- mean(x) - z * sd
refLimit_upper <- mean(x) + z * sd
if (CI_method != "parametric") {
warning("As the parametric RI is selected, the parametric of CI is used automatically.")
CI_method <- "parametric"
}
if (CI_method == "parametric") {
# the Var of reference limit can be calculated as
se <- sd * sqrt(1 / n + z^2 / (2 * (n - 1)))
zconf <- qnorm(1 - ((1 - confLevel) / 2))
refLowerLimit_lower <- refLimit_lower - zconf * se
refLowerLimit_upper <- refLimit_lower + zconf * se
refUpperLimit_lower <- refLimit_upper - zconf * se
refUpperLimit_upper <- refLimit_upper + zconf * se
}
}
if (RI_method == "nonparametric") {
x <- sort(x)
npar <- nonparRI(x, conf.level = refLevel)
refLimit_lower <- as.numeric(npar[1])
refLimit_upper <- as.numeric(npar[2])
if (!CI_method %in% c("nonparametric", "boot")) {
stop("If RI is established by nonparametric, the CI should be nonparametric or boot")
}
if (n < 120 & CI_method != "boot") {
warning("Sample size is too small (<120) for non-parametric CI, boot CI is used automatically.")
CI_method <- "boot"
}
if (refLevel != 0.95 | confLevel != 0.9) {
warning("The non-parametric RI only works when `refLevel=0.95` and `confLevel=0.9`, boot CI is used automatically.")
CI_method <- "boot"
}
if (CI_method == "nonparametric") {
ranks <- nonparRanks[which(nonparRanks$SampleSize == n), ]
refLowerLimit_lower <- x[ranks$Lower]
refLowerLimit_upper <- x[ranks$Upper]
refUpperLimit_lower <- x[n + 1 - ranks$Upper]
refUpperLimit_upper <- x[n + 1 - ranks$Lower]
}
}
if (RI_method == "robust") {
robust <- robustRI(x, conf.level = refLevel, tol = tol)
refLimit_lower <- robust[1]
refLimit_upper <- robust[2]
if (!CI_method %in% c("boot")) {
warning("Boot CI is used automatically.")
CI_method <- "boot"
}
}
if (CI_method == "boot" & RI_method %in% c("nonparametric", "robust")) {
if (!is.null(rng.seed)) {
set.seed(rng.seed)
}
if (RI_method == "nonparametric") {
message("Bootstrape process could take a short while.")
bootres <- boot(data = x, statistic = nonparRI, R = R, conf.level = refLevel)
} else if (RI_method == "robust") {
message("Bootstrape process could take a short while.")
bootres <- boot(data = x, statistic = robustRI, R = R, conf.level = refLevel, tol = tol)
}
bootlowerci <- boot.ci(bootres, conf = confLevel, type = bootCI, index = c(1, 2))
bootupperci <- boot.ci(bootres, conf = confLevel, type = bootCI, index = c(2, 2))
bootlen <- length(bootlowerci[[4]])
refLowerLimit_lower <- bootlowerci[[4]][bootlen - 1]
refLowerLimit_upper <- bootlowerci[[4]][bootlen]
refUpperLimit_lower <- bootupperci[[4]][bootlen - 1]
refUpperLimit_upper <- bootupperci[[4]][bootlen]
}
methods <- paste0(
"Reference Interval Method: ", RI_method,
", Confidence Interval Method: ", CI_method
)
RefInt(
call = match.call(),
method = methods,
n = n,
data = rd,
outlier = outres,
refInt = c(refLimit_lower, refLimit_upper),
confInt = list(
refLower = c(refLowerLimit_lower, refLowerLimit_upper),
refUpper = c(refUpperLimit_lower, refUpperLimit_upper)
)
)
}