/
weighted_quantile.R
108 lines (107 loc) · 3.38 KB
/
weighted_quantile.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' Weighted Median and Quantiles
#'
#' Compute the median or quantiles a set of numbers which have weights
#' associated with them.
#'
#' @param x a numeric vector of values
#' @param w a numeric vector of weights
#' @param probs probabilities for which the quantiles should be computed, a
#' numeric vector of values between 0 and 1
#' @param na.rm a logical indicating whether to ignore `NA` values
#' @param type Integer specifying the rule for calculating the median or
#' quantile, corresponding to the rules available for `stats:quantile()`.
#' The only valid choices are type=1, 2 or 4. See Details.
#' @details
#' The `i`th observation `x[i]` is treated as having a weight proportional to
#' `w[i]`.
#'
#' The weighted median is a value `m` such that the total weight of data less
#' than or equal to `m` is equal to half the total weight. More generally, the
#' weighted quantile with probability `p` is a value `q` such that the total
#' weight of data less than or equal to `q` is equal to `p` times the total
#' weight.
#'
#' If there is no such value, then
#'
#' - if `type = 1`, the next largest value is returned (this is the
#' right-continuous inverse of the left-continuous cumulative distribution
#' function);
#' - if `type = 2`, the average of the two surrounding values is returned
#' (the average of the right-continuous and left-continuous inverses);
#' - if `type = 4`, linear interpolation is performed.
#'
#' Note that the default rule for `weighted.median()` is `type = 2`, consistent
#' with the traditional definition of the median, while the default for
#' `weighted.quantile()` is `type = 4`.
#' @source These functions are adapted from their homonyms developed by Adrian
#' Baddeley in the `spatstat` package.
#' @returns A numeric vector.
#' @export
#' @examples
#' x <- 1:20
#' w <- runif(20)
#' weighted.median(x, w)
#' weighted.quantile(x, w)
weighted.median <- function(x, w, na.rm = TRUE, type = 2) {
unname(weighted.quantile(x,
probs = 0.5, w = w, na.rm = na.rm,
type = type
))
}
#' @export
#' @rdname weighted.median
weighted.quantile <- function(x, w, probs = seq(0, 1, 0.25),
na.rm = TRUE, type = 4) {
x <- as.numeric(as.vector(x))
w <- as.numeric(as.vector(w))
if (length(x) == 0) {
stop("No data given")
}
stopifnot(length(x) == length(w))
if (is.na(m <- match(type, c(1, 2, 4)))) {
stop("Argument 'type' must equal 1, 2 or 4", call. = FALSE)
}
type <- c(1, 2, 4)[m]
if (anyNA(x) || anyNA(w)) {
ok <- !(is.na(x) | is.na(w))
x <- x[ok]
w <- w[ok]
}
if (length(x) == 0) {
stop("At least one non-NA value is required")
}
stopifnot(all(w >= 0))
if (all(w == 0)) {
stop("All weights are zero", call. = FALSE)
}
oo <- order(x)
x <- x[oo]
w <- w[oo]
Fx <- cumsum(w) / sum(w)
if (length(x) > 1) {
out <- switch(as.character(type),
`1` = stats::approx(Fx, x,
xout = probs, ties = "ordered", rule = 2,
method = "constant",
f = 1
),
`2` = stats::approx(Fx, x,
xout = probs, ties = "ordered",
rule = 2, method = "constant", f = 1 / 2
),
`4` = stats::approx(Fx,
x,
xout = probs, ties = "ordered", rule = 2,
method = "linear"
)
)
result <- out$y
} else {
result <- rep.int(x, length(probs))
}
names(result) <- paste0(
format(100 * probs, trim = TRUE),
"%"
)
return(result)
}