-
Notifications
You must be signed in to change notification settings - Fork 2.1k
/
Copy pathstat-bin.R
174 lines (160 loc) · 6.56 KB
/
stat-bin.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#' @param binwidth The width of the bins. Can be specified as a numeric value
#' or as a function that takes x after scale transformation as input and
#' returns a single numeric value. When specifying a function along with a
#' grouping structure, the function will be called once per group.
#' The default is to use the number of bins in `bins`,
#' covering the range of the data. You should always override
#' this value, exploring multiple widths to find the best to illustrate the
#' stories in your data.
#'
#' The bin width of a date variable is the number of days in each time; the
#' bin width of a time variable is the number of seconds.
#' @param bins Number of bins. Overridden by `binwidth`. Defaults to 30.
#' @param center,boundary bin position specifiers. Only one, `center` or
#' `boundary`, may be specified for a single plot. `center` specifies the
#' center of one of the bins. `boundary` specifies the boundary between two
#' bins. Note that if either is above or below the range of the data, things
#' will be shifted by the appropriate integer multiple of `binwidth`.
#' For example, to center on integers use `binwidth = 1` and `center = 0`, even
#' if `0` is outside the range of the data. Alternatively, this same alignment
#' can be specified with `binwidth = 1` and `boundary = 0.5`, even if `0.5` is
#' outside the range of the data.
#' @param breaks Alternatively, you can supply a numeric vector giving
#' the bin boundaries. Overrides `binwidth`, `bins`, `center`,
#' and `boundary`. Can also be a function that takes group-wise values as input and returns bin boundaries.
#' @param closed One of `"right"` or `"left"` indicating whether right
#' or left edges of bins are included in the bin.
#' @param pad If `TRUE`, adds empty bins at either end of x. This ensures
#' frequency polygons touch 0. Defaults to `FALSE`.
#' @param drop Treatment of zero count bins. If `"none"` (default), such
#' bins are kept as-is. If `"all"`, all zero count bins are filtered out.
#' If `"extremes"` only zero count bins at the flanks are filtered out, but
#' not in the middle. `TRUE` is shorthand for `"all"` and `FALSE` is shorthand
#' for `"none"`.
#' @eval rd_computed_vars(
#' count = "number of points in bin.",
#' density = "density of points in bin, scaled to integrate to 1.",
#' ncount = "count, scaled to a maximum of 1.",
#' ndensity = "density, scaled to a maximum of 1.",
#' width = "widths of bins."
#' )
#'
#' @section Dropped variables:
#' \describe{
#' \item{`weight`}{After binning, weights of individual data points (if supplied) are no longer available.}
#' }
#'
#' @seealso [stat_count()], which counts the number of cases at each x
#' position, without binning. It is suitable for both discrete and continuous
#' x data, whereas `stat_bin()` is suitable only for continuous x data.
#' @export
#' @rdname geom_histogram
stat_bin <- function(mapping = NULL, data = NULL,
geom = "bar", position = "stack",
...,
binwidth = NULL,
bins = NULL,
center = NULL,
boundary = NULL,
breaks = NULL,
closed = c("right", "left"),
pad = FALSE,
na.rm = FALSE,
drop = "none",
orientation = NA,
show.legend = NA,
inherit.aes = TRUE) {
layer(
data = data,
mapping = mapping,
stat = StatBin,
geom = geom,
position = position,
show.legend = show.legend,
inherit.aes = inherit.aes,
params = list2(
binwidth = binwidth,
bins = bins,
center = center,
boundary = boundary,
breaks = breaks,
closed = closed,
pad = pad,
na.rm = na.rm,
orientation = orientation,
drop = drop,
...
)
)
}
#' @rdname ggplot2-ggproto
#' @format NULL
#' @usage NULL
#' @export
StatBin <- ggproto("StatBin", Stat,
setup_params = function(self, data, params) {
params$flipped_aes <- has_flipped_aes(data, params, main_is_orthogonal = FALSE)
if (is.logical(params$drop)) {
params$drop <- if (isTRUE(params$drop)) "all" else "none"
}
drop <- params$drop
params$drop <- arg_match0(
params$drop %||% "none",
c("all", "none", "extremes"), arg_nm = "drop"
)
has_x <- !(is.null(data$x) && is.null(params$x))
has_y <- !(is.null(data$y) && is.null(params$y))
if (!has_x && !has_y) {
cli::cli_abort("{.fn {snake_class(self)}} requires an {.field x} or {.field y} aesthetic.")
}
if (has_x && has_y) {
cli::cli_abort("{.fn {snake_class(self)}} must only have an {.field x} {.emph or} {.field y} aesthetic.")
}
x <- flipped_names(params$flipped_aes)$x
if (is_mapped_discrete(data[[x]])) {
cli::cli_abort(c(
"{.fn {snake_class(self)}} requires a continuous {.field {x}} aesthetic.",
"x" = "the {.field {x}} aesthetic is discrete.",
"i" = "Perhaps you want {.code stat=\"count\"}?"
))
}
params <- fix_bin_params(params, fun = snake_class(self), version = "2.1.0")
params
},
extra_params = c("na.rm", "orientation"),
compute_group = function(data, scales, binwidth = NULL, bins = NULL,
center = NULL, boundary = NULL,
closed = c("right", "left"), pad = FALSE,
breaks = NULL, flipped_aes = FALSE, drop = "none",
# The following arguments are not used, but must
# be listed so parameters are computed correctly
origin = NULL, right = NULL) {
x <- flipped_names(flipped_aes)$x
bins <- compute_bins(
data[[x]], scales[[x]],
breaks = breaks, binwidth = binwidth, bins = bins,
center = center, boundary = boundary, closed = closed
)
bins <- bin_vector(data[[x]], bins, weight = data$weight, pad = pad)
keep <- switch(
drop,
all = bins$count != 0,
extremes = inner_runs(bins$count != 0),
TRUE
)
bins <- vec_slice(bins, keep)
bins$flipped_aes <- flipped_aes
flip_data(bins, flipped_aes)
},
default_aes = aes(x = after_stat(count), y = after_stat(count), weight = 1),
required_aes = "x|y",
dropped_aes = "weight" # after statistical transformation, weights are no longer available
)
inner_runs <- function(x) {
rle <- vec_unrep(x)
nruns <- nrow(rle)
inner <- rep(TRUE, nruns)
i <- unique(c(1, nruns))
inner[i] <- inner[i] & rle$key[i]
rep(inner, rle$times)
}