-
Notifications
You must be signed in to change notification settings - Fork 25
/
breaks.Rd
executable file
·123 lines (111 loc) · 4.24 KB
/
breaks.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/weighted_hist.R
\name{breaks}
\alias{breaks}
\alias{breaks_fixed}
\alias{breaks_Sturges}
\alias{breaks_Scott}
\alias{breaks_FD}
\alias{breaks_quantiles}
\title{Break (bin) selection algorithms for histograms}
\usage{
breaks_fixed(x, weights = NULL, width = 1)
breaks_Sturges(x, weights = NULL)
breaks_Scott(x, weights = NULL)
breaks_FD(x, weights = NULL, digits = 5)
breaks_quantiles(x, weights = NULL, max_n = "Scott", min_width = 0.5)
}
\arguments{
\item{x}{A numeric vector giving a sample.}
\item{weights}{A numeric vector of length \code{length(x)} giving sample weights.}
\item{width}{For \code{\link[=breaks_fixed]{breaks_fixed()}}, the desired bin width.}
\item{digits}{For \code{\link[=breaks_FD]{breaks_FD()}}, the number of significant digits to keep when
rounding in the Freedman-Diaconis algorithm. For an explanation of this
parameter, see the documentation of the corresponding parameter in
\code{\link[grDevices:nclass]{grDevices::nclass.FD()}}.}
\item{max_n}{For \code{\link[=breaks_quantiles]{breaks_quantiles()}}, either a scalar numeric giving the
maximum number of bins, or another breaks function (or string giving the
suffix of the name of a function prefixed with \code{"breaks_"}) that will
return the maximum number of bins. \code{\link[=breaks_quantiles]{breaks_quantiles()}} will construct
\emph{at most} \code{max_n} bins.}
\item{min_width}{For \code{\link[=breaks_quantiles]{breaks_quantiles()}}, a scalar numeric between \code{0} and
\code{1} giving the minimum bin width as a proportion of \code{diff(range(x)) / max_n}.}
}
\value{
Either a single number (giving the number of bins) or a vector
giving the edges between bins.
}
\description{
Methods for determining breaks (bins) in histograms, as used in the \code{breaks}
argument to \code{\link[=density_histogram]{density_histogram()}}.
Supports \link[=auto_partial]{automatic partial function application} with
\link[=waiver]{waived arguments}.
}
\details{
These functions take a sample and its weights and return a value suitable for
the \code{breaks} argument to \code{\link[=density_histogram]{density_histogram()}} that will determine the histogram
breaks.
\itemize{
\item \code{\link[=breaks_fixed]{breaks_fixed()}} allows you to manually specify a fixed bin width.
\item \code{\link[=breaks_Sturges]{breaks_Sturges()}}, \code{\link[=breaks_Scott]{breaks_Scott()}}, and \code{\link[=breaks_FD]{breaks_FD()}} implement weighted
versions of their corresponding base functions. They return a scalar
numeric giving the number of bins. See \code{\link[grDevices:nclass]{nclass.Sturges()}}, \code{\link[grDevices:nclass]{nclass.scott()}},
and \code{\link[grDevices:nclass]{nclass.FD()}}.
\item \code{\link[=breaks_quantiles]{breaks_quantiles()}} constructs irregularly-sized bins using \code{max_n + 1}
(possibly weighted) quantiles of \code{x}. The final number of bins is
\emph{at most} \code{max_n}, as small bins (ones whose bin width is less than
\code{min_width} times the range of the data divided by \code{max_n}) will be merged
into adjacent bins.
}
}
\examples{
library(ggplot2)
set.seed(1234)
x = rnorm(2000, 1, 2)
# Let's compare the different break-selection algorithms on this data:
ggplot(data.frame(x), aes(x)) +
stat_slab(
aes(y = "breaks_fixed(width = 0.5)"),
density = "histogram",
breaks = breaks_fixed(width = 0.5),
outline_bars = TRUE,
color = "black",
) +
stat_slab(
aes(y = "breaks_Sturges()\nor 'Sturges'"),
density = "histogram",
breaks = "Sturges",
outline_bars = TRUE,
color = "black",
) +
stat_slab(
aes(y = "breaks_Scott()\nor 'Scott'"),
density = "histogram",
breaks = "Scott",
outline_bars = TRUE,
color = "black",
) +
stat_slab(
aes(y = "breaks_FD()\nor 'FD'"),
density = "histogram",
breaks = "FD",
outline_bars = TRUE,
color = "black",
) +
stat_slab(
aes(y = "breaks_quantiles()\nor 'quantiles'"),
density = "histogram",
breaks = "quantiles",
outline_bars = TRUE,
color = "black",
) +
geom_point(aes(y = 0.7), alpha = 0.5) +
labs(
subtitle = "ggdist::stat_slab(density = 'histogram', ...)",
y = "breaks =",
x = NULL
)
}
\seealso{
\code{\link[=density_histogram]{density_histogram()}}, \link{align}
}