Skip to content

Commit

Permalink
Stat refactoring. Fixes #1220
Browse files Browse the repository at this point in the history
  • Loading branch information
hadley committed Aug 5, 2015
1 parent 1ab70de commit 33e167f
Show file tree
Hide file tree
Showing 27 changed files with 85 additions and 78 deletions.
2 changes: 1 addition & 1 deletion R/geom-bar-.r
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ StatBar <- ggproto("StatBar", Stat,
required_aes = "x",
default_aes = aes(y = ..count..),

calculate = function(self, data, ..., width = NULL) {
compute_group = function(self, data, ..., width = NULL) {
x <- data$x
weight <- data$weight %||% rep(1, length(x))
width <- width %||% resolution(x) * 0.9
Expand Down
2 changes: 1 addition & 1 deletion R/layer.r
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ Layer <- ggproto("Layer", NULL,
)

args <- c(list(data = quote(data), scales = quote(scales)), params)
tryCatch(do.call(self$stat$calculate_groups, args), error = function(e) {
tryCatch(do.call(self$stat$compute, args), error = function(e) {
warning("Computation failed in `", snake_class(self$stat), "()`:\n",
e$message, call. = FALSE)
data.frame()
Expand Down
40 changes: 19 additions & 21 deletions R/stat-.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,19 @@
#' implement one or more of the following:
#'
#' \itemize{
#' \item \code{calculate}: Calculates a stat for a single group of data.
#' \item \code{calculate_groups}: Calculates stat for all groups. The method
#' typically calls \code{calculate} for each group.
#' \item Override either \code{compute(self, data, scales, ...)} or
#' \code{compute(self, data, scales, ...)}. \code{compute} is
#' called with the complete dataset, \code{compute_group} is called a group
#' at-a-time. If you override \code{compute}, you're responsibly for
#' matching up non-transformed columns.
#'
#' \code{data} is a data frame containing the variables named according
#' to the aesthetics that they're mapped to. \code{scales} contains a list
#' of scales associated with the plot. This is present mostly for historical
#' reasons, and I would discourage you from relying on it. \code{...}
#' contains the parameters returned by \code{compute_defaults()}.
#'
#' Must return a data frame.
#' \item \code{compute_defaults(data, params)}: called once for each layer.
#' Used to compute defaults that need to complete dataset, and to inform
#' the user of important choices.
Expand All @@ -35,35 +45,19 @@ Stat <- ggproto("Stat",

required_aes = c(),

calculate = function(data, scales, ...) {
data
},

compute_defaults = function(data, params) {
params
},

calculate_groups = function(self, data, scales, ...) {
compute = function(self, data, scales, ...) {
if (empty(data)) return(data.frame())

force(data)
force(scales)

# # Alternative approach: cleaner, but much slower
# # Compute statistic for each group
# stats <- ddply(data, "group", function(group) {
# self$calculate(group, scales, ...)
# })
# stats$ORDER <- seq_len(nrow(stats))
#
# # Combine statistics with original columns
# unique <- ddply(data, .(group), uniquecols)
# stats <- merge(stats, unique, by = "group")
# stats[stats$ORDER, ]

groups <- split(data, data$group)
stats <- lapply(groups, function(group)
self$calculate(data = group, scales = scales, ...))
self$compute_group(data = group, scales = scales, ...))

stats <- mapply(function(new, old) {
if (empty(new)) return(data.frame())
Expand All @@ -76,6 +70,10 @@ Stat <- ggproto("Stat",
}, stats, groups, SIMPLIFY = FALSE)

do.call(plyr::rbind.fill, stats)
},

compute_group = function(self, data, scales, ...) {
stop("Not implemented", call. = FALSE)
}
)

Expand Down
2 changes: 1 addition & 1 deletion R/stat-bin.r
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ StatBin <- ggproto("StatBin", Stat,
params
},

calculate = function(self, data, scales, binwidth = NULL, bins = NULL,
compute_group = function(self, data, scales, binwidth = NULL, bins = NULL,
origin = NULL, breaks = NULL, width = 0.9, drop = FALSE,
right = FALSE, ...) {
range <- scale_dimension(scales$x, c(0, 0))
Expand Down
4 changes: 2 additions & 2 deletions R/stat-bin2d.r
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ StatBin2d <- ggproto("StatBin2d", Stat,
default_aes = aes(fill = ..count..),
required_aes = c("x", "y"),

calculate = function(data, scales, binwidth = NULL, bins = 30,
breaks = NULL, origin = NULL, drop = TRUE, ...) {
compute_group = function(data, scales, binwidth = NULL, bins = 30,
breaks = NULL, origin = NULL, drop = TRUE, ...) {
range <- list(
x = scale_dimension(scales$x, c(0, 0)),
y = scale_dimension(scales$y, c(0, 0))
Expand Down
16 changes: 8 additions & 8 deletions R/stat-bindot.r
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ StatBindot <- ggproto("StatBindot", Stat,
params
},

calculate_groups = function(self, data, na.rm = FALSE, binwidth = NULL,
binaxis = "x", method = "dotdensity",
binpositions = "bygroup", ...) {
compute = function(self, data, na.rm = FALSE, binwidth = NULL,
binaxis = "x", method = "dotdensity",
binpositions = "bygroup", ...) {
data <- remove_missing(data, na.rm, c(binaxis, "weight"), name = "stat_bindot",
finite = TRUE)

Expand Down Expand Up @@ -41,14 +41,14 @@ StatBindot <- ggproto("StatBindot", Stat,

}

ggproto_parent(Stat, self)$calculate_groups(data, binwidth = binwidth,
ggproto_parent(Stat, self)$compute(data, binwidth = binwidth,
binaxis = binaxis, method = method, binpositions = binpositions, ...)
},

calculate = function(self, data, scales, binwidth = NULL, binaxis = "x",
method = "dotdensity", binpositions = "bygroup",
origin = NULL, breaks = NULL, width = 0.9, drop = FALSE,
right = TRUE, ...) {
compute_group = function(self, data, scales, binwidth = NULL, binaxis = "x",
method = "dotdensity", binpositions = "bygroup",
origin = NULL, breaks = NULL, width = 0.9, drop = FALSE,
right = TRUE, ...) {

# This function taken from integer help page
is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5) {
Expand Down
3 changes: 2 additions & 1 deletion R/stat-binhex.r
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ StatBinhex <- ggproto("StatBinhex", Stat,

required_aes = c("x", "y"),

calculate = function(data, scales, binwidth = NULL, bins = 30, na.rm = FALSE, ...) {
compute_group = function(data, scales, binwidth = NULL, bins = 30,
na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, c("x", "y"), name = "stat_hexbin")

if (is.null(binwidth)) {
Expand Down
2 changes: 1 addition & 1 deletion R/stat-boxplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ StatBoxplot <- ggproto("StatBoxplot", Stat,
params
},

calculate = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, ...) {
compute_group = function(data, scales, width = NULL, na.rm = FALSE, coef = 1.5, ...) {
qs <- c(0, 0.25, 0.5, 0.75, 1)

data <- remove_missing(data, na.rm, c("x", "y", "weight"), name = "stat_boxplot",
Expand Down
4 changes: 2 additions & 2 deletions R/stat-contour.r
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ stat_contour <- function(mapping = NULL, data = NULL, geom = "contour",
#' @usage NULL
#' @export
StatContour <- ggproto("StatContour", Stat,
calculate = function(data, scales, bins = NULL, binwidth = NULL,
breaks = NULL, complete = FALSE, na.rm = FALSE, ...)
compute_group = function(data, scales, bins = NULL, binwidth = NULL,
breaks = NULL, complete = FALSE, na.rm = FALSE, ...)
{
data <- remove_missing(data, na.rm, name = "stat_contour", finite = TRUE)

Expand Down
5 changes: 3 additions & 2 deletions R/stat-density-2d.r
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ StatDensity2d <- ggproto("StatDensity2d", Stat,

required_aes = c("x", "y"),

calculate = function(data, scales, na.rm = FALSE, h = NULL, contour = TRUE, n = 100, ...) {
compute_group = function(data, scales, na.rm = FALSE, h = NULL,
contour = TRUE, n = 100, ...) {
df <- data.frame(data[, c("x", "y")])
df <- remove_missing(df, na.rm, name = "stat_density2d", finite = TRUE)

Expand All @@ -53,7 +54,7 @@ StatDensity2d <- ggproto("StatDensity2d", Stat,
df$group <- data$group[1]

if (contour) {
StatContour$calculate(df, scales, ...)
StatContour$compute(df, scales, ...)
} else {
names(df) <- c("x", "y", "density", "group")
df$level <- 1
Expand Down
5 changes: 2 additions & 3 deletions R/stat-density.r
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ stat_density <- function(mapping = NULL, data = NULL, geom = "area",
#' @usage NULL
#' @export
StatDensity <- ggproto("StatDensity", Stat,
calculate = function(data, scales, adjust = 1, kernel = "gaussian",
trim = FALSE, na.rm = FALSE, ...)
{
compute_group = function(data, scales, adjust = 1, kernel = "gaussian",
trim = FALSE, na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, "x", name = "stat_density",
finite = TRUE)

Expand Down
2 changes: 1 addition & 1 deletion R/stat-ecdf.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ stat_ecdf <- function(mapping = NULL, data = NULL, geom = "step",
#' @usage NULL
#' @export
StatEcdf <- ggproto("StatEcdf", Stat,
calculate = function(data, scales, n = NULL, ...) {
compute_group = function(data, scales, n = NULL, ...) {

# If n is NULL, use raw values; otherwise interpolate
if (is.null(n)) {
Expand Down
5 changes: 2 additions & 3 deletions R/stat-ellipse.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,8 @@ stat_ellipse <- function(mapping = NULL, data = NULL, geom = "path",
StatEllipse <- ggproto("StatEllipse", Stat,
required_aes = c("x", "y"),

calculate = function(data, scales, type = "t", level = 0.95, segments = 51,
na.rm = FALSE, ...)
{
compute_group = function(data, scales, type = "t", level = 0.95, segments = 51,
na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, vars = c("x","y"),
name = "stat_ellipse", finite = TRUE)
calculate_ellipse(data = data, vars = c("x","y"), type = type,
Expand Down
2 changes: 1 addition & 1 deletion R/stat-function.r
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ stat_function <- function(mapping = NULL, data = NULL, geom = "path",
StatFunction <- ggproto("StatFunction", Stat,
default_aes = aes(y = ..y..),

calculate = function(data, scales, fun, n = 101, args = list(), ...) {
compute_group = function(data, scales, fun, n = 101, args = list(), ...) {
range <- scale_dimension(scales$x, c(0, 0))
xseq <- seq(range[1], range[2], length.out = n)

Expand Down
2 changes: 1 addition & 1 deletion R/stat-identity.r
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ stat_identity <- function(mapping = NULL, data = NULL, geom = "point",
#' @usage NULL
#' @export
StatIdentity <- ggproto("StatIdentity", Stat,
calculate_groups = function(data, scales, ...) {
compute = function(data, scales, ...) {
data
}
)
6 changes: 3 additions & 3 deletions R/stat-qq.r
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ StatQq <- ggproto("StatQq", Stat,

required_aes = c("sample"),

calculate = function(data, scales, quantiles = NULL, distribution = stats::qnorm,
dparams = list(), na.rm = FALSE)
{
compute_group = function(data, scales, quantiles = NULL,
distribution = stats::qnorm, dparams = list(),
na.rm = FALSE) {
data <- remove_missing(data, na.rm, "sample", name = "stat_qq")

sample <- sort(data$sample)
Expand Down
7 changes: 3 additions & 4 deletions R/stat-quantile.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,9 @@ stat_quantile <- function(mapping = NULL, data = NULL, geom = "quantile",
StatQuantile <- ggproto("StatQuantile", Stat,
required_aes = c("x", "y"),

calculate = function(data, scales, quantiles = c(0.25, 0.5, 0.75),
formula = NULL, xseq = NULL, method = "rq", lambda = 1, na.rm = FALSE,
...)
{
compute_group = function(data, scales, quantiles = c(0.25, 0.5, 0.75),
formula = NULL, xseq = NULL, method = "rq",
lambda = 1, na.rm = FALSE, ...) {
try_require("quantreg", "stat_quantile")

if (is.null(formula)) {
Expand Down
8 changes: 4 additions & 4 deletions R/stat-smooth.r
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ StatSmooth <- ggproto("StatSmooth", Stat,
params
},

calculate = function(data, scales, method = "auto", formula = y~x,
se = TRUE, n = 80, fullrange = FALSE, xseq = NULL, level = 0.95,
method.args = list(), na.rm = FALSE, ...)
{
compute_group = function(data, scales, method = "auto", formula = y~x,
se = TRUE, n = 80, fullrange = FALSE, xseq = NULL,
level = 0.95, method.args = list(), na.rm = FALSE,
...) {
data <- remove_missing(data, na.rm, c("x", "y"), name = "stat_smooth")
if (length(unique(data$x)) < 2) {
# Not enough data to perform fit
Expand Down
2 changes: 1 addition & 1 deletion R/stat-spoke.r
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ stat_spoke <- function(mapping = NULL, data = NULL, geom = "segment",
StatSpoke <- ggproto("StatSpoke", Stat,
retransform = FALSE,

calculate = function(data, scales, radius = 1, ...) {
compute = function(data, scales, radius = 1, ...) {
transform(data,
xend = x + cos(angle) * radius,
yend = y + sin(angle) * radius
Expand Down
2 changes: 1 addition & 1 deletion R/stat-sum.r
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ StatSum <- ggproto("StatSum", Stat,

required_aes = c("x", "y"),

calculate_groups = function(data, scales, ...) {
compute = function(data, scales, ...) {

if (is.null(data$weight)) data$weight <- 1

Expand Down
6 changes: 3 additions & 3 deletions R/stat-summary-2d.r
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ StatSummary2d <- ggproto("StatSummary2d", Stat,

required_aes = c("x", "y", "z"),

calculate = function(data, scales, binwidth = NULL, bins = 30, breaks = NULL,
origin = NULL, drop = TRUE, fun = "mean",
fun.args = list(), ...) {
compute_group = function(data, scales, binwidth = NULL, bins = 30,
breaks = NULL, origin = NULL, drop = TRUE,
fun = "mean", fun.args = list(), ...) {
data <- remove_missing(data, FALSE, c("x", "y", "z"),
name = "stat_summary2d")

Expand Down
4 changes: 2 additions & 2 deletions R/stat-summary-hex.r
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ StatSummaryHex <- ggproto("StatSummaryHex", Stat,

required_aes = c("x", "y", "z"),

calculate = function(data, scales, binwidth = NULL, bins = 30, drop = TRUE,
fun = "mean", fun.args = list(), ...) {
compute_group = function(data, scales, binwidth = NULL, bins = 30, drop = TRUE,
fun = "mean", fun.args = list(), ...) {
data <- remove_missing(data, FALSE, c("x", "y", "z"),
name = "stat_summary_hex")

Expand Down
2 changes: 1 addition & 1 deletion R/stat-summary.r
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ stat_summary <- function(mapping = NULL, data = NULL, geom = "pointrange",
StatSummary <- ggproto("StatSummary", Stat,
required_aes = c("x", "y"),

calculate_groups = function(data, scales, fun.data = NULL, fun.y = NULL,
compute = function(data, scales, fun.data = NULL, fun.y = NULL,
fun.ymax = NULL, fun.ymin = NULL, fun.args = list(), na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, c("x", "y"), name = "stat_summary")

Expand Down
2 changes: 1 addition & 1 deletion R/stat-unique.r
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ stat_unique <- function(mapping = NULL, data = NULL, geom = "point",
#' @usage NULL
#' @export
StatUnique <- ggproto("StatUnique", Stat,
calculate_groups = function(data, scales, ...) unique(data)
compute = function(data, scales, ...) unique(data)
)
6 changes: 3 additions & 3 deletions R/stat-ydensity.r
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ stat_ydensity <- function(mapping = NULL, data = NULL, geom = "violin",
StatYdensity <- ggproto("StatYdensity", Stat,
required_aes = c("x", "y"),

calculate = function(data, scales, width = NULL, adjust = 1,
compute_group = function(data, scales, width = NULL, adjust = 1,
kernel = "gaussian", trim = TRUE, na.rm = FALSE, ...) {
data <- remove_missing(data, na.rm, c("x", "y", "weight"),
name = "stat_ydensity", finite = TRUE)
Expand All @@ -78,8 +78,8 @@ StatYdensity <- ggproto("StatYdensity", Stat,
dens
},

calculate_groups = function(self, data, scales, ..., scale = "area") {
data <- ggproto_parent(Stat, self)$calculate_groups(data, scales, ...)
compute = function(self, data, scales, ..., scale = "area") {
data <- ggproto_parent(Stat, self)$compute(data, scales, ...)

# choose how violins are scaled relative to each other
data$violinwidth <- switch(scale,
Expand Down
16 changes: 13 additions & 3 deletions man/ggplot2-ggproto.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,19 @@ fields. To create a new type of Stat object, you typically will want to
implement one or more of the following:

\itemize{
\item \code{calculate}: Calculates a stat for a single group of data.
\item \code{calculate_groups}: Calculates stat for all groups. The method
typically calls \code{calculate} for each group.
\item Override either \code{compute(self, data, scales, ...)} or
\code{compute(self, data, scales, ...)}. \code{compute} is
called with the complete dataset, \code{compute_group} is called a group
at-a-time. If you override \code{compute}, you're responsibly for
matching up non-transformed columns.
\code{data} is a data frame containing the variables named according
to the aesthetics that they're mapped to. \code{scales} contains a list
of scales associated with the plot. This is present mostly for historical
reasons, and I would discourage you from relying on it. \code{...}
contains the parameters returned by \code{compute_defaults()}.

Must return a data frame.
\item \code{compute_defaults(data, params)}: called once for each layer.
Used to compute defaults that need to complete dataset, and to inform
the user of important choices.
Expand Down
Loading

0 comments on commit 33e167f

Please sign in to comment.