Skip to content

Commit

Permalink
New data frame (#2994)
Browse files Browse the repository at this point in the history
Add performant data.frame constructors and use them throughout the code
  • Loading branch information
thomasp85 committed Nov 15, 2018
1 parent a330da3 commit 92d2777
Show file tree
Hide file tree
Showing 114 changed files with 418 additions and 359 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ Imports:
stats,
tibble,
viridisLite,
withr (>= 2.0.0)
withr (>= 2.0.0),
grDevices
Suggests:
covr,
dplyr,
Expand Down
40 changes: 40 additions & 0 deletions R/aaa-.r
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,43 @@ NULL
#' @keywords internal
#' @name ggplot2-ggproto
NULL

# Fast data.frame constructor and indexing
# No checking, recycling etc. unless asked for
new_data_frame <- function(x = list(), n = NULL) {
if (length(x) != 0 && is.null(names(x))) stop("Elements must be named", call. = FALSE)
lengths <- vapply(x, length, integer(1))
if (is.null(n)) {
n <- if (length(x) == 0) 0 else max(lengths)
}
for (i in seq_along(x)) {
if (lengths[i] == n) next
if (lengths[i] != 1) stop("Elements must equal the number of rows or 1", call. = FALSE)
x[[i]] <- rep(x[[i]], n)
}

class(x) <- "data.frame"

attr(x, "row.names") <- .set_row_names(n)
x
}

data_frame <- function(...) {
new_data_frame(list(...))
}

data.frame <- function(...) {
stop('Please use `data_frame()` or `new_data_frame()` instead of `data.frame()` for better performance. See the vignette "ggplot2 internal programming guidelines" for details.', call. = FALSE)
}

mat_2_df <- function(x, col_names = colnames(x), .check = FALSE) {
x <- lapply(seq_len(ncol(x)), function(i) x[, i])
if (!is.null(col_names)) names(x) <- col_names
new_data_frame(x)
}

df_col <- function(x, name) .subset2(x, name)

df_rows <- function(x, i) {
new_data_frame(lapply(x, `[`, i = i))
}
2 changes: 1 addition & 1 deletion R/annotation-custom.r
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ GeomCustomAnn <- ggproto("GeomCustomAnn", Geom,
stop("annotation_custom only works with Cartesian coordinates",
call. = FALSE)
}
corners <- data.frame(x = c(xmin, xmax), y = c(ymin, ymax))
corners <- new_data_frame(list(x = c(xmin, xmax), y = c(ymin, ymax)), n = 2)
data <- coord$transform(corners, panel_params)

x_rng <- range(data$x, na.rm = TRUE)
Expand Down
2 changes: 1 addition & 1 deletion R/annotation-logticks.r
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ calc_logticks <- function(base = 10, ticks_per_base = base - 1,
longtick_after_base <- floor(ticks_per_base/2)
tickend[ cycleIdx == longtick_after_base ] <- midend

tickdf <- data.frame(value = ticks, start = start, end = tickend)
tickdf <- new_data_frame(list(value = ticks, start = start, end = tickend), n = length(ticks))

return(tickdf)
}
2 changes: 1 addition & 1 deletion R/annotation-raster.r
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ GeomRasterAnn <- ggproto("GeomRasterAnn", Geom,
stop("annotation_raster only works with Cartesian coordinates",
call. = FALSE)
}
corners <- data.frame(x = c(xmin, xmax), y = c(ymin, ymax))
corners <- new_data_frame(list(x = c(xmin, xmax), y = c(ymin, ymax)), n = 2)
data <- coord$transform(corners, panel_params)

x_rng <- range(data$x, na.rm = TRUE)
Expand Down
2 changes: 1 addition & 1 deletion R/annotation.r
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ annotate <- function(geom, x = NULL, y = NULL, xmin = NULL, xmax = NULL,
stop("Unequal parameter lengths: ", details, call. = FALSE)
}

data <- data.frame(position)
data <- new_data_frame(position, n = max(lengths))
layer(
geom = geom,
params = list(
Expand Down
2 changes: 1 addition & 1 deletion R/axis-secondary.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ AxisSecondary <- ggproto("AxisSecondary", NULL,
},

transform_range = function(self, range) {
range <- structure(data.frame(range), names = '.')
range <- new_data_frame(list(. = range))
rlang::eval_tidy(
rlang::f_rhs(self$trans),
data = range,
Expand Down
2 changes: 1 addition & 1 deletion R/bench.r
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ benchplot <- function(x) {

times <- rbind(construct, build, render, draw)[, 1:3]

plyr::unrowname(data.frame(
plyr::unrowname(base::data.frame(
step = c("construct", "build", "render", "draw", "TOTAL"),
rbind(times, colSums(times))))
}
7 changes: 3 additions & 4 deletions R/bin.R
Original file line number Diff line number Diff line change
Expand Up @@ -157,15 +157,14 @@ bin_out <- function(count = integer(0), x = numeric(0), width = numeric(0),
xmin = x - width / 2, xmax = x + width / 2) {
density <- count / width / sum(abs(count))

data.frame(
new_data_frame(list(
count = count,
x = x,
xmin = xmin,
xmax = xmax,
width = width,
density = density,
ncount = count / max(abs(count)),
ndensity = density / max(abs(density)),
stringsAsFactors = FALSE
)
ndensity = density / max(abs(density))
), n = length(count))
}
8 changes: 4 additions & 4 deletions R/coord-map.r
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,10 @@ CoordMap <- ggproto("CoordMap", Coord,
))
}

x_intercept <- with(panel_params, data.frame(
x_intercept <- with(panel_params, new_data_frame(list(
x = x.major,
y = y.range[1]
))
), n = length(x.major)))
pos <- self$transform(x_intercept, panel_params)

axes <- list(
Expand All @@ -272,10 +272,10 @@ CoordMap <- ggproto("CoordMap", Coord,
))
}

x_intercept <- with(panel_params, data.frame(
x_intercept <- with(panel_params, new_data_frame(list(
x = x.range[1],
y = y.major
))
), n = length(y.major)))
pos <- self$transform(x_intercept, panel_params)

axes <- list(
Expand Down
6 changes: 3 additions & 3 deletions R/coord-munch.r
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ munch_data <- function(data, dist = NULL, segment_length = 0.01) {
id <- c(rep(seq_len(nrow(data) - 1), extra), nrow(data))
aes_df <- data[id, setdiff(names(data), c("x", "y")), drop = FALSE]

plyr::unrowname(data.frame(x = x, y = y, aes_df))
new_data_frame(c(list(x = x, y = y), unclass(aes_df)))
}

# Interpolate.
Expand Down Expand Up @@ -171,9 +171,9 @@ find_line_formula <- function(x, y) {
slope <- diff(y) / diff(x)
yintercept <- y[-1] - (slope * x[-1])
xintercept <- x[-1] - (y[-1] / slope)
data.frame(x1 = x[-length(x)], y1 = y[-length(y)],
new_data_frame(list(x1 = x[-length(x)], y1 = y[-length(y)],
x2 = x[-1], y2 = y[-1],
slope = slope, yintercept = yintercept, xintercept = xintercept)
slope = slope, yintercept = yintercept, xintercept = xintercept))
}

# Spiral arc length
Expand Down
16 changes: 8 additions & 8 deletions R/facet-.r
Original file line number Diff line number Diff line change
Expand Up @@ -443,7 +443,7 @@ eval_facet <- function(facet, data, env = emptyenv()) {

layout_null <- function() {
# PANEL needs to be a factor to be consistent with other facet types
data.frame(PANEL = factor(1), ROW = 1, COL = 1, SCALE_X = 1, SCALE_Y = 1)
new_data_frame(list(PANEL = factor(1), ROW = 1, COL = 1, SCALE_X = 1, SCALE_Y = 1))
}

check_layout <- function(x) {
Expand Down Expand Up @@ -493,12 +493,12 @@ find_panel <- function(table) {
layout <- table$layout
panels <- layout[grepl("^panel", layout$name), , drop = FALSE]

data.frame(
t = min(panels$t),
r = max(panels$r),
b = max(panels$b),
l = min(panels$l)
)
new_data_frame(list(
t = min(.subset2(panels, "t")),
r = max(.subset2(panels, "r")),
b = max(.subset2(panels, "b")),
l = min(.subset2(panels, "l"))
), n = 1)
}
#' @rdname find_panel
#' @export
Expand Down Expand Up @@ -526,7 +526,7 @@ panel_rows <- function(table) {
#' @keywords internal
#' @export
combine_vars <- function(data, env = emptyenv(), vars = NULL, drop = TRUE) {
if (length(vars) == 0) return(data.frame())
if (length(vars) == 0) return(new_data_frame())

# For each layer, compute the facet values
values <- compact(plyr::llply(data, eval_facets, facets = vars, env = env))
Expand Down
7 changes: 3 additions & 4 deletions R/facet-grid-.r
Original file line number Diff line number Diff line change
Expand Up @@ -232,11 +232,10 @@ FacetGrid <- ggproto("FacetGrid", Facet,
panel <- plyr::id(base, drop = TRUE)
panel <- factor(panel, levels = seq_len(attr(panel, "n")))

rows <- if (!length(names(rows))) 1L else plyr::id(base[names(rows)], drop = TRUE)
cols <- if (!length(names(cols))) 1L else plyr::id(base[names(cols)], drop = TRUE)
rows <- if (!length(names(rows))) rep(1L, length(panel)) else plyr::id(base[names(rows)], drop = TRUE)
cols <- if (!length(names(cols))) rep(1L, length(panel)) else plyr::id(base[names(cols)], drop = TRUE)

panels <- data.frame(PANEL = panel, ROW = rows, COL = cols, base,
check.names = FALSE, stringsAsFactors = FALSE)
panels <- new_data_frame(c(list(PANEL = panel, ROW = rows, COL = cols), base))
panels <- panels[order(panels$PANEL), , drop = FALSE]
rownames(panels) <- NULL

Expand Down
4 changes: 2 additions & 2 deletions R/facet-null.r
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ FacetNull <- ggproto("FacetNull", Facet,
# Need the is.waive check for special case where no data, but aesthetics
# are mapped to vectors
if (is.waive(data))
return(tibble(PANEL = factor()))
return(new_data_frame(list(PANEL = factor())))

if (empty(data))
return(cbind(data, PANEL = factor()))
return(new_data_frame(c(data, list(PANEL = factor()))))

# Needs to be a factor to be consistent with other facet types
data$PANEL <- factor(1)
Expand Down
2 changes: 1 addition & 1 deletion R/facet-wrap.r
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ FacetWrap <- ggproto("FacetWrap", Facet,
n <- attr(id, "n")

dims <- wrap_dims(n, params$nrow, params$ncol)
layout <- data.frame(PANEL = factor(id, levels = seq_len(n)))
layout <- new_data_frame(list(PANEL = factor(id, levels = seq_len(n))))

if (params$as.table) {
layout$ROW <- as.integer((id - 1L) %/% dims[2] + 1L)
Expand Down
10 changes: 6 additions & 4 deletions R/fortify-map.r
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@
#' geom_polygon(aes(group = group), colour = "white")
#' }
fortify.map <- function(model, data, ...) {
df <- as.data.frame(model[c("x", "y")])
names(df) <- c("long", "lat")
df$group <- cumsum(is.na(df$long) & is.na(df$lat)) + 1
df$order <- 1:nrow(df)
df <- new_data_frame(list(
long = model$x,
lat = model$y,
group = cumsum(is.na(model$x) & is.na(model$y)) + 1,
order = seq_along(model$x)
), n = length(model$x))

names <- do.call("rbind", lapply(strsplit(model$names, "[:,]"), "[", 1:2))
df$region <- names[df$group, 1]
Expand Down
8 changes: 4 additions & 4 deletions R/fortify-multcomp.r
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ NULL
#' @rdname fortify-multcomp
#' @export
fortify.glht <- function(model, data, ...) {
plyr::unrowname(data.frame(
plyr::unrowname(base::data.frame(
lhs = rownames(model$linfct),
rhs = model$rhs,
estimate = stats::coef(model),
Expand All @@ -48,7 +48,7 @@ fortify.confint.glht <- function(model, data, ...) {
coef <- model$confint
colnames(coef) <- tolower(colnames(coef))

plyr::unrowname(data.frame(
plyr::unrowname(base::data.frame(
lhs = rownames(coef),
rhs = model$rhs,
coef,
Expand All @@ -64,7 +64,7 @@ fortify.summary.glht <- function(model, data, ...) {
model$test[c("coefficients", "sigma", "tstat", "pvalues")])
names(coef) <- c("estimate", "se", "t", "p")

plyr::unrowname(data.frame(
plyr::unrowname(base::data.frame(
lhs = rownames(coef),
rhs = model$rhs,
coef,
Expand All @@ -77,7 +77,7 @@ fortify.summary.glht <- function(model, data, ...) {
#' @rdname fortify-multcomp
#' @export
fortify.cld <- function(model, data, ...) {
plyr::unrowname(data.frame(
plyr::unrowname(base::data.frame(
lhs = names(model$mcletters$Letters),
letters = model$mcletters$Letters,
check.names = FALSE,
Expand Down
6 changes: 5 additions & 1 deletion R/geom-abline.r
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,12 @@ geom_abline <- function(mapping = NULL, data = NULL,
if (!missing(slope) || !missing(intercept)) {
if (missing(slope)) slope <- 1
if (missing(intercept)) intercept <- 0
n_slopes <- max(length(slope), length(intercept))

data <- data.frame(intercept = intercept, slope = slope)
data <- new_data_frame(list(
intercept = intercept,
slope = slope
), n = n_slopes)
mapping <- aes(intercept = intercept, slope = slope)
show.legend <- FALSE
}
Expand Down
58 changes: 29 additions & 29 deletions R/geom-boxplot.r
Original file line number Diff line number Diff line change
Expand Up @@ -200,41 +200,42 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
outlier.alpha = NULL,
notch = FALSE, notchwidth = 0.5, varwidth = FALSE) {

common <- data.frame(
common <- list(
colour = data$colour,
size = data$size,
linetype = data$linetype,
fill = alpha(data$fill, data$alpha),
group = data$group,
stringsAsFactors = FALSE
group = data$group
)

whiskers <- data.frame(
x = data$x,
xend = data$x,
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = NA,
common,
stringsAsFactors = FALSE
)
whiskers <- new_data_frame(c(
list(
x = c(data$x, data$x),
xend = c(data$x, data$x),
y = c(data$upper, data$lower),
yend = c(data$ymax, data$ymin),
alpha = c(NA_real_, NA_real_)
),
common
), n = 2)

box <- data.frame(
xmin = data$xmin,
xmax = data$xmax,
ymin = data$lower,
y = data$middle,
ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch, data$notchupper, NA),
notchwidth = notchwidth,
alpha = data$alpha,
common,
stringsAsFactors = FALSE
)
box <- new_data_frame(c(
list(
xmin = data$xmin,
xmax = data$xmax,
ymin = data$lower,
y = data$middle,
ymax = data$upper,
ynotchlower = ifelse(notch, data$notchlower, NA),
ynotchupper = ifelse(notch, data$notchupper, NA),
notchwidth = notchwidth,
alpha = data$alpha
),
common
))

if (!is.null(data$outliers) && length(data$outliers[[1]] >= 1)) {
outliers <- data.frame(
outliers <- new_data_frame(list(
y = data$outliers[[1]],
x = data$x[1],
colour = outlier.colour %||% data$colour[1],
Expand All @@ -243,9 +244,8 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
size = outlier.size %||% data$size[1],
stroke = outlier.stroke %||% data$stroke[1],
fill = NA,
alpha = outlier.alpha %||% data$alpha[1],
stringsAsFactors = FALSE
)
alpha = outlier.alpha %||% data$alpha[1]
), n = length(data$outliers[[1]]))
outliers_grob <- GeomPoint$draw_panel(outliers, panel_params, coord)
} else {
outliers_grob <- NULL
Expand Down
Loading

0 comments on commit 92d2777

Please sign in to comment.