Permalink
Cannot retrieve contributors at this time
#' Points | |
#' | |
#' The point geom is used to create scatterplots. The scatterplot is most | |
#' useful for displaying the relationship between two continuous variables. | |
#' It can be used to compare one continuous and one categorical variable, or | |
#' two categorical variables, but a variation like [geom_jitter()], | |
#' [geom_count()], or [geom_bin2d()] is usually more | |
#' appropriate. A _bubblechart_ is a scatterplot with a third variable | |
#' mapped to the size of points. | |
#' | |
#' @section Overplotting: | |
#' The biggest potential problem with a scatterplot is overplotting: whenever | |
#' you have more than a few points, points may be plotted on top of one | |
#' another. This can severely distort the visual appearance of the plot. | |
#' There is no one solution to this problem, but there are some techniques | |
#' that can help. You can add additional information with | |
#' [geom_smooth()], [geom_quantile()] or | |
#' [geom_density_2d()]. If you have few unique `x` values, | |
#' [geom_boxplot()] may also be useful. | |
#' | |
#' Alternatively, you can | |
#' summarise the number of points at each location and display that in some | |
#' way, using [geom_count()], [geom_hex()], or | |
#' [geom_density2d()]. | |
#' | |
#' Another technique is to make the points transparent (e.g. | |
#' `geom_point(alpha = 0.05)`) or very small (e.g. | |
#' `geom_point(shape = ".")`). | |
#' | |
#' @eval rd_aesthetics("geom", "point") | |
#' @inheritParams layer | |
#' @param na.rm If `FALSE`, the default, missing values are removed with | |
#' a warning. If `TRUE`, missing values are silently removed. | |
#' @param ... Other arguments passed on to [layer()]. These are | |
#' often aesthetics, used to set an aesthetic to a fixed value, like | |
#' `colour = "red"` or `size = 3`. They may also be parameters | |
#' to the paired geom/stat. | |
#' @export | |
#' @examples | |
#' p <- ggplot(mtcars, aes(wt, mpg)) | |
#' p + geom_point() | |
#' | |
#' # Add aesthetic mappings | |
#' p + geom_point(aes(colour = factor(cyl))) | |
#' p + geom_point(aes(shape = factor(cyl))) | |
#' # A "bubblechart": | |
#' p + geom_point(aes(size = qsec)) | |
#' | |
#' # Set aesthetics to fixed value | |
#' ggplot(mtcars, aes(wt, mpg)) + geom_point(colour = "red", size = 3) | |
#' | |
#' \donttest{ | |
#' # Varying alpha is useful for large datasets | |
#' d <- ggplot(diamonds, aes(carat, price)) | |
#' d + geom_point(alpha = 1/10) | |
#' d + geom_point(alpha = 1/20) | |
#' d + geom_point(alpha = 1/100) | |
#' } | |
#' | |
#' # For shapes that have a border (like 21), you can colour the inside and | |
#' # outside separately. Use the stroke aesthetic to modify the width of the | |
#' # border | |
#' ggplot(mtcars, aes(wt, mpg)) + | |
#' geom_point(shape = 21, colour = "black", fill = "white", size = 5, stroke = 5) | |
#' | |
#' \donttest{ | |
#' # You can create interesting shapes by layering multiple points of | |
#' # different sizes | |
#' p <- ggplot(mtcars, aes(mpg, wt, shape = factor(cyl))) | |
#' p + | |
#' geom_point(aes(colour = factor(cyl)), size = 4) + | |
#' geom_point(colour = "grey90", size = 1.5) | |
#' p + | |
#' geom_point(colour = "black", size = 4.5) + | |
#' geom_point(colour = "pink", size = 4) + | |
#' geom_point(aes(shape = factor(cyl))) | |
#' | |
#' # geom_point warns when missing values have been dropped from the data set | |
#' # and not plotted, you can turn this off by setting na.rm = TRUE | |
#' mtcars2 <- transform(mtcars, mpg = ifelse(runif(32) < 0.2, NA, mpg)) | |
#' ggplot(mtcars2, aes(wt, mpg)) + | |
#' geom_point() | |
#' ggplot(mtcars2, aes(wt, mpg)) + | |
#' geom_point(na.rm = TRUE) | |
#' } | |
geom_point <- function(mapping = NULL, data = NULL, | |
stat = "identity", position = "identity", | |
..., | |
na.rm = FALSE, | |
show.legend = NA, | |
inherit.aes = TRUE) { | |
layer( | |
data = data, | |
mapping = mapping, | |
stat = stat, | |
geom = GeomPoint, | |
position = position, | |
show.legend = show.legend, | |
inherit.aes = inherit.aes, | |
params = list( | |
na.rm = na.rm, | |
... | |
) | |
) | |
} | |
#' @rdname ggplot2-ggproto | |
#' @format NULL | |
#' @usage NULL | |
#' @export | |
GeomPoint <- ggproto("GeomPoint", Geom, | |
required_aes = c("x", "y"), | |
non_missing_aes = c("size", "shape", "colour"), | |
default_aes = aes( | |
shape = 19, colour = "black", size = 1.5, fill = NA, | |
alpha = NA, stroke = 0.5 | |
), | |
draw_panel = function(data, panel_params, coord, na.rm = FALSE) { | |
if (is.character(data$shape)) { | |
data$shape <- translate_shape_string(data$shape) | |
} | |
coords <- coord$transform(data, panel_params) | |
ggname("geom_point", | |
pointsGrob( | |
coords$x, coords$y, | |
pch = coords$shape, | |
gp = gpar( | |
col = alpha(coords$colour, coords$alpha), | |
fill = alpha(coords$fill, coords$alpha), | |
# Stroke is added around the outside of the point | |
fontsize = coords$size * .pt + coords$stroke * .stroke / 2, | |
lwd = coords$stroke * .stroke / 2 | |
) | |
) | |
) | |
}, | |
draw_key = draw_key_point | |
) | |
translate_shape_string <- function(shape_string) { | |
# strings of length 0 or 1 are interpreted as symbols by grid | |
if (nchar(shape_string[1]) <= 1) { | |
return(shape_string) | |
} | |
pch_table <- c( | |
"square open" = 0, | |
"circle open" = 1, | |
"triangle open" = 2, | |
"plus" = 3, | |
"cross" = 4, | |
"diamond open" = 5, | |
"triangle down open" = 6, | |
"square cross" = 7, | |
"asterisk" = 8, | |
"diamond plus" = 9, | |
"circle plus" = 10, | |
"star" = 11, | |
"square plus" = 12, | |
"circle cross" = 13, | |
"square triangle" = 14, | |
"triangle square" = 14, | |
"square" = 15, | |
"circle small" = 16, | |
"triangle" = 17, | |
"diamond" = 18, | |
"circle" = 19, | |
"bullet" = 20, | |
"circle filled" = 21, | |
"square filled" = 22, | |
"diamond filled" = 23, | |
"triangle filled" = 24, | |
"triangle down filled" = 25 | |
) | |
shape_match <- charmatch(shape_string, names(pch_table)) | |
invalid_strings <- is.na(shape_match) | |
nonunique_strings <- shape_match == 0 | |
if (any(invalid_strings)) { | |
bad_string <- unique(shape_string[invalid_strings]) | |
n_bad <- length(bad_string) | |
collapsed_names <- sprintf("\n* '%s'", bad_string[1:min(5, n_bad)]) | |
more_problems <- if (n_bad > 5) { | |
sprintf("\n* ... and %d more problem%s", n_bad - 5, ifelse(n_bad > 6, "s", "")) | |
} else { | |
"" | |
} | |
abort(glue("Can't find shape name:", collapsed_names, more_problems)) | |
} | |
if (any(nonunique_strings)) { | |
bad_string <- unique(shape_string[nonunique_strings]) | |
n_bad <- length(bad_string) | |
n_matches <- vapply( | |
bad_string[1:min(5, n_bad)], | |
function(shape_string) sum(grepl(paste0("^", shape_string), names(pch_table))), | |
integer(1) | |
) | |
collapsed_names <- sprintf( | |
"\n* '%s' partially matches %d shape names", | |
bad_string[1:min(5, n_bad)], n_matches | |
) | |
more_problems <- if (n_bad > 5) { | |
sprintf("\n* ... and %d more problem%s", n_bad - 5, ifelse(n_bad > 6, "s", "")) | |
} else { | |
"" | |
} | |
abort(glue("Shape names must be unambiguous:", collapsed_names, more_problems)) | |
} | |
unname(pch_table[shape_match]) | |
} |