Skip to content

Commit

Permalink
Elucidate & implement consistent principles for discrete NA
Browse files Browse the repository at this point in the history
Fixes #1584
  • Loading branch information
hadley committed Oct 6, 2016
1 parent 6619fa6 commit db2cbba
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 18 deletions.
28 changes: 28 additions & 0 deletions NEWS.md
@@ -1,5 +1,33 @@
# ggplot2 2.1.0.9000

* Overhaul of how discrete missing values are treated (#1584). The underlying
principle is that we can naturally represent missing values on discrete
variables (by treating just like another level), so by default
we should.

This principle applies to:

* character vectors
* factors with implicit NA
* factors with explicit NA

And to all scales (both position and non-position.)

Compared to the previous version of ggplot2, that means there are 3 main
changes:

* `scale_x_discrete()` and `scale_y_discrete()` always show discrete NA,
regardless of their source

* If present, `NA`s are shown in discete legends.

* All discrete scales gain a `na.translate` argument that allows you to
control whether `NA`s are translated to something that can be visualised
or left as missing. Note that if you leave as is (i.e.
`na.translate = FALSE)` they will passed on to the layer, which
will create warnings about dropping missing values. To suppress those,
you'll also need to add `na.rm = TRUE` to the layer call.

* Added scales `scale_x_time()` and `scale_y_time()` which are applied
automatically when you plot objects of type hms (#1752).

Expand Down
4 changes: 2 additions & 2 deletions R/geom-point.r
Expand Up @@ -28,7 +28,7 @@
#'
#' @seealso \code{\link{scale_size}} to see scale area of points, instead of
#' radius, \code{\link{geom_jitter}} to jitter points to reduce (mild)
#' overplotting, \code{\link{geom_count}} to count the number of points
#' overplotting, \code{\link{geom_count}} to count the number of points
#' at unique locations on a scatterplot and map the size of the point to
#' the count.
#' @inheritParams layer
Expand Down Expand Up @@ -119,7 +119,7 @@ geom_point <- function(mapping = NULL, data = NULL,
#' @export
GeomPoint <- ggproto("GeomPoint", Geom,
required_aes = c("x", "y"),
non_missing_aes = c("size", "shape"),
non_missing_aes = c("size", "shape", "colour"),
default_aes = aes(
shape = 19, colour = "black", size = 1.5, fill = NA,
alpha = NA, stroke = 0.5
Expand Down
11 changes: 2 additions & 9 deletions R/guide-legend.r
Expand Up @@ -216,16 +216,9 @@ guide_train.legend <- function(guide, scale) {
stringsAsFactors = FALSE)
key$.label <- scale$get_labels(breaks)

# this is a quick fix for #118
# some scales have NA as na.value (e.g., size)
# some scales have non NA as na.value (e.g., "grey50" for colour)
# drop rows if data (instead of the mapped value) is NA
#
# Also, drop out-of-range values for continuous scale
# Drop out-of-range values for continuous scale
# (should use scale$oob?)
if (scale$is_discrete()) {
key <- key[!is.na(breaks), , drop = FALSE]
} else {
if (!scale$is_discrete()) {
limits <- scale$get_limits()
noob <- !is.na(breaks) & limits[1] <= breaks & breaks <= limits[2]
key <- key[noob, , drop = FALSE]
Expand Down
4 changes: 2 additions & 2 deletions R/range.r
Expand Up @@ -13,8 +13,8 @@ Range <- ggproto("Range", NULL,
)

RangeDiscrete <- ggproto("RangeDiscrete", Range,
train = function(self, x, drop = FALSE) {
self$range <- scales::train_discrete(x, self$range, drop)
train = function(self, x, drop = FALSE, na.rm = FALSE) {
self$range <- scales::train_discrete(x, self$range, drop = drop, na.rm = na.rm)
}
)

Expand Down
19 changes: 15 additions & 4 deletions R/scale-.r
Expand Up @@ -368,7 +368,7 @@ ScaleDiscrete <- ggproto("ScaleDiscrete", Scale,

train = function(self, x) {
if (length(x) == 0) return()
self$range$train(x, drop = self$drop)
self$range$train(x, drop = self$drop, na.rm = !self$na.translate)
},

transform = function(x) {
Expand All @@ -393,7 +393,11 @@ ScaleDiscrete <- ggproto("ScaleDiscrete", Scale,
pal_match <- unname(pal_match)
}

ifelse(is.na(x) | is.na(pal_match), self$na.value, pal_match)
if (self$na.translate) {
ifelse(is.na(x) | is.na(pal_match), self$na.value, pal_match)
} else {
pal_match
}
},

dimension = function(self, expand = c(0, 0)) {
Expand Down Expand Up @@ -631,7 +635,12 @@ continuous_scale <- function(aesthetics, scale_name, palette, name = waiver(),
#' additive constant used to expand the range of the scales so that there
#' is a small gap between the data and the axes. The defaults are (0,0.6)
#' for discrete scales and (0.05,0) for continuous scales.
#' @param na.value how should missing values be displayed?
#' @param na.translate Unlike continuous scales, discrete scales can easily show
#' missing values, and do so by default. If you want to remove missing values
#' from a discrete scale, specify \code{na.translate = FALSE}.
#' @param na.value If \code{na.translate = TRUE}, what value aesthetic
#' value should missing be displayed as? Does not apply to position scales
#' where \code{NA} is always placed at the far right.
#' @param guide the name of, or actual function, used to create the
#' guide. See \code{\link{guides}} for more info.
#' @param position The position of the axis. "left" or "right" for vertical
Expand All @@ -640,7 +649,8 @@ continuous_scale <- function(aesthetics, scale_name, palette, name = waiver(),
#' @keywords internal
discrete_scale <- function(aesthetics, scale_name, palette, name = waiver(),
breaks = waiver(), labels = waiver(), limits = NULL, expand = waiver(),
na.value = NA, drop = TRUE, guide = "legend", position = "left", super = ScaleDiscrete) {
na.translate = TRUE, na.value = NA, drop = TRUE,
guide = "legend", position = "left", super = ScaleDiscrete) {

check_breaks_labels(breaks, labels)

Expand All @@ -660,6 +670,7 @@ discrete_scale <- function(aesthetics, scale_name, palette, name = waiver(),
range = discrete_range(),
limits = limits,
na.value = na.value,
na.translate = na.translate,
expand = expand,

name = name,
Expand Down
2 changes: 1 addition & 1 deletion R/scale-discrete-.r
Expand Up @@ -77,7 +77,7 @@ scale_y_discrete <- function(..., expand = waiver(), position = "left") {
ScaleDiscretePosition <- ggproto("ScaleDiscretePosition", ScaleDiscrete,
train = function(self, x) {
if (is.discrete(x)) {
self$range$train(x, drop = self$drop)
self$range$train(x, drop = self$drop, na.rm = !self$na.translate)
} else {
self$range_c$train(x)
}
Expand Down
46 changes: 46 additions & 0 deletions tests/testthat/test-scale-discrete.R
@@ -1,5 +1,51 @@
context("scale_discrete")

# Missing values ----------------------------------------------------------

df <- tibble::tibble(
x1 = c("a", "b", NA),
x2 = factor(x1),
x3 = addNA(x2),

y = 1:3
)

test_that("NAs translated/preserved for position scales", {
p1a <- ggplot(df, aes(x1, y)) + geom_point()
p2a <- ggplot(df, aes(x2, y)) + geom_point()
p3a <- ggplot(df, aes(x3, y)) + geom_point()

expect_equal(layer_data(p1a)$x, c(1, 2, 3))
expect_equal(layer_data(p2a)$x, c(1, 2, 3))
expect_equal(layer_data(p3a)$x, c(1, 2, 3))

rm_na_x <- scale_x_discrete(na.translate = FALSE)
p1b <- p1a + rm_na_x
p2b <- p2a + rm_na_x
p3b <- p3a + rm_na_x

expect_equal(layer_data(p1b)$x, c(1, 2, NA))
expect_equal(layer_data(p2b)$x, c(1, 2, NA))
expect_equal(layer_data(p3b)$x, c(1, 2, NA))
})

test_that("NAs translated/preserved for non-position scales", {
p1a <- ggplot(df, aes(y, y, colour = x1)) + geom_point()
p2a <- ggplot(df, aes(y, y, colour = x2)) + geom_point()
p3a <- ggplot(df, aes(y, y, colour = x3)) + geom_point()
expect_equal(layer_data(p1a)$colour, c("#F8766D", "#00BFC4", "grey50"))
expect_equal(layer_data(p2a)$colour, c("#F8766D", "#00BFC4", "grey50"))
expect_equal(layer_data(p3a)$colour, c("#F8766D", "#00BFC4", "grey50"))

rm_na_colour <- scale_colour_discrete(na.translate = FALSE)
p1b <- p1a + rm_na_colour
p2b <- p2a + rm_na_colour
p3b <- p3a + rm_na_colour
expect_equal(layer_data(p1b)$colour, c("#F8766D", "#00BFC4", NA))
expect_equal(layer_data(p2b)$colour, c("#F8766D", "#00BFC4", NA))
expect_equal(layer_data(p3b)$colour, c("#F8766D", "#00BFC4", NA))
})

# Ranges ------------------------------------------------------------------

test_that("discrete ranges also encompas continuous values", {
Expand Down

0 comments on commit db2cbba

Please sign in to comment.