Skip to content

Commit

Permalink
distinct() errors on missing columns
Browse files Browse the repository at this point in the history
Fixes #4656
  • Loading branch information
hadley authored and romainfrancois committed Jan 6, 2020
1 parent 0e42df8 commit eee6685
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 77 deletions.
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
`eval_tbls2()` are now deprecated. That were only used in a handful of
packages, and we now believe that you're better off performing comparisons
more directly (#4675).
* `distinct()` errors if you request it use variables that don't exist
(this was previously a warning) (#4656).

* `tally()` and `count()` now error if the default output `name` (n), already
exists in the data frame. You'll now need to specify it yourself; this
Expand Down
27 changes: 7 additions & 20 deletions R/distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,33 +78,20 @@ distinct_prepare <- function(.data, vars, group_vars = character(), .keep_all =
# Once we've done the mutate, we no longer need lazy objects, and
# can instead just use their names
missing_vars <- setdiff(distinct_vars, names(.data))

if (length(missing_vars) > 0) {
missing_items <- fmt_items(fmt_obj(missing_vars))
distinct_vars <- distinct_vars[distinct_vars %in% names(.data)]
if (length(distinct_vars) > 0) {
true_vars <- glue("The following variables will be used:
{fmt_items(distinct_vars)}")
} else {
true_vars <- "The operation will return the input unchanged."
}
msg <- glue("Trying to compute distinct() for variables not found in the data:
{missing_items}
This is an error, but only a warning is raised for compatibility reasons.
{true_vars}
")
warn(msg)
abort(c(
"distinct() must use existing variables",
glue("`{missing_vars}` not found in `.data`")
))
}

new_vars <- unique(c(distinct_vars, group_vars))

# Keep the order of the variables
out_vars <- intersect(names(.data), new_vars)
# Always include grouping variables preserving input order
out_vars <- intersect(names(.data), c(distinct_vars, group_vars))

if (.keep_all) {
keep <- seq_along(.data)
} else {
keep <- unique(out_vars)
keep <- out_vars
}

list(data = .data, vars = out_vars, keep = keep)
Expand Down
13 changes: 13 additions & 0 deletions tests/testthat/test-distinct-error.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
> df %>% distinct(aa, x)
Error: distinct() must use existing variables
* `aa` not found in `.data`

> df %>% distinct(aa, bb)
Error: distinct() must use existing variables
* `aa` not found in `.data`
* `bb` not found in `.data`

> df %>% distinct(.data$aa)
Error: distinct() must use existing variables
* `aa` not found in `.data`

62 changes: 5 additions & 57 deletions tests/testthat/test-distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,63 +70,11 @@ test_that("empty grouped distinct equivalent to empty ungrouped", {
test_that("distinct gives a warning when selecting an unknown column (#3140)", {
df <- tibble(g = c(1, 2), x = c(1, 2))

expect_warning(
distinct(df, aa),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The operation will return the input unchanged."),
fixed = TRUE
)
expect_warning(
distinct(df, .data$aa),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The operation will return the input unchanged."),
fixed = TRUE
)


expect_warning(
distinct(df, aa, x),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The following variables will be used:
- x"),
fixed = TRUE
)
expect_warning(
distinct(df, .data$aa, x),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The following variables will be used:
- x"),
fixed = TRUE
)

expect_warning(
distinct(df, g, aa, x),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The following variables will be used:
- g
- x"),
fixed = TRUE
)
expect_warning(
distinct(df, g, .data$aa, x),
glue("Trying to compute distinct() for variables not found in the data:
- `aa`
This is an error, but only a warning is raised for compatibility reasons.
The following variables will be used:
- g
- x"),
fixed = TRUE
)
verify_output(test_path("test-distinct-error.txt"), {
df %>% distinct(aa, x)
df %>% distinct(aa, bb)
df %>% distinct(.data$aa)
})
})

test_that("distinct on a new, mutated variable is equivalent to mutate followed by distinct", {
Expand Down

0 comments on commit eee6685

Please sign in to comment.