Skip to content

Commit

Permalink
ARROW-11700: [R] Internationalize error handling in tidy eval
Browse files Browse the repository at this point in the history
Also fixes ARROW-11785 along the way.

Closes apache#9717 from nealrichardson/i18n

Authored-by: Neal Richardson <neal.p.richardson@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
  • Loading branch information
nealrichardson authored and michalursa committed Jun 13, 2021
1 parent 0642f70 commit 3a88f7f
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 21 deletions.
36 changes: 25 additions & 11 deletions r/R/dplyr.R
Expand Up @@ -268,17 +268,29 @@ arrow_eval <- function (expr, mask) {
# else, for things not supported by Arrow return a "try-error",
# which we'll handle differently
msg <- conditionMessage(e)
# TODO(ARROW-11700): internationalization
if (grepl("object '.*'.not.found", msg)) {
stop(e)
patterns <- dplyr_functions$i18ized_error_pattern
if (is.null(patterns)) {
patterns <- i18ize_error_messages()
# Memoize it
dplyr_functions$i18ized_error_pattern <- patterns
}
if (grepl('could not find function ".*"', msg)) {
if (grepl(patterns, msg)) {
stop(e)
}
invisible(structure(msg, class = "try-error", condition = e))
})
}

i18ize_error_messages <- function() {
# Figure out what the error messages will be with this LANGUAGE
# so that we can look for them
out <- list(
obj = tryCatch(X_____X, error = function(e) conditionMessage(e)),
fun = tryCatch(X_____X(), error = function(e) conditionMessage(e))
)
paste(map(out, ~sub("X_____X", ".*", .)), collapse = "|")
}

# Helper to assemble the functions that go in the NSE data mask
# The only difference between the Dataset and the Table/RecordBatch versions
# is that they use a different wrapping function (FUN) to hold the unevaluated
Expand Down Expand Up @@ -351,13 +363,15 @@ arrow_mask <- function(.data) {
}

set_filters <- function(.data, expressions) {
# expressions is a list of Expressions. AND them together and set them on .data
new_filter <- Reduce("&", expressions)
if (isTRUE(.data$filtered_rows)) {
# TRUE is default (i.e. no filter yet), so we don't need to & with it
.data$filtered_rows <- new_filter
} else {
.data$filtered_rows <- .data$filtered_rows & new_filter
if (length(expressions)) {
# expressions is a list of Expressions. AND them together and set them on .data
new_filter <- Reduce("&", expressions)
if (isTRUE(.data$filtered_rows)) {
# TRUE is default (i.e. no filter yet), so we don't need to & with it
.data$filtered_rows <- new_filter
} else {
.data$filtered_rows <- .data$filtered_rows & new_filter
}
}
.data
}
Expand Down
20 changes: 20 additions & 0 deletions r/tests/testthat/helper-arrow.R
Expand Up @@ -30,6 +30,26 @@ MAX_INT <- 2147483647L

# Make sure this is unset
Sys.setenv(ARROW_PRE_0_15_IPC_FORMAT = "")
# Set English language so that error messages aren't internationalized
# (R CMD check does this, but in case you're running outside of check)
Sys.setenv(LANGUAGE = "en")

with_language <- function(lang, expr) {
old <- Sys.getenv("LANGUAGE")
# Check what this message is before changing languages; this will
# trigger caching the transations if the OS does that (some do).
# If the OS does cache, then we can't test changing languages safely.
before <- i18ize_error_messages()
Sys.setenv(LANGUAGE = lang)
on.exit({
Sys.setenv(LANGUAGE = old)
dplyr_functions$i18ized_error_pattern <<- NULL
})
if (!identical(before, i18ize_error_messages())) {
skip(paste("This OS either does not support changing languages to", lang, "or it caches translations"))
}
force(expr)
}

test_that <- function(what, code) {
testthat::test_that(what, {
Expand Down
27 changes: 22 additions & 5 deletions r/tests/testthat/test-dplyr-filter.R
Expand Up @@ -302,11 +302,28 @@ test_that("filter environment scope", {
})

test_that("Filtering on a column that doesn't exist errors correctly", {
skip("Error handling in arrow_eval() needs to be internationalized (ARROW-11700)")
expect_error(
batch %>% filter(not_a_col == 42) %>% collect(),
"object 'not_a_col' not found"
)
with_language("fr", {
# expect_warning(., NA) because the usual behavior when it hits a filter
# that it can't evaluate is to raise a warning, collect() to R, and retry
# the filter. But we want this to error the first time because it's
# a user error, not solvable by retrying in R
expect_warning(
expect_error(
tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
"objet 'not_a_col' introuvable"
),
NA
)
})
with_language("en", {
expect_warning(
expect_error(
tbl %>% record_batch() %>% filter(not_a_col == 42) %>% collect(),
"object 'not_a_col' not found"
),
NA
)
})
})

test_that("Filtering with a function that doesn't have an Array/expr method still works", {
Expand Down
18 changes: 13 additions & 5 deletions r/tests/testthat/test-dplyr-mutate.R
Expand Up @@ -301,11 +301,19 @@ test_that("handle bad expressions", {
# TODO: search for functions other than mean() (see above test)
# that need to be forced to fail because they error ambiguously

skip("Error handling in arrow_eval() needs to be internationalized (ARROW-11700)")
expect_error(
Table$create(tbl) %>% mutate(newvar = NOTAVAR + 2),
"object 'NOTAVAR' not found"
)
with_language("fr", {
# expect_warning(., NA) because the usual behavior when it hits a filter
# that it can't evaluate is to raise a warning, collect() to R, and retry
# the filter. But we want this to error the first time because it's
# a user error, not solvable by retrying in R
expect_warning(
expect_error(
Table$create(tbl) %>% mutate(newvar = NOTAVAR + 2),
"objet 'NOTAVAR' introuvable"
),
NA
)
})
})

test_that("print a mutated table", {
Expand Down

0 comments on commit 3a88f7f

Please sign in to comment.