Skip to content

Commit

Permalink
Merge pull request #499 from yjunechoe/null-column-fails
Browse files Browse the repository at this point in the history
Bad column selections fail gracefully at interrogation
  • Loading branch information
yjunechoe committed Nov 2, 2023
2 parents a9e269f + 9aa03dd commit 5905874
Show file tree
Hide file tree
Showing 13 changed files with 338 additions and 664 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@

These dynamic values may be useful for validations that get expanded into multiple steps.

## Minor improvements and bug fixes

* When no columns are returned from a `{tidyselect}` expression in `columns`, the agent's report now displays the originally supplied *expression* instead of simply blank (e.g., in `create_agent(small_table) %>% col_vals_null(matches("z"))`).

# pointblank 0.11.4

* Fixes issue with gt `0.9.0` compatibility.
Expand Down
20 changes: 6 additions & 14 deletions R/col_exists.R
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ NULL
#' @export
col_exists <- function(
x,
columns,
columns = NULL,
actions = NULL,
step_id = NULL,
label = NULL,
Expand All @@ -242,13 +242,6 @@ col_exists <- function(
columns <- rlang::enquo(columns)
# Get `columns` as a label
columns_expr <- as_columns_expr(columns)
# Require columns to be specified
if (rlang::quo_is_missing(columns)) {
stop('argument "columns" is missing, with no default')
}
if (rlang::quo_is_null(columns)) {
columns <- rlang::quo(tidyselect::everything())
}

# Resolve the columns based on the expression
## Only for `col_exists()`: error gracefully if column not found
Expand All @@ -257,15 +250,14 @@ col_exists <- function(
allow_empty = FALSE),
error = function(cnd) cnd$i %||% cnd
)
## Missing column selection
if (rlang::is_error(columns)) {
cnd <- columns
# tidyselect 0-column selection should contextualize attempted column
if (is.null(cnd$parent)) {
columns <- columns_expr
} else {
# Evaluation errors should be chained and rethrown
rlang::abort("Evaluation error in `columns`", parent = cnd$parent)
if (inherits(cnd, "resolve_eval_err")) {
# Evaluation errors should be rethrown
rlang::cnd_signal(cnd)
}
columns <- NA_character_
}

if (is_a_table_object(x)) {
Expand Down
44 changes: 34 additions & 10 deletions R/get_agent_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -751,16 +751,40 @@ get_agent_report <- function(
}
}

if (
is.null(column_i) |
(is.list(column_i) && is.na(unlist(column_i)))
) {

NA_character_

} else if (is.na(column_i)) {

NA_character_
# If column missing
if (is.null(column_i) || identical(unlist(column_i), NA_character_)) {

columns_expr <- validation_set$columns_expr[[x]]
not_interrogated <- is.na(validation_set$eval_error[[x]])
eval_error <- isTRUE(validation_set$eval_error[[x]])

# If column selection attempted AND:
# - in validation planning, OR
# - the evaluation errors, OR
# - is a col_exists() step
show_column_expr <- columns_expr != "NULL" &&
(not_interrogated || eval_error || assertion_str == "col_exists")
# Then display the original column selection expression for debugging
if (show_column_expr) {
as.character(
htmltools::tags$p(
title = columns_expr,
style = htmltools::css(
`margin-top` = "0",
`margin-bottom` = "0",
`font-family` = "monospace",
`white-space` = "nowrap",
`text-overflow` = "ellipsis",
overflow = "hidden",
color = if (eval_error) "firebrick",
`font-face` = "maroon"
),
columns_expr
)
)
} else {
NA_character_
}

} else {

Expand Down
118 changes: 45 additions & 73 deletions R/interrogate.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,15 +251,6 @@ interrogate <- function(
agent$validation_set[[i, "eval_active"]] <- FALSE
}

# Set the validation step as `active = FALSE` if there is a
# no column available as a result of a select expression
if (!is.null(agent$validation_set$column[[i]]) &&
is.na(agent$validation_set$column[[i]]) &&
agent$validation_set$assertion_type[[i]] %in%
column_expansion_fns_vec()) {
agent$validation_set[[i, "eval_active"]] <- FALSE
}

# Skip the validation step if `active = FALSE`
if (!agent$validation_set[[i, "eval_active"]]) {

Expand Down Expand Up @@ -1070,7 +1061,7 @@ interrogate_comparison <- function(
# Obtain the target column as a label
column <-
get_column_as_sym_at_idx(agent = agent, idx = idx) %>%
rlang::as_label()
as.character()

# Determine whether NAs should be allowed
na_pass <- get_column_na_pass_at_idx(agent = agent, idx = idx)
Expand Down Expand Up @@ -2226,13 +2217,21 @@ interrogate_col_exists <- function(
# Obtain the target column as a symbol
column <- get_column_as_sym_at_idx(agent = agent, idx = idx)

# Get `column_expr` to signal error if user didn't supply `columns`
column_input_missing <- agent$validation_set$columns_expr[idx] == "NULL"

# Create function for validating the `col_exists()` step function
tbl_col_exists <- function(
table,
column,
column_names
column_names,
column_input_missing
) {

if (column_input_missing) {
stop("`columns` argument must be supplied.", call. = FALSE)
}

# Ensure that the input `table` is actually a table object
tbl_validity_check(table = table)

Expand All @@ -2244,7 +2243,8 @@ interrogate_col_exists <- function(
tbl_col_exists(
table = table,
column = {{ column }},
column_names = column_names
column_names = column_names,
column_input_missing = column_input_missing
)
)
}
Expand Down Expand Up @@ -2313,31 +2313,22 @@ interrogate_distinct <- function(
table
) {

# Determine if grouping columns are provided in the test
# for distinct rows and parse the column names
if (!is.na(agent$validation_set$column[idx] %>% unlist())) {

column_names <-
get_column_as_sym_at_idx(agent = agent, idx = idx) %>%
as.character()

if (grepl("(,|&)", column_names)) {
column_names <-
get_column_as_sym_at_idx(agent = agent, idx = idx) %>%
as.character()

if (grepl("(,|&)", column_names)) {
column_names <-
strsplit(split = "(, |,|&)", column_names) %>%
unlist()
}

if (length(column_names) == 1 && column_names == "NA") {
if (uses_tidyselect(expr_text = agent$validation_set$columns_expr[idx])) {
column_names <- character(0)
}
}

} else if (is.na(agent$validation_set$column[idx] %>% unlist())) {
column_names <- get_all_cols(agent = agent)
strsplit(split = "(, |,|&)", column_names) %>%
unlist()
}

col_syms <- rlang::syms(column_names)
if (identical(column_names, NA_character_)) {
# If column is missing, let it get caught by `column_validity_has_columns`
col_syms <- NULL
} else {
col_syms <- rlang::syms(column_names)
}

# Create function for validating the `rows_distinct()` step function
tbl_rows_distinct <- function(
Expand Down Expand Up @@ -2413,31 +2404,22 @@ interrogate_complete <- function(
table
) {

# Determine if grouping columns are provided in the test
# for distinct rows and parse the column names
if (!is.na(agent$validation_set$column[idx] %>% unlist())) {

column_names <-
get_column_as_sym_at_idx(agent = agent, idx = idx) %>%
as.character()

if (grepl("(,|&)", column_names)) {
column_names <-
get_column_as_sym_at_idx(agent = agent, idx = idx) %>%
as.character()

if (grepl("(,|&)", column_names)) {
column_names <-
strsplit(split = "(, |,|&)", column_names) %>%
unlist()
}

if (length(column_names) == 1 && column_names == "NA") {
if (uses_tidyselect(expr_text = agent$validation_set$columns_expr[idx])) {
column_names <- character(0)
}
}

} else if (is.na(agent$validation_set$column[idx] %>% unlist())) {
column_names <- get_all_cols(agent = agent)
strsplit(split = "(, |,|&)", column_names) %>%
unlist()
}

col_syms <- rlang::syms(column_names)
if (identical(column_names, NA_character_)) {
# If column is missing, let it get caught by `column_validity_has_columns`
col_syms <- NULL
} else {
col_syms <- rlang::syms(column_names)
}

# Create function for validating the `rows_complete()` step function
tbl_rows_complete <- function(
Expand Down Expand Up @@ -2823,15 +2805,9 @@ column_validity_checks_column_value <- function(
value
) {

table_colnames <- colnames(table)
column_validity_checks_column(table, column)

if (!(as.character(column) %in% table_colnames)) {

stop(
"The value for `column` doesn't correspond to a column name.",
call. = FALSE
)
}
table_colnames <- colnames(table)

if (inherits(value, "name")) {

Expand All @@ -2851,7 +2827,7 @@ column_validity_checks_column_value <- function(
# Validity check for presence of columns
column_validity_has_columns <- function(columns) {

if (length(columns) < 1) {
if (length(columns) < 1 || identical(columns, NA_character_)) {
stop(
"The column selection statement that was used yielded no columns.",
call. = FALSE
Expand All @@ -2865,6 +2841,8 @@ column_validity_checks_column <- function(
column
) {

column_validity_has_columns(as.character(column))

table_colnames <- colnames(table)

if (!(as.character(column) %in% table_colnames)) {
Expand All @@ -2884,15 +2862,9 @@ column_validity_checks_ib_nb <- function(
right
) {

table_colnames <- colnames(table)
column_validity_checks_column(table, column)

if (!(as.character(column) %in% table_colnames)) {

stop(
"The value for `column` doesn't correspond to a column name.",
call. = FALSE
)
}
table_colnames <- colnames(table)

if (inherits(left, "name")) {

Expand Down
8 changes: 2 additions & 6 deletions R/rows_complete.R
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ rows_complete <- function(
# Capture the `columns` expression
columns <- rlang::enquo(columns)
# `rows_*()` functions treat `NULL` as `everything()`
if (rlang::quo_is_null(columns)) {
if (rlang::quo_is_null(columns) || rlang::quo_is_missing(columns)) {
columns <- rlang::quo(tidyselect::everything())
}
# Get `columns` as a label
Expand Down Expand Up @@ -333,12 +333,8 @@ rows_complete <- function(

agent <- x

if (length(columns) > 0) {
if (length(columns) > 1) {
columns <- paste(columns, collapse = ", ")
} else if (length(columns) == 1) {
columns <- columns
} else {
columns <- NULL
}

if (is.null(brief)) {
Expand Down
8 changes: 2 additions & 6 deletions R/rows_distinct.R
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ rows_distinct <- function(
# Capture the `columns` expression
columns <- rlang::enquo(columns)
# `rows_*()` functions treat `NULL` as `everything()`
if (rlang::quo_is_null(columns)) {
if (rlang::quo_is_null(columns) || rlang::quo_is_missing(columns)) {
columns <- rlang::quo(tidyselect::everything())
}
# Get `columns` as a label
Expand Down Expand Up @@ -334,12 +334,8 @@ rows_distinct <- function(

agent <- x

if (length(columns) > 0) {
if (length(columns) > 1) {
columns <- paste(columns, collapse = ", ")
} else if (length(columns) == 1) {
columns <- columns
} else {
columns <- NULL
}

if (is.null(brief)) {
Expand Down

0 comments on commit 5905874

Please sign in to comment.