diff --git a/DESCRIPTION b/DESCRIPTION index a7f5d81e3..9bc1611fe 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,6 +73,7 @@ Collate: 'dataframe__frame.R' 'datatype.R' 'docs.R' + 'dotdotdot.R' 'error__rpolarserr.R' 'error__string.R' 'error__trait.R' diff --git a/NEWS.md b/NEWS.md index fa6322c47..70ed9b7ad 100644 --- a/NEWS.md +++ b/NEWS.md @@ -34,6 +34,7 @@ precision (#589). - `$filter()`, `$filter()`, and `pl$when()` now allow multiple conditions to be separated by commas, like `lf$filter(pl$col("foo") == 1, pl$col("bar") != 2)` (#598). +- Better error messages for trailing argument commas such as `pl$DataFrame()$select("a",)` (#607). ## polars 0.11.0 diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 1091dc542..c63d1921f 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -143,10 +143,12 @@ NULL #' # custom schema #' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)) pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) { - largs = unpack_list(...) - uw = \(res) unwrap(res, "in $DataFrame():") + largs = unpack_list(...) |> + result() |> + uw() + if (!is.null(schema) && !all(names(schema) %in% names(largs))) { Err_plain("Some columns in `schema` are not in the DataFrame.") |> uw() @@ -633,7 +635,7 @@ DataFrame_sort = function( #' (pl$col("Sepal.Length") + 2)$alias("add_2_SL") #' ) DataFrame_select = function(...) { - .pr$DataFrame$select(self, unpack_list(...)) |> + .pr$DataFrame$select(self, unpack_list(..., .context = "in $select()")) |> unwrap("in $select()") } @@ -740,7 +742,7 @@ DataFrame_shift_and_fill = function(fill_value, periods = 1) { #' SW_add_2 = (pl$col("Sepal.Width") + 2) #' ) DataFrame_with_columns = function(...) { - .pr$DataFrame$with_columns(self, unpack_list(...)) |> + .pr$DataFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |> unwrap("in $with_columns()") } @@ -831,7 +833,11 @@ DataFrame_filter = function(...) { #' ) DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) { # clone the DataFrame, bundle args as attributes. Non fallible. - construct_group_by(self, groupby_input = unpack_list(...), maintain_order = maintain_order) + construct_group_by( + self, + groupby_input = unpack_list(..., .context = "$group_by()"), + maintain_order = maintain_order + ) } diff --git a/R/dotdotdot.R b/R/dotdotdot.R new file mode 100644 index 000000000..bf55b424a --- /dev/null +++ b/R/dotdotdot.R @@ -0,0 +1,80 @@ +#' list2 - one day like rlang +#' list2 placeholder for future rust-impl +#' @noRd +#' @return An R list +#' @details rlang has this wonderful list2 implemented in c/c++, that is agnostic about trailing +#' commas in ... params. One day r-polars will have a list2-impl written in rust, which also allows +#' trailing commas. +#' https://github.com/extendr/extendr/pull/578 see progress here. +list2 = function(..., .context = NULL, .call = sys.call(1L)) { + list(...) |> + result() |> + map_err(\(err) { + if (identical(err$contexts()$PlainErrorMessage, "argument is missing, with no default")) { + err$plain("trailing argument commas are not (yet) supported with polars") + } else { + err + } + }) |> + unwrap(context = .context, call = .call) +} + + + + +#' Internal unpack list +#' @noRd +#' +#' @param ... args to unwrap +#' @param .context a context passed to unwrap +#' @param .call a call passed to unwrap +#' @param skip_classes char vec, do not unpack list inherits skip_classes. +#' +#' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not +#' `df.select(expr1, expr2,)`. r-polars also allows user to directly write +#' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list +#' checks whether first and only arg is a list and unpacks it, to bridge the +#' allowed patterns of passing expr to methods with ... param input. +#' Will throw an error if trailing comma. +#' @return a list +#' @examples +#' f = \(...) unpack_list(list(...)) +#' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE +#' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE +unpack_list = function(..., .context = NULL, .call = sys.call(1L), skip_classes = NULL) { + l = list2(..., .context = .context, .call = .call) + if ( + length(l) == 1L && + is.list(l[[1L]]) && + !(!is.null(skip_classes) && inherits(l[[1L]], skip_classes)) + ) { + l[[1L]] + } else { + l + } +} + + +#' Convert dot-dot-dot to bool expression +#' @noRd +#' @return Result, a list has `ok` (RPolarsExpr class) and `err` (RPolarsErr class) +#' @examples +#' unpack_bool_expr_result(pl$lit(TRUE), pl$lit(FALSE)) +unpack_bool_expr_result = function(...) { + unpack_list(...) |> + result() |> + and_then(\(l) { + if (!is.null(names(l))) { + Err_plain( + "Detected a named input.", + "This usually means that you've used `=` instead of `==`.", + "Some names seen:", head(names(l)) + ) + } else { + l |> + Reduce(`&`, x = _) |> + result() |> + suppressWarnings() + } + }) +} diff --git a/R/functions__eager.R b/R/functions__eager.R index 60a95353a..288394a14 100644 --- a/R/functions__eager.R +++ b/R/functions__eager.R @@ -62,7 +62,7 @@ pl$concat = function( ), rechunk = TRUE, parallel = TRUE) { - l = unpack_list(..., skip_classes = "data.frame") + l = unpack_list(..., skip_classes = "data.frame", .context = "in pl$concat") if (length(l) == 0L) { return(NULL) diff --git a/R/functions__whenthen.R b/R/functions__whenthen.R index a8df47730..817a5ef7e 100644 --- a/R/functions__whenthen.R +++ b/R/functions__whenthen.R @@ -64,10 +64,8 @@ #' $otherwise(-1) #' ) pl$when = function(...) { - condition = unpack_bool_expr(...) |> - unwrap("in pl$when():") - - .pr$When$new(condition) |> + unpack_bool_expr_result(...) |> + and_then(.pr$When$new) |> unwrap("in pl$when():") } @@ -80,10 +78,8 @@ When_then = function(statement) { } Then_when = function(...) { - condition = unpack_bool_expr(...) |> - unwrap("in $when():") - - .pr$Then$when(self, condition) |> + unpack_bool_expr_result(...) |> + and_then(\(condition) .pr$Then$when(self, condition)) |> unwrap("in $when():") } @@ -98,10 +94,8 @@ ChainedWhen_then = function(statement) { } ChainedThen_when = function(...) { - condition = unpack_bool_expr(...) |> - unwrap("in $when():") - - .pr$ChainedThen$when(self, condition) |> + unpack_bool_expr_result(...) |> + and_then(\(condition) .pr$ChainedThen$when(self, condition)) |> unwrap("in $when():") } diff --git a/R/group_by.R b/R/group_by.R index 7518ba9bd..7c2ec5353 100644 --- a/R/group_by.R +++ b/R/group_by.R @@ -89,7 +89,7 @@ GroupBy_agg = function(...) { .pr$DataFrame$by_agg( self = self, group_exprs = attr(self, "private")$groupby_input, - agg_exprs = unpack_list(...), + agg_exprs = unpack_list(..., .context = "in $agg():"), maintain_order = attr(self, "private")$maintain_order ) |> unwrap("in $agg():") diff --git a/R/lazyframe__group_by.R b/R/lazyframe__group_by.R index bb9385db9..ca81f466c 100644 --- a/R/lazyframe__group_by.R +++ b/R/lazyframe__group_by.R @@ -37,7 +37,7 @@ print.RPolarsLazyGroupBy = function(x, ...) { #' pl$col("bar")$mean()$alias("bar_tail_sum") #' ) LazyGroupBy_agg = agg = function(...) { - .pr$LazyGroupBy$agg(self, unpack_list(...)) |> + .pr$LazyGroupBy$agg(self, unpack_list(..., .context = "in $agg():")) |> unwrap("in $agg():") } diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 88fd8fa72..f05cddad2 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -220,7 +220,7 @@ LazyFrame_describe_plan = "use_extendr_wrapper" #' (pl$col("Sepal.Length") + 2)$alias("add_2_SL") #' ) LazyFrame_select = function(...) { - .pr$LazyFrame$select(self, unpack_list(...)) |> + .pr$LazyFrame$select(self, unpack_list(..., .context = "in $select()")) |> unwrap("in $select()") } @@ -246,7 +246,7 @@ LazyFrame_select = function(...) { #' SW_add_2 = (pl$col("Sepal.Width") + 2) #' ) LazyFrame_with_columns = function(...) { - .pr$LazyFrame$with_columns(self, unpack_list(...)) |> + .pr$LazyFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |> unwrap("in $with_columns()") } @@ -286,7 +286,7 @@ LazyFrame_with_row_count = function(name, offset = NULL) { #' # lf$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1) #' lf$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1) LazyFrame_filter = function(...) { - bool_expr = unpack_bool_expr(...) |> + bool_expr = unpack_bool_expr_result(...) |> unwrap("in $filter()") .pr$LazyFrame$filter(self, bool_expr) @@ -977,7 +977,7 @@ LazyFrame_unique = function(subset = NULL, keep = "first", maintain_order = FALS #' )$ #' collect() LazyFrame_group_by = function(..., maintain_order = pl$options$maintain_order) { - .pr$LazyFrame$group_by(self, unpack_list(...), maintain_order) |> + .pr$LazyFrame$group_by(self, unpack_list(..., .context = "in $group_by():"), maintain_order) |> unwrap("in $group_by():") } @@ -1060,7 +1060,8 @@ LazyFrame_sort = function( nulls_last = FALSE, maintain_order = FALSE) { .pr$LazyFrame$sort_by_exprs( - self, unpack_list(by), err_on_named_args(...), descending, nulls_last, maintain_order + self, unpack_list(by, .context = "in $sort():"), err_on_named_args(...), + descending, nulls_last, maintain_order ) |> unwrap("in $sort():") } @@ -1517,7 +1518,7 @@ LazyFrame_profile = function( #' df$explode("numbers", "numbers_2")$collect() #' df$explode(pl$col(pl$List(pl$Float64)))$collect() LazyFrame_explode = function(...) { - dotdotdot_args = unpack_list(...) + dotdotdot_args = unpack_list(..., .context = "in explode():") .pr$LazyFrame$explode(self, dotdotdot_args) |> unwrap("in explode():") } diff --git a/R/utils.R b/R/utils.R index cd1d661ff..219b897e2 100644 --- a/R/utils.R +++ b/R/utils.R @@ -60,83 +60,6 @@ verify_method_call = function(Class_env, Method_name, call = sys.call(1L), class invisible(NULL) } - -# #highly experimental work around to use list2 without rlang -# ok.comma <- function(FUN, which=0) { -# function(...) { -# #browser() -# arg.list <- as.list(sys.call(which=which))[-1L] -# len <- length(arg.list) -# if (len > 1L) { -# last <- arg.list[[len]] -# if (missing(last)) { -# arg.list <- arg.list[-len] -# } -# } -# do.call(FUN, arg.list,envir=parent.frame(which+1)) -# } -# } -# list2 = ok.comma(list) -# list3 = ok.comma(list,which = 2) - - -#' list2 - one day like rlang -#' list2 placeholder for future rust-impl -#' @noRd -#' @return An R list -#' @details rlang has this wonderful list2 implemented in c/c++, that is agnostic about trailing -#' commas in ... params. One day r-polars will have a list2-impl written in rust, which also allows -#' trailing commas. -list2 = list - -#' Internal unpack list -#' @noRd -#' @param l any list -#' @param skip_classes char vec, do not unpack list inherits skip_classes. -#' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not -#' `df.select(expr1, expr2,)`. r-polars also allows user to directly write -#' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list -#' checks whether first and only arg is a list and unpacks it, to bridge the -#' allowed patterns of passing expr to methods with ... param input. -#' @return a list -#' @examples -#' f = \(...) unpack_list(list(...)) -#' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE -#' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE -unpack_list = function(..., skip_classes = NULL) { - l = list2(...) - if ( - length(l) == 1L && - is.list(l[[1L]]) && - !(!is.null(skip_classes) && inherits(l[[1L]], skip_classes)) - ) { - l[[1L]] - } else { - l - } -} - -#' Convert dot-dot-dot to bool expression -#' @noRd -#' @return Result, a list has `ok` (RPolarsExpr class) and `err` (RPolarsErr class) -#' @examples -#' unpack_bool_expr(pl$lit(TRUE), pl$lit(FALSE)) -unpack_bool_expr = function(..., .msg = NULL) { - dots = list2(...) - - if (!is.null(names(dots))) { - return(Err_plain( - "Detected a named input.", - "This usually means that you've used `=` instead of `==`." - )) - } - - dots |> - Reduce(`&`, x = _) |> - result(msg = .msg) |> - suppressWarnings() -} - #' Simple SQL CASE WHEN implementation for R #' @noRd #' @description Inspired by data.table::fcase + dplyr::case_when. diff --git a/tests/testthat/test-dotdotdot.R b/tests/testthat/test-dotdotdot.R new file mode 100644 index 000000000..d69ba76d5 --- /dev/null +++ b/tests/testthat/test-dotdotdot.R @@ -0,0 +1,56 @@ +test_that("unpack_list", { + # unpacks a single list + expect_identical( + unpack_list(list(1, 2, 3)), + unpack_list(1, 2, 3) + ) + + # ... not if more args + expect_failure(expect_equal( + unpack_list(list(1, 2, 3), "a"), + unpack_list(1, 2, 3, "a") + )) + + # skip_classes + expect_identical( + unpack_list(data.frame(a = 1, b = 2), skip_classes = "data.frame"), + list(data.frame(a = 1, b = 2)) + ) + expect_failure(expect_equal( + unpack_list(data.frame(a = 1, b = 2), skip_classes = NULL), + list(data.frame(a = 1, b = 2)) + )) + + # trailing commas not allowed + err = unpack_list(1, .context = "this test", .call = call("foobar"), ) |> + result() |> + unwrap_err() + expect_identical( + err$contexts()[[1]], + "trailing argument commas are not (yet) supported with polars" + ) + expect_identical(err$get_rcall(), "foobar()") + expect_identical(err$get_rinfo(), "this test") +}) + + + +test_that("unpack_bool_expr_result", { + # unpacks a single list, return ok-result, get same polars expression + a = unpack_bool_expr_result(pl$col("a") == 1, pl$col("b") > 2) + b = unpack_bool_expr_result(list(pl$col("a") == 1, pl$col("b") > 2)) + expect_true(a$ok$meta$eq(b$ok)) + + # ... not if more args + expect_failure(expect_equal( + unpack_list(list(1, 2, 3), "a"), + unpack_list(1, 2, 3, "a") + )) + + # trailing commas not allowed + ctx = unpack_list(1, ) |> get_err_ctx() + expect_identical( + ctx[[1]], + "trailing argument commas are not (yet) supported with polars" + ) +})