Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve unpack_list error #607

Merged
merged 5 commits into from
Dec 23, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
precision (#589).
- `<LazyFrame>$filter()`, `<DataFrame>$filter()`, and `pl$when()` now allow multiple conditions
to be separated by commas, like `lf$filter(pl$col("foo") == 1, pl$col("bar") != 2)` (#598).
- Better error messages for trailing argument commas such as `pl$DataFrame()$select("a",)` (#607).

## polars 0.11.0

Expand Down
16 changes: 11 additions & 5 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,12 @@ NULL
#' # custom schema
#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
largs = unpack_list(...)

uw = \(res) unwrap(res, "in $DataFrame():")

largs = unpack_list(...) |>
result() |>
uw()

if (!is.null(schema) && !all(names(schema) %in% names(largs))) {
Err_plain("Some columns in `schema` are not in the DataFrame.") |>
uw()
Expand Down Expand Up @@ -633,7 +635,7 @@ DataFrame_sort = function(
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
DataFrame_select = function(...) {
.pr$DataFrame$select(self, unpack_list(...)) |>
.pr$DataFrame$select(self, unpack_list(..., .context = "in $select()")) |>
unwrap("in $select()")
}

Expand Down Expand Up @@ -740,7 +742,7 @@ DataFrame_shift_and_fill = function(fill_value, periods = 1) {
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
DataFrame_with_columns = function(...) {
.pr$DataFrame$with_columns(self, unpack_list(...)) |>
.pr$DataFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |>
unwrap("in $with_columns()")
}

Expand Down Expand Up @@ -831,7 +833,11 @@ DataFrame_filter = function(...) {
#' )
DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
# clone the DataFrame, bundle args as attributes. Non fallible.
construct_group_by(self, groupby_input = unpack_list(...), maintain_order = maintain_order)
construct_group_by(
self,
groupby_input = unpack_list(..., .context = "$group_by()"),
maintain_order = maintain_order
)
}


Expand Down
2 changes: 1 addition & 1 deletion R/functions__eager.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pl$concat = function(
),
rechunk = TRUE,
parallel = TRUE) {
l = unpack_list(..., skip_classes = "data.frame")
l = unpack_list(..., skip_classes = "data.frame", .context = "in pl$concat")

if (length(l) == 0L) {
return(NULL)
Expand Down
18 changes: 6 additions & 12 deletions R/functions__whenthen.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,8 @@
#' $otherwise(-1)
#' )
pl$when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in pl$when():")

.pr$When$new(condition) |>
unpack_bool_expr_result(...) |>
and_then(.pr$When$new) |>
unwrap("in pl$when():")
}

Expand All @@ -80,10 +78,8 @@ When_then = function(statement) {
}

Then_when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in $when():")

.pr$Then$when(self, condition) |>
unpack_bool_expr_result(...) |>
and_then(\(condition) .pr$Then$when(self, condition)) |>
unwrap("in $when():")
}

Expand All @@ -98,10 +94,8 @@ ChainedWhen_then = function(statement) {
}

ChainedThen_when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in $when():")

.pr$ChainedThen$when(self, condition) |>
unpack_bool_expr_result(...) |>
and_then(\(condition) .pr$ChainedThen$when(self, condition)) |>
unwrap("in $when():")
}

Expand Down
2 changes: 1 addition & 1 deletion R/group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ GroupBy_agg = function(...) {
.pr$DataFrame$by_agg(
self = self,
group_exprs = attr(self, "private")$groupby_input,
agg_exprs = unpack_list(...),
agg_exprs = unpack_list(..., .context = "in $agg():"),
maintain_order = attr(self, "private")$maintain_order
) |>
unwrap("in $agg():")
Expand Down
2 changes: 1 addition & 1 deletion R/lazyframe__group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ print.RPolarsLazyGroupBy = function(x, ...) {
#' pl$col("bar")$mean()$alias("bar_tail_sum")
#' )
LazyGroupBy_agg = agg = function(...) {
.pr$LazyGroupBy$agg(self, unpack_list(...)) |>
.pr$LazyGroupBy$agg(self, unpack_list(..., .context = "in $agg():")) |>
unwrap("in $agg():")
}

Expand Down
13 changes: 7 additions & 6 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ LazyFrame_describe_plan = "use_extendr_wrapper"
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
LazyFrame_select = function(...) {
.pr$LazyFrame$select(self, unpack_list(...)) |>
.pr$LazyFrame$select(self, unpack_list(..., .context = "in $select()")) |>
unwrap("in $select()")
}

Expand All @@ -246,7 +246,7 @@ LazyFrame_select = function(...) {
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
LazyFrame_with_columns = function(...) {
.pr$LazyFrame$with_columns(self, unpack_list(...)) |>
.pr$LazyFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |>
unwrap("in $with_columns()")
}

Expand Down Expand Up @@ -286,7 +286,7 @@ LazyFrame_with_row_count = function(name, offset = NULL) {
#' # lf$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
#' lf$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
LazyFrame_filter = function(...) {
bool_expr = unpack_bool_expr(...) |>
bool_expr = unpack_bool_expr_result(...) |>
unwrap("in $filter()")

.pr$LazyFrame$filter(self, bool_expr)
Expand Down Expand Up @@ -977,7 +977,7 @@ LazyFrame_unique = function(subset = NULL, keep = "first", maintain_order = FALS
#' )$
#' collect()
LazyFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
.pr$LazyFrame$group_by(self, unpack_list(...), maintain_order) |>
.pr$LazyFrame$group_by(self, unpack_list(..., .context = "in $group_by():"), maintain_order) |>
unwrap("in $group_by():")
}

Expand Down Expand Up @@ -1060,7 +1060,8 @@ LazyFrame_sort = function(
nulls_last = FALSE,
maintain_order = FALSE) {
.pr$LazyFrame$sort_by_exprs(
self, unpack_list(by), err_on_named_args(...), descending, nulls_last, maintain_order
self, unpack_list(by, .context = "in $sort():"), err_on_named_args(...),
descending, nulls_last, maintain_order
) |>
unwrap("in $sort():")
}
Expand Down Expand Up @@ -1517,7 +1518,7 @@ LazyFrame_profile = function(
#' df$explode("numbers", "numbers_2")$collect()
#' df$explode(pl$col(pl$List(pl$Float64)))$collect()
LazyFrame_explode = function(...) {
dotdotdot_args = unpack_list(...)
dotdotdot_args = unpack_list(..., .context = "in explode():")
.pr$LazyFrame$explode(self, dotdotdot_args) |>
unwrap("in explode():")
}
Expand Down
61 changes: 42 additions & 19 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,24 +87,43 @@ verify_method_call = function(Class_env, Method_name, call = sys.call(1L), class
#' @details rlang has this wonderful list2 implemented in c/c++, that is agnostic about trailing
#' commas in ... params. One day r-polars will have a list2-impl written in rust, which also allows
#' trailing commas.
list2 = list
list2 = function(..., .context = NULL, .call = sys.call(1L)) {
list(...) |>
result() |>
map_err(\(err) {
if (identical(err$contexts()$PlainErrorMessage, "argument is missing, with no default")) {
err$plain("trailing argument commas are not (yet) supported with polars")
} else {
err
}
}) |>
unwrap(context = .context, call = .call)
}




#' Internal unpack list
#' @noRd
#' @param l any list
#'
#' @param ... args to unwrap
#' @param .context a context passed to unwrap
#' @param .call a call passed to unwrap
#' @param skip_classes char vec, do not unpack list inherits skip_classes.
#'
#' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not
#' `df.select(expr1, expr2,)`. r-polars also allows user to directly write
#' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list
#' checks whether first and only arg is a list and unpacks it, to bridge the
#' allowed patterns of passing expr to methods with ... param input.
#' Will throw an error if trailing comma.
#' @return a list
#' @examples
#' f = \(...) unpack_list(list(...))
#' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE
#' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE
unpack_list = function(..., skip_classes = NULL) {
l = list2(...)
unpack_list = function(..., .context = NULL, .call = sys.call(1L), skip_classes = NULL) {
l = list2(..., .context = .context, .call = .call)
if (
length(l) == 1L &&
is.list(l[[1L]]) &&
Expand All @@ -116,25 +135,29 @@ unpack_list = function(..., skip_classes = NULL) {
}
}


#' Convert dot-dot-dot to bool expression
#' @noRd
#' @return Result, a list has `ok` (RPolarsExpr class) and `err` (RPolarsErr class)
#' @examples
#' unpack_bool_expr(pl$lit(TRUE), pl$lit(FALSE))
unpack_bool_expr = function(..., .msg = NULL) {
dots = list2(...)

if (!is.null(names(dots))) {
return(Err_plain(
"Detected a named input.",
"This usually means that you've used `=` instead of `==`."
))
}

dots |>
Reduce(`&`, x = _) |>
result(msg = .msg) |>
suppressWarnings()
#' unpack_bool_expr_result(pl$lit(TRUE), pl$lit(FALSE))
unpack_bool_expr_result = function(...) {
unpack_list(...) |>
result() |>
and_then(\(l) {
if (!is.null(names(l))) {
Err_plain(
"Detected a named input.",
"This usually means that you've used `=` instead of `==`.",
"Some names seen:", head(names(l))
)
} else {
l |>
Reduce(`&`, x = _) |>
result() |>
suppressWarnings()
}
})
}

#' Simple SQL CASE WHEN implementation for R
Expand Down
56 changes: 56 additions & 0 deletions tests/testthat/test-dotdotdot.R
eitsupi marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
test_that("unpack_list", {
# unpacks a single list
expect_identical(
unpack_list(list(1, 2, 3)),
unpack_list(1, 2, 3)
)

# ... not if more args
expect_failure(expect_equal(
unpack_list(list(1, 2, 3), "a"),
unpack_list(1, 2, 3, "a")
))
eitsupi marked this conversation as resolved.
Show resolved Hide resolved

# skip_classes
expect_identical(
unpack_list(data.frame(a = 1, b = 2), skip_classes = "data.frame"),
list(data.frame(a = 1, b = 2))
)
expect_failure(expect_equal(
unpack_list(data.frame(a = 1, b = 2), skip_classes = NULL),
list(data.frame(a = 1, b = 2))
))

# trailing commas not allowed
err = unpack_list(1, .context = "this test", .call = call("foobar"), ) |>
result() |>
unwrap_err()
expect_identical(
err$contexts()[[1]],
"trailing argument commas are not (yet) supported with polars"
)
expect_identical(err$get_rcall(), "foobar()")
expect_identical(err$get_rinfo(), "this test")
})



test_that("unpack_bool_expr_result", {
# unpacks a single list, return ok-result, get same polars expression
a = unpack_bool_expr_result(pl$col("a") == 1, pl$col("b") > 2)
b = unpack_bool_expr_result(list(pl$col("a") == 1, pl$col("b") > 2))
expect_true(a$ok$meta$eq(b$ok))

# ... not if more args
expect_failure(expect_equal(
unpack_list(list(1, 2, 3), "a"),
unpack_list(1, 2, 3, "a")
))
eitsupi marked this conversation as resolved.
Show resolved Hide resolved

# trailing commas not allowed
ctx = unpack_list(1, ) |> get_err_ctx()
expect_identical(
ctx[[1]],
"trailing argument commas are not (yet) supported with polars"
)
})
Loading