Skip to content

Commit

Permalink
improve unpack_list error (#607)
Browse files Browse the repository at this point in the history
  • Loading branch information
sorhawell committed Dec 23, 2023
1 parent e234d3e commit 7590e75
Show file tree
Hide file tree
Showing 11 changed files with 165 additions and 103 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Collate:
'dataframe__frame.R'
'datatype.R'
'docs.R'
'dotdotdot.R'
'error__rpolarserr.R'
'error__string.R'
'error__trait.R'
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
precision (#589).
- `<LazyFrame>$filter()`, `<DataFrame>$filter()`, and `pl$when()` now allow multiple conditions
to be separated by commas, like `lf$filter(pl$col("foo") == 1, pl$col("bar") != 2)` (#598).
- Better error messages for trailing argument commas such as `pl$DataFrame()$select("a",)` (#607).

## polars 0.11.0

Expand Down
16 changes: 11 additions & 5 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -143,10 +143,12 @@ NULL
#' # custom schema
#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
pl$DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
largs = unpack_list(...)

uw = \(res) unwrap(res, "in $DataFrame():")

largs = unpack_list(...) |>
result() |>
uw()

if (!is.null(schema) && !all(names(schema) %in% names(largs))) {
Err_plain("Some columns in `schema` are not in the DataFrame.") |>
uw()
Expand Down Expand Up @@ -633,7 +635,7 @@ DataFrame_sort = function(
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
DataFrame_select = function(...) {
.pr$DataFrame$select(self, unpack_list(...)) |>
.pr$DataFrame$select(self, unpack_list(..., .context = "in $select()")) |>
unwrap("in $select()")
}

Expand Down Expand Up @@ -740,7 +742,7 @@ DataFrame_shift_and_fill = function(fill_value, periods = 1) {
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
DataFrame_with_columns = function(...) {
.pr$DataFrame$with_columns(self, unpack_list(...)) |>
.pr$DataFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |>
unwrap("in $with_columns()")
}

Expand Down Expand Up @@ -831,7 +833,11 @@ DataFrame_filter = function(...) {
#' )
DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
# clone the DataFrame, bundle args as attributes. Non fallible.
construct_group_by(self, groupby_input = unpack_list(...), maintain_order = maintain_order)
construct_group_by(
self,
groupby_input = unpack_list(..., .context = "$group_by()"),
maintain_order = maintain_order
)
}


Expand Down
80 changes: 80 additions & 0 deletions R/dotdotdot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' list2 - one day like rlang
#' list2 placeholder for future rust-impl
#' @noRd
#' @return An R list
#' @details rlang has this wonderful list2 implemented in c/c++, that is agnostic about trailing
#' commas in ... params. One day r-polars will have a list2-impl written in rust, which also allows
#' trailing commas.
#' https://github.com/extendr/extendr/pull/578 see progress here.
list2 = function(..., .context = NULL, .call = sys.call(1L)) {
list(...) |>
result() |>
map_err(\(err) {
if (identical(err$contexts()$PlainErrorMessage, "argument is missing, with no default")) {
err$plain("trailing argument commas are not (yet) supported with polars")
} else {
err
}
}) |>
unwrap(context = .context, call = .call)
}




#' Internal unpack list
#' @noRd
#'
#' @param ... args to unwrap
#' @param .context a context passed to unwrap
#' @param .call a call passed to unwrap
#' @param skip_classes char vec, do not unpack list inherits skip_classes.
#'
#' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not
#' `df.select(expr1, expr2,)`. r-polars also allows user to directly write
#' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list
#' checks whether first and only arg is a list and unpacks it, to bridge the
#' allowed patterns of passing expr to methods with ... param input.
#' Will throw an error if trailing comma.
#' @return a list
#' @examples
#' f = \(...) unpack_list(list(...))
#' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE
#' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE
unpack_list = function(..., .context = NULL, .call = sys.call(1L), skip_classes = NULL) {
l = list2(..., .context = .context, .call = .call)
if (
length(l) == 1L &&
is.list(l[[1L]]) &&
!(!is.null(skip_classes) && inherits(l[[1L]], skip_classes))
) {
l[[1L]]
} else {
l
}
}


#' Convert dot-dot-dot to bool expression
#' @noRd
#' @return Result, a list has `ok` (RPolarsExpr class) and `err` (RPolarsErr class)
#' @examples
#' unpack_bool_expr_result(pl$lit(TRUE), pl$lit(FALSE))
unpack_bool_expr_result = function(...) {
unpack_list(...) |>
result() |>
and_then(\(l) {
if (!is.null(names(l))) {
Err_plain(
"Detected a named input.",
"This usually means that you've used `=` instead of `==`.",
"Some names seen:", head(names(l))
)
} else {
l |>
Reduce(`&`, x = _) |>
result() |>
suppressWarnings()
}
})
}
2 changes: 1 addition & 1 deletion R/functions__eager.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ pl$concat = function(
),
rechunk = TRUE,
parallel = TRUE) {
l = unpack_list(..., skip_classes = "data.frame")
l = unpack_list(..., skip_classes = "data.frame", .context = "in pl$concat")

if (length(l) == 0L) {
return(NULL)
Expand Down
18 changes: 6 additions & 12 deletions R/functions__whenthen.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,8 @@
#' $otherwise(-1)
#' )
pl$when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in pl$when():")

.pr$When$new(condition) |>
unpack_bool_expr_result(...) |>
and_then(.pr$When$new) |>
unwrap("in pl$when():")
}

Expand All @@ -80,10 +78,8 @@ When_then = function(statement) {
}

Then_when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in $when():")

.pr$Then$when(self, condition) |>
unpack_bool_expr_result(...) |>
and_then(\(condition) .pr$Then$when(self, condition)) |>
unwrap("in $when():")
}

Expand All @@ -98,10 +94,8 @@ ChainedWhen_then = function(statement) {
}

ChainedThen_when = function(...) {
condition = unpack_bool_expr(...) |>
unwrap("in $when():")

.pr$ChainedThen$when(self, condition) |>
unpack_bool_expr_result(...) |>
and_then(\(condition) .pr$ChainedThen$when(self, condition)) |>
unwrap("in $when():")
}

Expand Down
2 changes: 1 addition & 1 deletion R/group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ GroupBy_agg = function(...) {
.pr$DataFrame$by_agg(
self = self,
group_exprs = attr(self, "private")$groupby_input,
agg_exprs = unpack_list(...),
agg_exprs = unpack_list(..., .context = "in $agg():"),
maintain_order = attr(self, "private")$maintain_order
) |>
unwrap("in $agg():")
Expand Down
2 changes: 1 addition & 1 deletion R/lazyframe__group_by.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ print.RPolarsLazyGroupBy = function(x, ...) {
#' pl$col("bar")$mean()$alias("bar_tail_sum")
#' )
LazyGroupBy_agg = agg = function(...) {
.pr$LazyGroupBy$agg(self, unpack_list(...)) |>
.pr$LazyGroupBy$agg(self, unpack_list(..., .context = "in $agg():")) |>
unwrap("in $agg():")
}

Expand Down
13 changes: 7 additions & 6 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ LazyFrame_describe_plan = "use_extendr_wrapper"
#' (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
#' )
LazyFrame_select = function(...) {
.pr$LazyFrame$select(self, unpack_list(...)) |>
.pr$LazyFrame$select(self, unpack_list(..., .context = "in $select()")) |>
unwrap("in $select()")
}

Expand All @@ -246,7 +246,7 @@ LazyFrame_select = function(...) {
#' SW_add_2 = (pl$col("Sepal.Width") + 2)
#' )
LazyFrame_with_columns = function(...) {
.pr$LazyFrame$with_columns(self, unpack_list(...)) |>
.pr$LazyFrame$with_columns(self, unpack_list(..., .context = "in $with_columns()")) |>
unwrap("in $with_columns()")
}

Expand Down Expand Up @@ -286,7 +286,7 @@ LazyFrame_with_row_count = function(name, offset = NULL) {
#' # lf$filter(pl$col("Sepal.Length") > 5 & pl$col("Petal.Width") < 1)
#' lf$filter(pl$col("Sepal.Length") > 5, pl$col("Petal.Width") < 1)
LazyFrame_filter = function(...) {
bool_expr = unpack_bool_expr(...) |>
bool_expr = unpack_bool_expr_result(...) |>
unwrap("in $filter()")

.pr$LazyFrame$filter(self, bool_expr)
Expand Down Expand Up @@ -977,7 +977,7 @@ LazyFrame_unique = function(subset = NULL, keep = "first", maintain_order = FALS
#' )$
#' collect()
LazyFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
.pr$LazyFrame$group_by(self, unpack_list(...), maintain_order) |>
.pr$LazyFrame$group_by(self, unpack_list(..., .context = "in $group_by():"), maintain_order) |>
unwrap("in $group_by():")
}

Expand Down Expand Up @@ -1060,7 +1060,8 @@ LazyFrame_sort = function(
nulls_last = FALSE,
maintain_order = FALSE) {
.pr$LazyFrame$sort_by_exprs(
self, unpack_list(by), err_on_named_args(...), descending, nulls_last, maintain_order
self, unpack_list(by, .context = "in $sort():"), err_on_named_args(...),
descending, nulls_last, maintain_order
) |>
unwrap("in $sort():")
}
Expand Down Expand Up @@ -1517,7 +1518,7 @@ LazyFrame_profile = function(
#' df$explode("numbers", "numbers_2")$collect()
#' df$explode(pl$col(pl$List(pl$Float64)))$collect()
LazyFrame_explode = function(...) {
dotdotdot_args = unpack_list(...)
dotdotdot_args = unpack_list(..., .context = "in explode():")
.pr$LazyFrame$explode(self, dotdotdot_args) |>
unwrap("in explode():")
}
Expand Down
77 changes: 0 additions & 77 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -60,83 +60,6 @@ verify_method_call = function(Class_env, Method_name, call = sys.call(1L), class
invisible(NULL)
}


# #highly experimental work around to use list2 without rlang
# ok.comma <- function(FUN, which=0) {
# function(...) {
# #browser()
# arg.list <- as.list(sys.call(which=which))[-1L]
# len <- length(arg.list)
# if (len > 1L) {
# last <- arg.list[[len]]
# if (missing(last)) {
# arg.list <- arg.list[-len]
# }
# }
# do.call(FUN, arg.list,envir=parent.frame(which+1))
# }
# }
# list2 = ok.comma(list)
# list3 = ok.comma(list,which = 2)


#' list2 - one day like rlang
#' list2 placeholder for future rust-impl
#' @noRd
#' @return An R list
#' @details rlang has this wonderful list2 implemented in c/c++, that is agnostic about trailing
#' commas in ... params. One day r-polars will have a list2-impl written in rust, which also allows
#' trailing commas.
list2 = list

#' Internal unpack list
#' @noRd
#' @param l any list
#' @param skip_classes char vec, do not unpack list inherits skip_classes.
#' @details py-polars syntax only allows e.g. `df.select([expr1, expr2,])` and not
#' `df.select(expr1, expr2,)`. r-polars also allows user to directly write
#' `df$select(expr1, expr2)` or `df$select(list(expr1,expr2))`. Unpack list
#' checks whether first and only arg is a list and unpacks it, to bridge the
#' allowed patterns of passing expr to methods with ... param input.
#' @return a list
#' @examples
#' f = \(...) unpack_list(list(...))
#' identical(f(list(1L, 2L, 3L)), f(1L, 2L, 3L)) # is TRUE
#' identical(f(list(1L, 2L), 3L), f(1L, 2L, 3L)) # is FALSE
unpack_list = function(..., skip_classes = NULL) {
l = list2(...)
if (
length(l) == 1L &&
is.list(l[[1L]]) &&
!(!is.null(skip_classes) && inherits(l[[1L]], skip_classes))
) {
l[[1L]]
} else {
l
}
}

#' Convert dot-dot-dot to bool expression
#' @noRd
#' @return Result, a list has `ok` (RPolarsExpr class) and `err` (RPolarsErr class)
#' @examples
#' unpack_bool_expr(pl$lit(TRUE), pl$lit(FALSE))
unpack_bool_expr = function(..., .msg = NULL) {
dots = list2(...)

if (!is.null(names(dots))) {
return(Err_plain(
"Detected a named input.",
"This usually means that you've used `=` instead of `==`."
))
}

dots |>
Reduce(`&`, x = _) |>
result(msg = .msg) |>
suppressWarnings()
}

#' Simple SQL CASE WHEN implementation for R
#' @noRd
#' @description Inspired by data.table::fcase + dplyr::case_when.
Expand Down
Loading

0 comments on commit 7590e75

Please sign in to comment.