Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better handling of int64 #706

Merged
merged 24 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@
variable (#708).
- New methods for the `list` subnamespace: `$set_union()`, `$set_intersection()`,
`$set_difference()`, `$set_symmetric_difference()` (#712).
- New option `int64_conversion` to specify how Int64 columns (that don't have an
equivalent in base R) should be converted. This option can either be set
globally with `pl$set_options()` or on a case-by-case basis, e.g. with
`$to_data_frame(int64_conversion =)` (#706).

## polars 0.12.2

Expand Down
15 changes: 10 additions & 5 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -845,15 +845,16 @@ DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) {
#' Return Polars DataFrame as R data.frame
#'
#' @param ... Any args passed to `as.data.frame()`.
#' @inheritParams pl_set_options
#'
#' @return An R data.frame
#' @keywords DataFrame
#' @examples
#' df = pl$DataFrame(iris[1:3, ])
#' df$to_data_frame()
DataFrame_to_data_frame = function(...) {
DataFrame_to_data_frame = function(..., int64_conversion = pl$options$int64_conversion ) {
# do not unnest structs and mark with I to also preserve categoricals as is
l = lapply(self$to_list(unnest_structs = FALSE), I)
l = lapply(self$to_list(unnest_structs = FALSE, int64_conversion = int64_conversion), I)

# similar to as.data.frame, but avoid checks, which would edit structs
df = data.frame(seq_along(l[[1L]]), ...)
Expand All @@ -870,6 +871,7 @@ DataFrame_to_data_frame = function(...) {
#'
#' @param unnest_structs Boolean. If `TRUE` (default), then `$unnest()` is applied
#' on any struct column.
#' @inheritParams pl_set_options
#'
#' @details
#' For simplicity reasons, this implementation relies on unnesting all structs
Expand All @@ -881,11 +883,14 @@ DataFrame_to_data_frame = function(...) {
#' @keywords DataFrame
#' @examples
#' pl$DataFrame(iris)$to_list()
DataFrame_to_list = function(unnest_structs = TRUE) {
DataFrame_to_list = function(unnest_structs = TRUE, ..., int64_conversion = pl$options$int64_conversion ) {
if (unnest_structs) {
unwrap(.pr$DataFrame$to_list(self))
.pr$DataFrame$to_list(self, int64_conversion ) |>
unwrap("in $to_list():")
} else {
restruct_list(unwrap(.pr$DataFrame$to_list_tag_structs(self)))
.pr$DataFrame$to_list_tag_structs(self, int64_conversion ) |>
unwrap("in $to_list():") |>
restruct_list()
}
}

Expand Down
7 changes: 4 additions & 3 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -3177,17 +3177,18 @@ Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) {
#' Otherwise, provide a DataFrame that the Expr should be evaluated in.
#' @param i Numeric column to extract. Default is zero (which gives the first
#' column).
#' @inheritParams pl_set_options
#' @return R object
#' @examples
#' pl$lit(1:3)$to_r()
Expr_to_r = function(df = NULL, i = 0) {
Expr_to_r = function(df = NULL, i = 0, ..., int64_conversion = pl$options$int64_conversion ) {
if (is.null(df)) {
pl$select(self)$to_series(i)$to_r()
pl$select(self)$to_series(i)$to_r(int64_conversion )
} else {
if (!inherits(df, c("RPolarsDataFrame"))) {
stop("Expr_to_r: input is not NULL or a DataFrame/Lazyframe")
}
df$select(self)$to_series(i)$to_r()
df$select(self)$to_series(i)$to_r(int64_conversion )
}
}

Expand Down
8 changes: 4 additions & 4 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,11 +161,11 @@ RPolarsDataFrame$dtype_strings <- function() .Call(wrap__RPolarsDataFrame__dtype

RPolarsDataFrame$schema <- function() .Call(wrap__RPolarsDataFrame__schema, self)

RPolarsDataFrame$to_list <- function() .Call(wrap__RPolarsDataFrame__to_list, self)
RPolarsDataFrame$to_list <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list, self, int64_conversion)

RPolarsDataFrame$to_list_unwind <- function() .Call(wrap__RPolarsDataFrame__to_list_unwind, self)
RPolarsDataFrame$to_list_unwind <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list_unwind, self, int64_conversion)

RPolarsDataFrame$to_list_tag_structs <- function() .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self)
RPolarsDataFrame$to_list_tag_structs <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self, int64_conversion)

RPolarsDataFrame$equals <- function(other) .Call(wrap__RPolarsDataFrame__equals, self, other)

Expand Down Expand Up @@ -1165,7 +1165,7 @@ RPolarsSeries$sleep <- function(millis) .Call(wrap__RPolarsSeries__sleep, self,

RPolarsSeries$panic <- function() .Call(wrap__RPolarsSeries__panic, self)

RPolarsSeries$to_r <- function() .Call(wrap__RPolarsSeries__to_r, self)
RPolarsSeries$to_r <- function(int64_conversion) .Call(wrap__RPolarsSeries__to_r, self, int64_conversion)

RPolarsSeries$rename_mut <- function(name) invisible(.Call(wrap__RPolarsSeries__rename_mut, self, name))

Expand Down
21 changes: 18 additions & 3 deletions R/options.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool)
# polars_optenv$rpool_cap # active binding for getting value, not for
polars_optreq$rpool_cap = list() # rust-side options already check args

polars_optenv$int64_conversion = "double"
polars_optreq$int64_conversion = list(
acceptable_choices = function(x) !is.null(x) && x %in% c("bit64", "double", "string"),
bit64_is_attached = function(x) if (x == "bit64") x %in% .packages() else TRUE
)

## END OF DEFINED OPTIONS

Expand Down Expand Up @@ -62,9 +67,15 @@ polars_optreq$rpool_cap = list() # rust-side options already check args
#' @param no_messages Hide messages.
#' @param rpool_cap The maximum number of R sessions that can be used to process
#' R code in the background. See Details.
#' @param int64_conversion How should Int64 values be handled when converting a
#' polars object to R?
#'
#' * `"double"` (default) converts the integer values to double.
#' * `"bit64"` uses `bit64::as.integer64()` to do the conversion (requires
#' the package `bit64` to be attached).
#' * `"string"` converts Int64 values to character.
#'
#' @rdname pl_options
#' @docType NULL
#'
#' @return
#' `pl$options` returns a named list with the value (`TRUE` or `FALSE`) of
Expand Down Expand Up @@ -93,7 +104,8 @@ pl_set_options = function(
do_not_repeat_call = FALSE,
debug_polars = FALSE,
no_messages = FALSE,
rpool_cap = 4) {
rpool_cap = 4,
int64_conversion = c("bit64", "double", "string")) {
# only modify arguments that were explicitly written in the function call
# (otherwise calling set_options() twice in a row would reset the args
# modified in the first call)
Expand Down Expand Up @@ -152,14 +164,17 @@ pl_reset_options = function() {
assign("debug_polars", FALSE, envir = polars_optenv)
assign("no_messages", FALSE, envir = polars_optenv)
assign("rpool_cap", 4, envir = polars_optenv)
assign("int64_conversion ", "double", envir = polars_optenv)
}


translate_failures = \(x) {
lookups = c(
"must_be_scalar" = "Input must be of length one.",
"must_be_integer" = "Input must be an integer.",
"must_be_bool" = "Input must be TRUE or FALSE"
"must_be_bool" = "Input must be TRUE or FALSE.",
"acceptable_choices" = "`int64_conversion ` must be one of \"float\", \"string\", \"bit64\".",
"bit64_is_attached" = "Package `bit64` must be attached to use `int64_conversion = \"bit64\"`."
Comment on lines +176 to +177
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about use raw strings here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it would add much

)
trans = lookups[x]
trans[is.na(trans)] = x[is.na(trans)]
Expand Down
19 changes: 11 additions & 8 deletions R/series__series.R
Original file line number Diff line number Diff line change
Expand Up @@ -240,8 +240,9 @@ Series_shape = method_as_property(function() {

#' Get r vector/list
#' @description return R list (if polars Series is list) or vector (any other polars Series type)
#' @name Series_to_r
#' @rdname Series_to_r
#'
#' @inheritParams pl_set_options
#'
#' @return R list or vector
#' @keywords Series
#' @details
Expand Down Expand Up @@ -274,20 +275,21 @@ Series_shape = method_as_property(function() {
#' series_list$to_r() # as list because Series DataType is list
#' series_list$to_r_list() # implicit call as.list(), same as to_r() as already list
#' series_list$to_vector() # implicit call unlist(), append into a vector
Series_to_r = \() {
unwrap(.pr$Series$to_r(self), "in $to_r():")
Series_to_r = \(int64_conversion = pl$options$int64_conversion ) {
unwrap(.pr$Series$to_r(self, int64_conversion ), "in $to_r():")
}
# TODO replace list example with Series only syntax

#' @rdname Series_to_r
#' @name Series_to_vector
#' @description return R vector (implicit unlist)
#' @inheritParams pl_set_options
#' @return R vector
#' @keywords Series
#' series_vec = pl$Series(letters[1:3])
#' series_vec$to_vector()
Series_to_vector = \() {
unlist(unwrap(.pr$Series$to_r(self)), "in $to_vector():")
Series_to_vector = \(int64_conversion = pl$options$int64_conversion ) {
unlist(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_vector():")
}

#' Alias to Series_to_vector (backward compatibility)
Expand All @@ -298,11 +300,12 @@ Series_to_r_vector = Series_to_vector
#' @rdname Series_to_r
#' @name Series_to_r_list
#' @description return R list (implicit as.list)
#' @inheritParams pl_set_options
#' @return R list
#' @keywords Series
#' @examples #
Series_to_r_list = \() {
as.list(unwrap(.pr$Series$to_r(self)), "in $to_r_list():")
Series_to_r_list = \(int64_conversion = pl$options$int64_conversion ) {
as.list(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_r_list():")
}


Expand Down
11 changes: 10 additions & 1 deletion man/DataFrame_to_data_frame.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion man/DataFrame_to_list.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 17 additions & 1 deletion man/Expr_to_r.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 13 additions & 3 deletions man/Series_to_r.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/pl_options.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading