From 1d962a42cc9b927a870879dc6de926d5682afd83 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 09:53:17 +0100 Subject: [PATCH 01/20] docs(website): do not use directory urls --- altdoc/mkdocs_static.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/altdoc/mkdocs_static.yml b/altdoc/mkdocs_static.yml index b090584bb..f918478d8 100644 --- a/altdoc/mkdocs_static.yml +++ b/altdoc/mkdocs_static.yml @@ -44,6 +44,8 @@ markdown_extensions: extra_css: - stylesheets/extra.css +use_directory_urls: false + ### Navigation tree nav: - Home: README.md From 4cb331cc83a76b83fb7556b12eeabf3840b3e98d Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 10:46:33 +0100 Subject: [PATCH 02/20] fix links --- DEVELOPMENT.md | 2 +- README.Rmd | 4 ++-- README.md | 29 ++++++++++++++++------------- vignettes/polars.Rmd | 12 ++++++------ 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 0fc05f0dd..bc85089de 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -31,7 +31,7 @@ About Rust code for R packages, see also ## Implementing new functions on the Rust side Here are the steps required for an example contribution, where we are implementing the -[cosine expression](https://rpolars.github.io/reference/Expr_cos/): +[cosine expression](https://rpolars.github.io/man/Expr_cos.html): 1. Look up the [polars.Expr.cos method in py-polars documentation](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.cos.html). 2. Press the `[source]` button to see the [Python implementation](https://github.com/pola-rs/polars/blob/d23bbd2f14f1cd7ae2e27e1954a2dc4276501eef/py-polars/polars/expr/expr.py#L5892-L5914) diff --git a/README.Rmd b/README.Rmd index a713a06f7..c294143db 100644 --- a/README.Rmd +++ b/README.Rmd @@ -57,7 +57,7 @@ Sys.setenv(NOT_CRAN = "true") install.packages("polars", repos = "https://rpolars.r-universe.dev") ``` -[The "Install" vignette](https://rpolars.github.io/vignettes/install/) (`vignette("install", "polars")`) +[The "Install" vignette](https://rpolars.github.io/vignettes/install.html) (`vignette("install", "polars")`) gives more details on how to install this package and other ways to install it. ## Quickstart example @@ -92,7 +92,7 @@ df$sort("fruits")$select( ) ``` -The [Get Started vignette](https://rpolars.github.io/vignettes/polars/) (`vignette("polars")`) provides +The [Get Started vignette](https://rpolars.github.io/vignettes/polars.html) (`vignette("polars")`) provides a more detailed introduction to **polars**. ## Extensions diff --git a/README.md b/README.md index 976914f8f..c89e52a00 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,8 @@ Sys.setenv(NOT_CRAN = "true") install.packages("polars", repos = "https://rpolars.r-universe.dev") ``` -[The “Install” vignette](https://rpolars.github.io/vignettes/install/) +[The “Install” +vignette](https://rpolars.github.io/vignettes/install.html) (`vignette("install", "polars")`) gives more details on how to install this package and other ways to install it. @@ -83,20 +84,22 @@ df$sort("fruits")$select( pl$col("A")$sort_by("B")$over("fruits")$alias("sort_A_by_B_by_fruits") ) #> shape: (5, 8) -#> ┌────────┬────────┬───────────────────────┬─────┬───────────────┬─────────────────┬─────────────────┬───────────────────────┐ -#> │ fruits ┆ cars ┆ literal_string_fruits ┆ B ┆ sum_A_by_cars ┆ sum_A_by_fruits ┆ rev_A_by_fruits ┆ sort_A_by_B_by_fruits │ -#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ -#> │ str ┆ str ┆ str ┆ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 │ -#> ╞════════╪════════╪═══════════════════════╪═════╪═══════════════╪═════════════════╪═════════════════╪═══════════════════════╡ -#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 4 ┆ 4 │ -#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 3 ┆ 3 │ -#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 5 ┆ 5 │ -#> │ banana ┆ audi ┆ fruits ┆ 11 ┆ 2 ┆ 8 ┆ 2 ┆ 2 │ -#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 1 ┆ 1 │ -#> └────────┴────────┴───────────────────────┴─────┴───────────────┴─────────────────┴─────────────────┴───────────────────────┘ +#> ┌────────┬────────┬──────────────┬─────┬──────────────┬──────────────┬──────────────┬──────────────┐ +#> │ fruits ┆ cars ┆ literal_stri ┆ B ┆ sum_A_by_car ┆ sum_A_by_fru ┆ rev_A_by_fru ┆ sort_A_by_B_ │ +#> │ --- ┆ --- ┆ ng_fruits ┆ --- ┆ s ┆ its ┆ its ┆ by_fruits │ +#> │ str ┆ str ┆ --- ┆ i32 ┆ --- ┆ --- ┆ --- ┆ --- │ +#> │ ┆ ┆ str ┆ ┆ i32 ┆ i32 ┆ i32 ┆ i32 │ +#> ╞════════╪════════╪══════════════╪═════╪══════════════╪══════════════╪══════════════╪══════════════╡ +#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 4 ┆ 4 │ +#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 3 ┆ 3 │ +#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 5 ┆ 5 │ +#> │ banana ┆ audi ┆ fruits ┆ 11 ┆ 2 ┆ 8 ┆ 2 ┆ 2 │ +#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 1 ┆ 1 │ +#> └────────┴────────┴──────────────┴─────┴──────────────┴──────────────┴──────────────┴──────────────┘ ``` -The [Get Started vignette](https://rpolars.github.io/vignettes/polars/) +The [Get Started +vignette](https://rpolars.github.io/vignettes/polars.html) (`vignette("polars")`) provides a more detailed introduction to **polars**. diff --git a/vignettes/polars.Rmd b/vignettes/polars.Rmd index dfed2f669..98e8e0550 100644 --- a/vignettes/polars.Rmd +++ b/vignettes/polars.Rmd @@ -225,9 +225,9 @@ dat$group_by("cyl")$mean() We can now start chaining together various methods (expressions) to manipulate it in different ways. For example, we can subset the data by rows -([`filter()`](https://rpolars.github.io/reference/DataFrame_filter/)) +([`filter()`](https://rpolars.github.io/man/DataFrame_filter.html)) and also columns -([`select()`](https://rpolars.github.io/reference/DataFrame_select/)): +([`select()`](https://rpolars.github.io/man/DataFrame_select.html)): ```{r} dat$filter(pl$col("cyl") == 6) @@ -254,7 +254,7 @@ so you can simultaneously transform it while you are subsetting. However, the result will exclude any columns that weren't specified as part of the expression. To modify or add some columns---whilst preserving all others in the dataset---it is therefore better to use the -[`with_columns()`](https://rpolars.github.io/reference/DataFrame_with_columns/) +[`with_columns()`](https://rpolars.github.io/man/DataFrame_with_columns.html) method. This next code chunk is equivalent to `mtcars |> dplyr::mutate(sum_mpg=sum(mpg), sum_hp=sum(hp), .by = cyl)`. @@ -276,7 +276,7 @@ dat$with_columns( Similarly, here's how we could have aggregated (i.e., collapsed) the dataset by groups instead of modifying them. We need simply invoke the `group_by()` and -[`agg()`](https://rpolars.github.io/reference/Expr_agg_groups/) methods. +[`agg()`](https://rpolars.github.io/man/Expr_agg_groups.html) methods. ```{r} dat$group_by( @@ -371,7 +371,7 @@ flights$join( ``` More information on the **polars** joining method can be found in the -[reference manual](https://rpolars.github.io/reference/DataFrame_join/). +[reference manual](https://rpolars.github.io/man/DataFrame_join.html). The package supports many other data manipulation operations, which we won't cover here. Hopefully, you will already have a sense of the key syntax features. @@ -387,7 +387,7 @@ performance. Delaying execution until the last possible moment allows Polars to apply automatic optimization to every query. Let's take a quick look. To create a so-called -"[LazyFrame](https://rpolars.github.io/reference/LazyFrame_class/)" from an +"[LazyFrame](https://rpolars.github.io/man/LazyFrame_class.html)" from an existing object in memory, we can invoke the `lazy()` constructor. ```{r} From bb6a8f7deb1da7a3fe21bfb11e451ceb26a17a5d Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 14:31:34 +0100 Subject: [PATCH 03/20] readme: try to revert change in table --- README.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c89e52a00..9feb5ae14 100644 --- a/README.md +++ b/README.md @@ -84,18 +84,17 @@ df$sort("fruits")$select( pl$col("A")$sort_by("B")$over("fruits")$alias("sort_A_by_B_by_fruits") ) #> shape: (5, 8) -#> ┌────────┬────────┬──────────────┬─────┬──────────────┬──────────────┬──────────────┬──────────────┐ -#> │ fruits ┆ cars ┆ literal_stri ┆ B ┆ sum_A_by_car ┆ sum_A_by_fru ┆ rev_A_by_fru ┆ sort_A_by_B_ │ -#> │ --- ┆ --- ┆ ng_fruits ┆ --- ┆ s ┆ its ┆ its ┆ by_fruits │ -#> │ str ┆ str ┆ --- ┆ i32 ┆ --- ┆ --- ┆ --- ┆ --- │ -#> │ ┆ ┆ str ┆ ┆ i32 ┆ i32 ┆ i32 ┆ i32 │ -#> ╞════════╪════════╪══════════════╪═════╪══════════════╪══════════════╪══════════════╪══════════════╡ -#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 4 ┆ 4 │ -#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 3 ┆ 3 │ -#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 5 ┆ 5 │ -#> │ banana ┆ audi ┆ fruits ┆ 11 ┆ 2 ┆ 8 ┆ 2 ┆ 2 │ -#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 1 ┆ 1 │ -#> └────────┴────────┴──────────────┴─────┴──────────────┴──────────────┴──────────────┴──────────────┘ +#> ┌────────┬────────┬───────────────────────┬─────┬───────────────┬─────────────────┬─────────────────┬───────────────────────┐ +#> │ fruits ┆ cars ┆ literal_string_fruits ┆ B ┆ sum_A_by_cars ┆ sum_A_by_fruits ┆ rev_A_by_fruits ┆ sort_A_by_B_by_fruits │ +#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ +#> │ str ┆ str ┆ str ┆ i32 ┆ i32 ┆ i32 ┆ i32 ┆ i32 │ +#> ╞════════╪════════╪═══════════════════════╪═════╪═══════════════╪═════════════════╪═════════════════╪═══════════════════════╡ +#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 4 ┆ 4 │ +#> │ apple ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 7 ┆ 3 ┆ 3 │ +#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 5 ┆ 5 │ +#> │ banana ┆ audi ┆ fruits ┆ 11 ┆ 2 ┆ 8 ┆ 2 ┆ 2 │ +#> │ banana ┆ beetle ┆ fruits ┆ 11 ┆ 4 ┆ 8 ┆ 1 ┆ 1 │ +#> └────────┴────────┴───────────────────────┴─────┴───────────────┴─────────────────┴─────────────────┴───────────────────────┘ ``` The [Get Started From 66148a97f986ea0041f85f23fc2ace76f2816ddc Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 14:34:23 +0100 Subject: [PATCH 04/20] add pkgdown.yml --- pkgdown.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 pkgdown.yml diff --git a/pkgdown.yml b/pkgdown.yml new file mode 100644 index 000000000..5eb60a1bf --- /dev/null +++ b/pkgdown.yml @@ -0,0 +1,8 @@ +articles: + polars: polars.html + install: install.html + userguide: userguide.html + performance: performance.html +urls: + reference: https://rpolars.github.io/man + article: https://rpolars.github.io/vignettes From bd3ba323a09c7b69a2cecd2b9d9f8fce39519be6 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 15:06:47 +0100 Subject: [PATCH 05/20] put pkgdown.yml in buildignore --- .Rbuildignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.Rbuildignore b/.Rbuildignore index c651a9022..22b6f570a 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -30,3 +30,4 @@ ^\.venv_altdoc$ ^Taskfile\.yml$ ^\.editorconfig$ +^pkgdown.yml$ From 934e9e9c7e6d8b763750d6158f4a9c3eacfe077e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Sun, 14 Jan 2024 15:21:02 +0100 Subject: [PATCH 06/20] move pkgdown.yml to altdoc folder --- .Rbuildignore | 1 - pkgdown.yml => altdoc/pkgdown.yml | 0 2 files changed, 1 deletion(-) rename pkgdown.yml => altdoc/pkgdown.yml (100%) diff --git a/.Rbuildignore b/.Rbuildignore index 22b6f570a..c651a9022 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -30,4 +30,3 @@ ^\.venv_altdoc$ ^Taskfile\.yml$ ^\.editorconfig$ -^pkgdown.yml$ diff --git a/pkgdown.yml b/altdoc/pkgdown.yml similarity index 100% rename from pkgdown.yml rename to altdoc/pkgdown.yml From 838eafd6d846da026b8b11ac02a94e0cf6b74f3c Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 15 Jan 2024 13:03:40 +0100 Subject: [PATCH 07/20] try some stuff --- altdoc/altdoc_postprocessing.R | 128 +++++++++++++++++---------------- 1 file changed, 68 insertions(+), 60 deletions(-) diff --git a/altdoc/altdoc_postprocessing.R b/altdoc/altdoc_postprocessing.R index c0f3481d6..aded87963 100644 --- a/altdoc/altdoc_postprocessing.R +++ b/altdoc/altdoc_postprocessing.R @@ -1,83 +1,91 @@ ### To be run after altdoc::render_docs() +### +### Make the "Usage" section prettier (if there is one): +### DataFrame_describe(...) -> $describe() list_man_html = list.files("docs/man", pattern = "\\.html$", full.names = TRUE, recursive = TRUE ) -### Make the "Usage" section prettier (if there is one): -### DataFrame_describe(...) -> $describe() +patterns_replacements = rbind( + c("DataFrame_", "$"), + c("DynamicGroupBy_", "$"), + c("Expr_", "$"), + c("ExprBin_", "$bin$"), + c("ExprCat_", "$cat$"), + c("ExprDT_", "$dt$"), + c("ExprList_", "$list$"), + c("ExprMeta_", "$meta$"), + c("ExprName_", "$name$"), + c("ExprStr_", "$str$"), + c("ExprStruct_", "$struct$"), + c("GroupBy_", "$"), + # file names are "IO_read_", function names are "pl_read_" + c("(IO|pl)_read_", "pl$read_"), + # file names are "IO_scan_", function names are "pl_scan_" + c("(IO|pl)_scan_", "pl$scan_"), + # file names are "IO_sink_", function names are "LazyFrame_sink_" + c("(IO|LazyFrame)_sink_", "$sink_"), + # file names are "IO_write_", function names are "DataFrame_write_" + c("(IO|DataFrame)_write_", "$write_"), + c("LazyFrame_", "$"), + c("LazyGroupBy_", "$"), + c("pl_", "pl$"), + # Category "DataType" in the sidebar, but called with "pl$" + c("DataType_", "pl$"), + c("RField_", "$"), + c("RThreadHandle_", "$"), + c("Series_", "$"), + c("SQLContext_", "$") +) |> as.data.frame() -classes = c( - "Series", "DataFrame", "LazyFrame", "GroupBy", - "LazyGroupBy", "IO", "RField", "RThreadHandle", "SQLContext", "S3", - "Expr", "pl" -) +colnames(patterns_replacements) = c("pattern", "replacement") + + + +replace_in_usage = function(txt, which_class, replacement) { + usage_section = grep("

Usage

", txt) + if (length(usage_section) == 1) { + before_usage_idx = 1:usage_section + usage = txt[-before_usage_idx] + after_usage_idx = grep("", usage)[1] + usage = usage[1:(after_usage_idx - 1)] + new_usage = gsub(which_class, replacement, usage) + c( + txt[before_usage_idx], + new_usage, + txt[(length(before_usage_idx) + length(usage) + after_usage_idx):length(txt)] + ) + } else { + txt + } +} -to_modify = grep( - paste0("/", paste(classes, collapse = "|")), - list_man_html, - value = TRUE -) -for (i in to_modify) { - which_class = gsub("docs/man/([^_]+).*$", "\\1", i, perl = TRUE) + +for (i in list_man_html) { + which_class = paste0(gsub("docs/man/([^_]+).*$", "\\1", i, perl = TRUE), "_") orig = readLines(i, warn = FALSE) if (!any(grepl("

Usage

", orig))) { next } - # IO functions are DataFrame or LazyFrame methods - if (which_class == "IO") { - if (any(grepl("LazyFrame_sink", orig))) { - which_class <<- "LazyFrame" - } else if (any(grepl("DataFrame_write", orig))) { - which_class <<- "DataFrame" - } - } - - # prefix with pl$ for read/scan - if (which_class == "IO") { - which_input = if (any(grepl("read_", orig))) { - "read" - } else if (any(grepl("scan_", orig))) { - "scan" - } else { - "" - } - new = gsub( - paste0("", which_input, "_"), - paste0("pl$", which_input, "_"), - orig - ) - } else if (which_class == "pl") { - new = gsub( - "pl_", - "pl$", - orig - ) - } else if (which_class %in% c( - "ExprBin", "ExprCat", "ExprDT", "ExprList", - "ExprMeta", "ExprName", "ExprStr", "ExprStruct" - )) { - subns = tolower(gsub("Expr", "", which_class)) + replacement = patterns_replacements[ + patterns_replacements$pattern == which_class, + "replacement" + ] |> + gsub("<", "<", x = _) |> + gsub(">", ">", x = _) - new = gsub( - paste0("", which_class, "_"), - paste0("<Expr>$", subns, "$"), - orig - ) - } else { - new = gsub( - paste0("", which_class, "_"), - paste0("<", which_class, ">$"), - orig - ) + if (length(replacement) == 1) { + new = replace_in_usage(orig, which_class, replacement) } - # fix escaping of left-angle brackets (not needed for right-angle brackets) + # fix escaping of angle brackets new = gsub("\\\\<", "<", new) + new = gsub("\\\\>", ">", new) writeLines(new, i) } From 82ab074066a30cfe0384cbc0f9b67fdc28c5c77e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Mon, 15 Jan 2024 14:05:16 +0100 Subject: [PATCH 08/20] init --- R/dataframe__frame.R | 7 +++- R/options.R | 12 +++++- src/rust/src/conversion_s_to_r.rs | 66 ++++++++++++++++++++----------- src/rust/src/rdataframe/mod.rs | 6 ++- 4 files changed, 64 insertions(+), 27 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 7d99c0a65..ff5df764b 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -883,9 +883,12 @@ DataFrame_to_data_frame = function(...) { #' pl$DataFrame(iris)$to_list() DataFrame_to_list = function(unnest_structs = TRUE) { if (unnest_structs) { - unwrap(.pr$DataFrame$to_list(self)) + .pr$DataFrame$to_list(self, pl$options$bigint_conversion) |> + unwrap("in $to_list():") } else { - restruct_list(unwrap(.pr$DataFrame$to_list_tag_structs(self))) + .pr$DataFrame$to_list_tag_structs(self) |> + unwrap("in $to_list():") |> + restruct_list() } } diff --git a/R/options.R b/R/options.R index e9992b190..97a45df39 100644 --- a/R/options.R +++ b/R/options.R @@ -25,6 +25,8 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool) # polars_optenv$rpool_cap # active binding for getting value, not for polars_optreq$rpool_cap = list() # rust-side options already check args +polars_optenv$bigint_conversion = "real" +polars_optreq$bigint_conversion = c("bit64", "real", "string") ## END OF DEFINED OPTIONS @@ -62,6 +64,13 @@ polars_optreq$rpool_cap = list() # rust-side options already check args #' @param no_messages Hide messages. #' @param rpool_cap The maximum number of R sessions that can be used to process #' R code in the background. See Details. +#' @param bigint_conversion How should Int64 values be handled when converting a +#' polars object to R? +#' +#' * `"real"` (default) converts the values to Float64. +#' * `"bit64"` uses `bit64::as.integer64()` to do the conversion (requires +#' the package `bit64` to be installed). +#' * `"string"` converts Int64 values to character. #' #' @rdname pl_options #' @docType NULL @@ -93,7 +102,8 @@ pl_set_options = function( do_not_repeat_call = FALSE, debug_polars = FALSE, no_messages = FALSE, - rpool_cap = 4) { + rpool_cap = 4, + bigint_conversion = c("bit64", "real", "string")) { # only modify arguments that were explicitly written in the function call # (otherwise calling set_options() twice in a row would reset the args # modified in the first call) diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index f6d380682..439b2d65d 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -1,8 +1,11 @@ +use crate::rdataframe::{RPolarsDataFrame, RPolarsLazyFrame}; +use crate::robj_to; +use crate::rpolarserr::{polars_to_rpolars_err, rerr, RResult, WithRctx}; use extendr_api::prelude::*; use polars::prelude::{self as pl}; - -use crate::rdataframe::RPolarsDataFrame; -use pl::PolarsError as pl_error; +use polars_core::chunked_array::ops::ChunkCast; +use polars_core::datatypes::DataType; +use polars_core::error::map_err; // #[extendr] // fn hello_bit64() -> Robj { @@ -24,10 +27,15 @@ use pl::PolarsError as pl_error; pub fn pl_series_to_list( series: &pl::Series, tag_structs: bool, - bit64: bool, + bigint_conversion: Robj, ) -> pl::PolarsResult { use pl::DataType::*; - fn to_list_recursive(s: &pl::Series, tag_structs: bool, bit64: bool) -> pl::PolarsResult { + fn to_list_recursive( + s: &pl::Series, + tag_structs: bool, + bigint_conversion: Robj, + ) -> pl::PolarsResult { + let bigint_conversion = robj_to!(str, bigint_conversion).unwrap(); match s.dtype() { Float64 => s.f64().map(|ca| ca.into_iter().collect_robj()), Float32 => s.f32().map(|ca| ca.into_iter().collect_robj()), @@ -35,23 +43,37 @@ pub fn pl_series_to_list( Int8 => s.i8().map(|ca| ca.into_iter().collect_robj()), Int16 => s.i16().map(|ca| ca.into_iter().collect_robj()), Int32 => s.i32().map(|ca| ca.into_iter().collect_robj()), - Int64 if bit64 => s.i64().map(|ca| { - ca.into_iter() - .map(|opt| match opt { - Some(x) if x != crate::utils::BIT64_NA_ECODING => { - let x = f64::from_bits(x as u64); - Some(x) - } - _ => { - let x = crate::utils::BIT64_NA_ECODING; - let x = f64::from_bits(x as u64); - Some(x) - } - }) - .collect_robj() - .set_class(&["integer64"]) - .expect("internal error could not set class label 'integer64'") - }), + Int64 => || match bigint_conversion { + "real" => s + .cast(&DataType::Float64)? + .f64() + .map(|ca| ca.into_iter().collect_robj()) + .ok(), + "string" => s + .cast(&DataType::String)? + .str() + .map(|ca| ca.into_iter().collect_robj()) + .ok(), + "bit64" => s.i64().map(|ca| { + ca.into_iter() + .map(|opt| match opt { + Some(x) if x != crate::utils::BIT64_NA_ECODING => { + let x = f64::from_bits(x as u64); + Some(x) + } + _ => { + let x = crate::utils::BIT64_NA_ECODING; + let x = f64::from_bits(x as u64); + Some(x) + } + }) + .collect_robj() + .set_class(&["integer64"]) + .expect("internal error could not set class label 'integer64'") + }).ok(), + _ => NA_INTEGER. + + }, Int64 => s.i64().map(|ca| { ca.into_iter() .map(|opt| opt.map(|val| val as f64)) diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 75c28a533..45fea5036 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -225,10 +225,12 @@ impl RPolarsDataFrame { // self.0.compare // } - pub fn to_list(&self) -> List { + pub fn to_list(&self, bigint_conversion: Robj) -> List { let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), - self.0.iter().map(|x| pl_series_to_list(x, false, true)), + self.0 + .iter() + .map(|x| pl_series_to_list(x, false, bigint_conversion)), ); let robj_list_res = robj_vec_res From 91b3490588eaa4586ef0de3b6f6ad5ad7e37ca01 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 16 Jan 2024 18:57:41 +0100 Subject: [PATCH 09/20] panic if unknown bigint_conversion [skip ci] --- src/rust/src/conversion_s_to_r.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 439b2d65d..aedadb939 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -1,11 +1,8 @@ -use crate::rdataframe::{RPolarsDataFrame, RPolarsLazyFrame}; use crate::robj_to; -use crate::rpolarserr::{polars_to_rpolars_err, rerr, RResult, WithRctx}; +use crate::rpolarserr::{polars_to_rpolars_err}; use extendr_api::prelude::*; use polars::prelude::{self as pl}; -use polars_core::chunked_array::ops::ChunkCast; use polars_core::datatypes::DataType; -use polars_core::error::map_err; // #[extendr] // fn hello_bit64() -> Robj { @@ -43,7 +40,7 @@ pub fn pl_series_to_list( Int8 => s.i8().map(|ca| ca.into_iter().collect_robj()), Int16 => s.i16().map(|ca| ca.into_iter().collect_robj()), Int32 => s.i32().map(|ca| ca.into_iter().collect_robj()), - Int64 => || match bigint_conversion { + Int64 => match bigint_conversion { "real" => s .cast(&DataType::Float64)? .f64() @@ -71,8 +68,7 @@ pub fn pl_series_to_list( .set_class(&["integer64"]) .expect("internal error could not set class label 'integer64'") }).ok(), - _ => NA_INTEGER. - + _ => panic!("foo"), }, Int64 => s.i64().map(|ca| { ca.into_iter() From 47cd85135512a7fd18f76a7e2ea8e81cb55cd322 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:06:42 +0100 Subject: [PATCH 10/20] revert change in altdoc script --- altdoc/altdoc_postprocessing.R | 132 ++++++++++++++++----------------- 1 file changed, 62 insertions(+), 70 deletions(-) diff --git a/altdoc/altdoc_postprocessing.R b/altdoc/altdoc_postprocessing.R index aded87963..0353b35eb 100644 --- a/altdoc/altdoc_postprocessing.R +++ b/altdoc/altdoc_postprocessing.R @@ -1,91 +1,83 @@ ### To be run after altdoc::render_docs() -### -### Make the "Usage" section prettier (if there is one): -### DataFrame_describe(...) -> $describe() list_man_html = list.files("docs/man", - pattern = "\\.html$", full.names = TRUE, - recursive = TRUE + pattern = "\\.html$", full.names = TRUE, + recursive = TRUE ) -patterns_replacements = rbind( - c("DataFrame_", "$"), - c("DynamicGroupBy_", "$"), - c("Expr_", "$"), - c("ExprBin_", "$bin$"), - c("ExprCat_", "$cat$"), - c("ExprDT_", "$dt$"), - c("ExprList_", "$list$"), - c("ExprMeta_", "$meta$"), - c("ExprName_", "$name$"), - c("ExprStr_", "$str$"), - c("ExprStruct_", "$struct$"), - c("GroupBy_", "$"), - # file names are "IO_read_", function names are "pl_read_" - c("(IO|pl)_read_", "pl$read_"), - # file names are "IO_scan_", function names are "pl_scan_" - c("(IO|pl)_scan_", "pl$scan_"), - # file names are "IO_sink_", function names are "LazyFrame_sink_" - c("(IO|LazyFrame)_sink_", "$sink_"), - # file names are "IO_write_", function names are "DataFrame_write_" - c("(IO|DataFrame)_write_", "$write_"), - c("LazyFrame_", "$"), - c("LazyGroupBy_", "$"), - c("pl_", "pl$"), - # Category "DataType" in the sidebar, but called with "pl$" - c("DataType_", "pl$"), - c("RField_", "$"), - c("RThreadHandle_", "$"), - c("Series_", "$"), - c("SQLContext_", "$") -) |> as.data.frame() - -colnames(patterns_replacements) = c("pattern", "replacement") - - - -replace_in_usage = function(txt, which_class, replacement) { - usage_section = grep("

Usage

", txt) - if (length(usage_section) == 1) { - before_usage_idx = 1:usage_section - usage = txt[-before_usage_idx] - after_usage_idx = grep("", usage)[1] - usage = usage[1:(after_usage_idx - 1)] - new_usage = gsub(which_class, replacement, usage) - c( - txt[before_usage_idx], - new_usage, - txt[(length(before_usage_idx) + length(usage) + after_usage_idx):length(txt)] - ) - } else { - txt - } -} +### Make the "Usage" section prettier (if there is one): +### DataFrame_describe(...) -> $describe() +classes = c( + "Series", "DataFrame", "LazyFrame", "GroupBy", + "LazyGroupBy", "IO", "RField", "RThreadHandle", "SQLContext", "S3", + "Expr", "pl" +) +to_modify = grep( + paste0("/", paste(classes, collapse = "|")), + list_man_html, + value = TRUE +) -for (i in list_man_html) { - which_class = paste0(gsub("docs/man/([^_]+).*$", "\\1", i, perl = TRUE), "_") +for (i in to_modify) { + which_class = gsub("docs/man/([^_]+).*$", "\\1", i, perl = TRUE) orig = readLines(i, warn = FALSE) if (!any(grepl("

Usage

", orig))) { next } - replacement = patterns_replacements[ - patterns_replacements$pattern == which_class, - "replacement" - ] |> - gsub("<", "<", x = _) |> - gsub(">", ">", x = _) + # IO functions are DataFrame or LazyFrame methods + if (which_class == "IO") { + if (any(grepl("LazyFrame_sink", orig))) { + which_class <<- "LazyFrame" + } else if (any(grepl("DataFrame_write", orig))) { + which_class <<- "DataFrame" + } + } + + # prefix with pl$ for read/scan + if (which_class == "IO") { + which_input = if (any(grepl("read_", orig))) { + "read" + } else if (any(grepl("scan_", orig))) { + "scan" + } else { + "" + } + new = gsub( + paste0("", which_input, "_"), + paste0("pl$", which_input, "_"), + orig + ) + } else if (which_class == "pl") { + new = gsub( + "pl_", + "pl$", + orig + ) + } else if (which_class %in% c( + "ExprBin", "ExprCat", "ExprDT", "ExprList", + "ExprMeta", "ExprName", "ExprStr", "ExprStruct" + )) { + subns = tolower(gsub("Expr", "", which_class)) - if (length(replacement) == 1) { - new = replace_in_usage(orig, which_class, replacement) + new = gsub( + paste0("", which_class, "_"), + paste0("<Expr>$", subns, "$"), + orig + ) + } else { + new = gsub( + paste0("", which_class, "_"), + paste0("<", which_class, ">$"), + orig + ) } - # fix escaping of angle brackets + # fix escaping of left-angle brackets (not needed for right-angle brackets) new = gsub("\\\\<", "<", new) - new = gsub("\\\\>", ">", new) writeLines(new, i) } From a05940287a31bd903b467bc1c5a1305a094127c3 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 16 Jan 2024 19:07:26 +0100 Subject: [PATCH 11/20] more [skip ci] --- altdoc/altdoc_postprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/altdoc/altdoc_postprocessing.R b/altdoc/altdoc_postprocessing.R index 0353b35eb..c0f3481d6 100644 --- a/altdoc/altdoc_postprocessing.R +++ b/altdoc/altdoc_postprocessing.R @@ -1,8 +1,8 @@ ### To be run after altdoc::render_docs() list_man_html = list.files("docs/man", - pattern = "\\.html$", full.names = TRUE, - recursive = TRUE + pattern = "\\.html$", full.names = TRUE, + recursive = TRUE ) ### Make the "Usage" section prettier (if there is one): From f9bc07cff6cdbaa3e4bc1060a711aeb1cc2b41d5 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 17 Jan 2024 23:50:43 +0100 Subject: [PATCH 12/20] make it compile [skip ci] --- R/extendr-wrappers.R | 8 ++++---- R/series__series.R | 2 +- src/rust/src/conversion_s_to_r.rs | 31 ++++++++++++------------------- src/rust/src/rdataframe/mod.rs | 18 +++++++++++------- src/rust/src/series.rs | 19 ++++++++++--------- 5 files changed, 38 insertions(+), 40 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 7da6a937e..2ea0f1d84 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -161,11 +161,11 @@ RPolarsDataFrame$dtype_strings <- function() .Call(wrap__RPolarsDataFrame__dtype RPolarsDataFrame$schema <- function() .Call(wrap__RPolarsDataFrame__schema, self) -RPolarsDataFrame$to_list <- function() .Call(wrap__RPolarsDataFrame__to_list, self) +RPolarsDataFrame$to_list <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list, self, bigint_conversion) -RPolarsDataFrame$to_list_unwind <- function() .Call(wrap__RPolarsDataFrame__to_list_unwind, self) +RPolarsDataFrame$to_list_unwind <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list_unwind, self, bigint_conversion) -RPolarsDataFrame$to_list_tag_structs <- function() .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self) +RPolarsDataFrame$to_list_tag_structs <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self, bigint_conversion) RPolarsDataFrame$equals <- function(other) .Call(wrap__RPolarsDataFrame__equals, self, other) @@ -1159,7 +1159,7 @@ RPolarsSeries$sleep <- function(millis) .Call(wrap__RPolarsSeries__sleep, self, RPolarsSeries$panic <- function() .Call(wrap__RPolarsSeries__panic, self) -RPolarsSeries$to_r <- function() .Call(wrap__RPolarsSeries__to_r, self) +RPolarsSeries$to_r <- function(bigint_conversion) .Call(wrap__RPolarsSeries__to_r, self, bigint_conversion) RPolarsSeries$rename_mut <- function(name) invisible(.Call(wrap__RPolarsSeries__rename_mut, self, name)) diff --git a/R/series__series.R b/R/series__series.R index 6f3cdecfc..c7e302139 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -275,7 +275,7 @@ Series_shape = method_as_property(function() { #' series_list$to_r_list() # implicit call as.list(), same as to_r() as already list #' series_list$to_vector() # implicit call unlist(), append into a vector Series_to_r = \() { - unwrap(.pr$Series$to_r(self), "in $to_r():") + unwrap(.pr$Series$to_r(self, pl$options$bigint_conversion), "in $to_r():") } # TODO replace list example with Series only syntax diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index aedadb939..85911bc9d 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -1,6 +1,6 @@ -use crate::robj_to; -use crate::rpolarserr::{polars_to_rpolars_err}; +use crate::rdataframe::RPolarsDataFrame; use extendr_api::prelude::*; +use pl::PolarsError as pl_error; use polars::prelude::{self as pl}; use polars_core::datatypes::DataType; @@ -24,15 +24,14 @@ use polars_core::datatypes::DataType; pub fn pl_series_to_list( series: &pl::Series, tag_structs: bool, - bigint_conversion: Robj, + bigint_conversion: &str, ) -> pl::PolarsResult { use pl::DataType::*; fn to_list_recursive( s: &pl::Series, tag_structs: bool, - bigint_conversion: Robj, + bigint_conversion: &str, ) -> pl::PolarsResult { - let bigint_conversion = robj_to!(str, bigint_conversion).unwrap(); match s.dtype() { Float64 => s.f64().map(|ca| ca.into_iter().collect_robj()), Float32 => s.f32().map(|ca| ca.into_iter().collect_robj()), @@ -44,13 +43,11 @@ pub fn pl_series_to_list( "real" => s .cast(&DataType::Float64)? .f64() - .map(|ca| ca.into_iter().collect_robj()) - .ok(), + .map(|ca| ca.into_iter().collect_robj()), "string" => s .cast(&DataType::String)? .str() - .map(|ca| ca.into_iter().collect_robj()) - .ok(), + .map(|ca| ca.into_iter().collect_robj()), "bit64" => s.i64().map(|ca| { ca.into_iter() .map(|opt| match opt { @@ -67,14 +64,9 @@ pub fn pl_series_to_list( .collect_robj() .set_class(&["integer64"]) .expect("internal error could not set class label 'integer64'") - }).ok(), - _ => panic!("foo"), + }), + _ => panic!("foo"), }, - Int64 => s.i64().map(|ca| { - ca.into_iter() - .map(|opt| opt.map(|val| val as f64)) - .collect_robj() - }), UInt8 => s.u8().map(|ca| { ca.into_iter() .map(|opt| opt.map(|val| val as i32)) @@ -146,7 +138,8 @@ pub fn pl_series_to_list( Some(s) => { let s_ref = s.as_ref(); // is safe because s is read to generate new Robj, then discarded. - let inner_val = to_list_recursive(s_ref, tag_structs, bit64)?; + let inner_val = + to_list_recursive(s_ref, tag_structs, bigint_conversion)?; v.push(inner_val); } @@ -163,7 +156,7 @@ pub fn pl_series_to_list( } Struct(_) => { let df = s.clone().into_frame().unnest([s.name()]).unwrap(); - let mut l = RPolarsDataFrame(df).to_list_result()?; + let mut l = RPolarsDataFrame(df).to_list_result(bigint_conversion)?; //TODO contribute extendr_api set_attrib mutates &self, change signature to surprise anyone if tag_structs { @@ -242,5 +235,5 @@ pub fn pl_series_to_list( } } - to_list_recursive(series, tag_structs, bit64) + to_list_recursive(series, tag_structs, bigint_conversion) } diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 45fea5036..3cf9329c8 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -225,7 +225,7 @@ impl RPolarsDataFrame { // self.0.compare // } - pub fn to_list(&self, bigint_conversion: Robj) -> List { + pub fn to_list(&self, bigint_conversion: &str) -> List { let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), self.0 @@ -245,10 +245,12 @@ impl RPolarsDataFrame { } //this methods should only be used for benchmarking - pub fn to_list_unwind(&self) -> Robj { + pub fn to_list_unwind(&self, bigint_conversion: &str) -> Robj { let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), - self.0.iter().map(|x| pl_series_to_list(x, false, true)), + self.0 + .iter() + .map(|x| pl_series_to_list(x, false, bigint_conversion)), ); let robj_list_res = robj_vec_res @@ -264,11 +266,13 @@ impl RPolarsDataFrame { // to_list have this variant with set_structs = true at pl_series_to_list // does not expose this arg in to_list as it is quite niche and might be deprecated later - pub fn to_list_tag_structs(&self) -> List { + pub fn to_list_tag_structs(&self, bigint_conversion: &str) -> List { //convert DataFrame to Result of to R vectors, error if DataType is not supported let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), - self.0.iter().map(|x| pl_series_to_list(x, true, true)), + self.0 + .iter() + .map(|x| pl_series_to_list(x, true, bigint_conversion)), ); //rewrap Ok(Vec) as R list @@ -510,12 +514,12 @@ impl RPolarsDataFrame { } impl RPolarsDataFrame { - pub fn to_list_result(&self) -> Result { + pub fn to_list_result(&self, bigint_conversion: &str) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported let robj_vec_res: Result, _> = self .0 .iter() - .map(|s| pl_series_to_list(s, true, true)) + .map(|s| pl_series_to_list(s, true, bigint_conversion)) .collect(); //rewrap Ok(Vec) as R list diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 31f25b8f9..9be6b158e 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -79,8 +79,9 @@ impl RPolarsSeries { panic!("somebody panicked on purpose"); } - pub fn to_r(&self) -> std::result::Result { - pl_series_to_list(&self.0, true, true).map_err(|err| format!("in to_r: {:?}", err)) + pub fn to_r(&self, bigint_conversion: &str) -> std::result::Result { + pl_series_to_list(&self.0, true, bigint_conversion) + .map_err(|err| format!("in to_r: {:?}", err)) } //any mut method exposed in R suffixed _mut pub fn rename_mut(&mut self, name: &str) { @@ -437,27 +438,27 @@ impl RPolarsSeries { } pub fn mean(&self) -> Result { - RPolarsSeries(self.0.mean_as_series()).to_r() + RPolarsSeries(self.0.mean_as_series()).to_r("float") } pub fn median(&self) -> Result { let s = self.0.median_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn min(&self) -> Result { let s = self.0.min_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn max(&self) -> Result { let s = self.0.max_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn sum(&self) -> Result { let s = self.0.sum_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn std(&self, ddof: Robj) -> Result { @@ -465,7 +466,7 @@ impl RPolarsSeries { .0 .std_as_series(robj_to!(u8, ddof)?) .map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn var(&self, ddof: Robj) -> Result { @@ -473,7 +474,7 @@ impl RPolarsSeries { .0 .var_as_series(robj_to!(u8, ddof)?) .map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r() + RPolarsSeries(s).to_r("float") } pub fn ceil(&self) -> List { From 886652860aefeed1c9d24a3e01f7979fe9ec96fd Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 18 Jan 2024 14:37:09 +0100 Subject: [PATCH 13/20] fix check for "bigint_conversion" --- R/options.R | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/R/options.R b/R/options.R index 97a45df39..d15c8ae8e 100644 --- a/R/options.R +++ b/R/options.R @@ -26,7 +26,9 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool) polars_optreq$rpool_cap = list() # rust-side options already check args polars_optenv$bigint_conversion = "real" -polars_optreq$bigint_conversion = c("bit64", "real", "string") +polars_optreq$bigint_conversion = list(acceptable_choices = function(x) { + !is.null(x) && x %in% c("bit64", "real", "string") +}) ## END OF DEFINED OPTIONS @@ -162,6 +164,7 @@ pl_reset_options = function() { assign("debug_polars", FALSE, envir = polars_optenv) assign("no_messages", FALSE, envir = polars_optenv) assign("rpool_cap", 4, envir = polars_optenv) + assign("²bigint_conversion", "real", envir = polars_optenv) } From 4038d6dc4e4e11e5bcf9eee7ed68e8bd1db44497 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 18 Jan 2024 16:15:21 +0100 Subject: [PATCH 14/20] document and test --- R/dataframe__frame.R | 12 ++++---- R/options.R | 22 ++++++++------- R/series__series.R | 9 +++--- man/DataFrame_to_data_frame.Rd | 11 +++++++- man/DataFrame_to_list.Rd | 14 +++++++++- man/Series_to_r.Rd | 12 +++++++- man/pl_options.Rd | 12 +++++++- src/rust/src/conversion_s_to_r.rs | 4 +-- tests/testthat/test-options.R | 46 +++++++++++++++++++++++++++++++ 9 files changed, 117 insertions(+), 25 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 082aa5a8c..dd209670e 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -845,15 +845,16 @@ DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) { #' Return Polars DataFrame as R data.frame #' #' @param ... Any args pased to `as.data.frame()`. +#' @inheritParams pl_set_options #' #' @return An R data.frame #' @keywords DataFrame #' @examples #' df = pl$DataFrame(iris[1:3, ]) #' df$to_data_frame() -DataFrame_to_data_frame = function(...) { +DataFrame_to_data_frame = function(..., bigint_conversion = pl$options$bigint_conversion) { # do not unnest structs and mark with I to also preserve categoricals as is - l = lapply(self$to_list(unnest_structs = FALSE), I) + l = lapply(self$to_list(unnest_structs = FALSE, bigint_conversion), I) # similar to as.data.frame, but avoid checks, whcih would edit structs df = data.frame(seq_along(l[[1L]]), ...) @@ -870,6 +871,7 @@ DataFrame_to_data_frame = function(...) { #' #' @param unnest_structs Boolean. If `TRUE` (default), then `$unnest()` is applied #' on any struct column. +#' @inheritParams pl_set_options #' #' @details #' For simplicity reasons, this implementation relies on unnesting all structs @@ -881,12 +883,12 @@ DataFrame_to_data_frame = function(...) { #' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$to_list() -DataFrame_to_list = function(unnest_structs = TRUE) { +DataFrame_to_list = function(unnest_structs = TRUE, bigint_conversion = pl$options$bigint_conversion) { if (unnest_structs) { - .pr$DataFrame$to_list(self, pl$options$bigint_conversion) |> + .pr$DataFrame$to_list(self, bigint_conversion) |> unwrap("in $to_list():") } else { - .pr$DataFrame$to_list_tag_structs(self) |> + .pr$DataFrame$to_list_tag_structs(self, bigint_conversion) |> unwrap("in $to_list():") |> restruct_list() } diff --git a/R/options.R b/R/options.R index d15c8ae8e..6b269f7b2 100644 --- a/R/options.R +++ b/R/options.R @@ -25,10 +25,11 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool) # polars_optenv$rpool_cap # active binding for getting value, not for polars_optreq$rpool_cap = list() # rust-side options already check args -polars_optenv$bigint_conversion = "real" -polars_optreq$bigint_conversion = list(acceptable_choices = function(x) { - !is.null(x) && x %in% c("bit64", "real", "string") -}) +polars_optenv$bigint_conversion = "float" +polars_optreq$bigint_conversion = list( + acceptable_choices = function(x) !is.null(x) && x %in% c("bit64", "float", "string"), + bit64_is_attached = function(x) if (x == "bit64") x %in% .packages() else TRUE +) ## END OF DEFINED OPTIONS @@ -69,13 +70,12 @@ polars_optreq$bigint_conversion = list(acceptable_choices = function(x) { #' @param bigint_conversion How should Int64 values be handled when converting a #' polars object to R? #' -#' * `"real"` (default) converts the values to Float64. +#' * `"float"` (default) converts the values to Float64. #' * `"bit64"` uses `bit64::as.integer64()` to do the conversion (requires -#' the package `bit64` to be installed). +#' the package `bit64` to be attached). #' * `"string"` converts Int64 values to character. #' #' @rdname pl_options -#' @docType NULL #' #' @return #' `pl$options` returns a named list with the value (`TRUE` or `FALSE`) of @@ -105,7 +105,7 @@ pl_set_options = function( debug_polars = FALSE, no_messages = FALSE, rpool_cap = 4, - bigint_conversion = c("bit64", "real", "string")) { + bigint_conversion = c("bit64", "float", "string")) { # only modify arguments that were explicitly written in the function call # (otherwise calling set_options() twice in a row would reset the args # modified in the first call) @@ -164,7 +164,7 @@ pl_reset_options = function() { assign("debug_polars", FALSE, envir = polars_optenv) assign("no_messages", FALSE, envir = polars_optenv) assign("rpool_cap", 4, envir = polars_optenv) - assign("²bigint_conversion", "real", envir = polars_optenv) + assign("bigint_conversion", "float", envir = polars_optenv) } @@ -172,7 +172,9 @@ translate_failures = \(x) { lookups = c( "must_be_scalar" = "Input must be of length one.", "must_be_integer" = "Input must be an integer.", - "must_be_bool" = "Input must be TRUE or FALSE" + "must_be_bool" = "Input must be TRUE or FALSE.", + "acceptable_choices" = "`bigint_conversion` must be one of \"float\", \"string\", \"bit64\".", + "bit64_is_attached" = "Package `bit64` must be attached to use `bigint_conversion = \"bit64\"`." ) trans = lookups[x] trans[is.na(trans)] = x[is.na(trans)] diff --git a/R/series__series.R b/R/series__series.R index c7e302139..3d1e533f8 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -240,8 +240,9 @@ Series_shape = method_as_property(function() { #' Get r vector/list #' @description return R list (if polars Series is list) or vector (any other polars Series type) -#' @name Series_to_r -#' @rdname Series_to_r +#' +#' @inheritParams pl_set_options +#' #' @return R list or vector #' @keywords Series #' @details @@ -274,8 +275,8 @@ Series_shape = method_as_property(function() { #' series_list$to_r() # as list because Series DataType is list #' series_list$to_r_list() # implicit call as.list(), same as to_r() as already list #' series_list$to_vector() # implicit call unlist(), append into a vector -Series_to_r = \() { - unwrap(.pr$Series$to_r(self, pl$options$bigint_conversion), "in $to_r():") +Series_to_r = \(bigint_conversion = pl$options$bigint_conversion) { + unwrap(.pr$Series$to_r(self, bigint_conversion), "in $to_r():") } # TODO replace list example with Series only syntax diff --git a/man/DataFrame_to_data_frame.Rd b/man/DataFrame_to_data_frame.Rd index a63c0488b..48cc6b386 100644 --- a/man/DataFrame_to_data_frame.Rd +++ b/man/DataFrame_to_data_frame.Rd @@ -4,10 +4,19 @@ \alias{DataFrame_to_data_frame} \title{Return Polars DataFrame as R data.frame} \usage{ -DataFrame_to_data_frame(...) +DataFrame_to_data_frame(..., bigint_conversion = pl$options$bigint_conversion) } \arguments{ \item{...}{Any args pased to \code{as.data.frame()}.} + +\item{bigint_conversion}{How should Int64 values be handled when converting a +polars object to R? +\itemize{ +\item \code{"float"} (default) converts the values to Float64. +\item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires +the package \code{bit64} to be attached). +\item \code{"string"} converts Int64 values to character. +}} } \value{ An R data.frame diff --git a/man/DataFrame_to_list.Rd b/man/DataFrame_to_list.Rd index d2b67c438..ee8f9b961 100644 --- a/man/DataFrame_to_list.Rd +++ b/man/DataFrame_to_list.Rd @@ -4,11 +4,23 @@ \alias{DataFrame_to_list} \title{Return Polars DataFrame as a list of vectors} \usage{ -DataFrame_to_list(unnest_structs = TRUE) +DataFrame_to_list( + unnest_structs = TRUE, + bigint_conversion = pl$options$bigint_conversion +) } \arguments{ \item{unnest_structs}{Boolean. If \code{TRUE} (default), then \verb{$unnest()} is applied on any struct column.} + +\item{bigint_conversion}{How should Int64 values be handled when converting a +polars object to R? +\itemize{ +\item \code{"float"} (default) converts the values to Float64. +\item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires +the package \code{bit64} to be attached). +\item \code{"string"} converts Int64 values to character. +}} } \value{ R list of vectors diff --git a/man/Series_to_r.Rd b/man/Series_to_r.Rd index 2a8ce2fd5..e1de23a2e 100644 --- a/man/Series_to_r.Rd +++ b/man/Series_to_r.Rd @@ -6,12 +6,22 @@ \alias{Series_to_r_list} \title{Get r vector/list} \usage{ -Series_to_r() +Series_to_r(bigint_conversion = pl$options$bigint_conversion) Series_to_vector() Series_to_r_list() } +\arguments{ +\item{bigint_conversion}{How should Int64 values be handled when converting a +polars object to R? +\itemize{ +\item \code{"float"} (default) converts the values to Float64. +\item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires +the package \code{bit64} to be attached). +\item \code{"string"} converts Int64 values to character. +}} +} \value{ R list or vector diff --git a/man/pl_options.Rd b/man/pl_options.Rd index d16e91ba3..fc1d23074 100644 --- a/man/pl_options.Rd +++ b/man/pl_options.Rd @@ -12,7 +12,8 @@ pl_set_options( do_not_repeat_call = FALSE, debug_polars = FALSE, no_messages = FALSE, - rpool_cap = 4 + rpool_cap = 4, + bigint_conversion = c("bit64", "float", "string") ) pl_reset_options() @@ -36,6 +37,15 @@ messages. The default (\code{FALSE}) is to show them.} \item{rpool_cap}{The maximum number of R sessions that can be used to process R code in the background. See Details.} + +\item{bigint_conversion}{How should Int64 values be handled when converting a +polars object to R? +\itemize{ +\item \code{"float"} (default) converts the values to Float64. +\item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires +the package \code{bit64} to be attached). +\item \code{"string"} converts Int64 values to character. +}} } \value{ \code{pl$options} returns a named list with the value (\code{TRUE} or \code{FALSE}) of diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 85911bc9d..c07f35fac 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -40,7 +40,7 @@ pub fn pl_series_to_list( Int16 => s.i16().map(|ca| ca.into_iter().collect_robj()), Int32 => s.i32().map(|ca| ca.into_iter().collect_robj()), Int64 => match bigint_conversion { - "real" => s + "float" => s .cast(&DataType::Float64)? .f64() .map(|ca| ca.into_iter().collect_robj()), @@ -65,7 +65,7 @@ pub fn pl_series_to_list( .set_class(&["integer64"]) .expect("internal error could not set class label 'integer64'") }), - _ => panic!("foo"), + _ => panic!("`bigint_conversion` must be one of 'float', 'string', 'bit64'"), }, UInt8 => s.u8().map(|ca| { ca.into_iter() diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index e21224c29..4a4c028c6 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -52,3 +52,49 @@ test_that("pl$options$ read-write", { "arg-name does not match any defined args of `?set_options`" ) }) + + +test_that("option 'bigint_conversion' works", { + pl$reset_options() + df = pl$DataFrame(a = c(1:3, NA), schema = list(a = pl$Int64)) + + # default is to convert Int64 to float + expect_identical( + df$to_list(), + list(a = c(1, 2, 3, NA)) + ) + + # can convert to string + pl$set_options(bigint_conversion = "string") + expect_identical( + df$to_list(), + list(a = c("1", "2", "3", NA)) + ) + + # can convert to bit64, but *only* if bit64 is attached + try(detach("package:bit64"), silent = TRUE) + expect_error( + pl$set_options(bigint_conversion = "bit64"), + "must be attached" + ) + skip_if_not_installed("bit64") + suppressPackageStartupMessages(library(bit64)) + pl$set_options(bigint_conversion = "bit64") + expect_identical( + df$to_list(), + list(a = as.integer64(c(1, 2, 3, NA))) + ) + + # can override the global option by passing a custom arg + # option currently is "bit64" + expect_identical( + df$to_list(bigint_conversion = "string"), + list(a = c("1", "2", "3", NA)) + ) + + # arg correctly passed from to_data_frame() to to_list() + expect_identical( + df$to_data_frame(bigint_conversion = "string"), + data.frame(a = c("1", "2", "3", NA)) + ) +}) From 39eb2a8cb8773dc3807e37b0f12188d9669804fa Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 18 Jan 2024 16:18:04 +0100 Subject: [PATCH 15/20] bump news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index b85731b3f..4422f31ff 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,10 @@ - New methods `$list$any()` and `$list$all()` (#709). - New function `pl$from_epoch()` to convert a Unix timestamp to a date(time) variable (#708). +- New option `bigint_conversion` to specify how Int64 columns (that don't have + equivalent in base R) should be converted. This option can either be set + globally with `pl$set_options()` or on a case-by-case basis, e.g with + `$to_data_frame(bigint_conversion =)` (#706). ## polars 0.12.2 From c74f8f88c9d6032d996e3f34a0e6d6b6b1cadeb8 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 18 Jan 2024 16:38:16 +0100 Subject: [PATCH 16/20] fix tests --- R/expr__expr.R | 7 ++++--- R/series__series.R | 10 ++++++---- man/Series_to_r.Rd | 4 ++-- tests/testthat/test-bit64.R | 19 +++++++------------ tests/testthat/test-dataframe.R | 2 -- tests/testthat/test-expr_datetime.R | 24 ++++++++---------------- tests/testthat/test-lazy.R | 1 - tests/testthat/test-options.R | 4 ++-- 8 files changed, 29 insertions(+), 42 deletions(-) diff --git a/R/expr__expr.R b/R/expr__expr.R index 825bde3c9..ef745eb0e 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3177,17 +3177,18 @@ Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) { #' Otherwise, provide a DataFrame that the Expr should be evaluated in. #' @param i Numeric column to extract. Default is zero (which gives the first #' column). +#' @inheritParams pl_set_options #' @return R object #' @examples #' pl$lit(1:3)$to_r() -Expr_to_r = function(df = NULL, i = 0) { +Expr_to_r = function(df = NULL, i = 0, bigint_conversion = pl$options$bigint_conversion) { if (is.null(df)) { - pl$select(self)$to_series(i)$to_r() + pl$select(self)$to_series(i)$to_r(bigint_conversion) } else { if (!inherits(df, c("RPolarsDataFrame"))) { stop("Expr_to_r: input is not NULL or a DataFrame/Lazyframe") } - df$select(self)$to_series(i)$to_r() + df$select(self)$to_series(i)$to_r(bigint_conversion) } } diff --git a/R/series__series.R b/R/series__series.R index 3d1e533f8..697d1d004 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -283,12 +283,13 @@ Series_to_r = \(bigint_conversion = pl$options$bigint_conversion) { #' @rdname Series_to_r #' @name Series_to_vector #' @description return R vector (implicit unlist) +#' @inheritParams pl_set_options #' @return R vector #' @keywords Series #' series_vec = pl$Series(letters[1:3]) #' series_vec$to_vector() -Series_to_vector = \() { - unlist(unwrap(.pr$Series$to_r(self)), "in $to_vector():") +Series_to_vector = \(bigint_conversion = pl$options$bigint_conversion) { + unlist(unwrap(.pr$Series$to_r(self, bigint_conversion)), "in $to_vector():") } #' Alias to Series_to_vector (backward compatibility) @@ -299,11 +300,12 @@ Series_to_r_vector = Series_to_vector #' @rdname Series_to_r #' @name Series_to_r_list #' @description return R list (implicit as.list) +#' @inheritParams pl_set_options #' @return R list #' @keywords Series #' @examples # -Series_to_r_list = \() { - as.list(unwrap(.pr$Series$to_r(self)), "in $to_r_list():") +Series_to_r_list = \(bigint_conversion = pl$options$bigint_conversion) { + as.list(unwrap(.pr$Series$to_r(self, bigint_conversion)), "in $to_r_list():") } diff --git a/man/Series_to_r.Rd b/man/Series_to_r.Rd index e1de23a2e..ea778b510 100644 --- a/man/Series_to_r.Rd +++ b/man/Series_to_r.Rd @@ -8,9 +8,9 @@ \usage{ Series_to_r(bigint_conversion = pl$options$bigint_conversion) -Series_to_vector() +Series_to_vector(bigint_conversion = pl$options$bigint_conversion) -Series_to_r_list() +Series_to_r_list(bigint_conversion = pl$options$bigint_conversion) } \arguments{ \item{bigint_conversion}{How should Int64 values be handled when converting a diff --git a/tests/testthat/test-bit64.R b/tests/testthat/test-bit64.R index ffb70c7fb..e31e729cb 100644 --- a/tests/testthat/test-bit64.R +++ b/tests/testthat/test-bit64.R @@ -1,29 +1,24 @@ test_that("from r to series and reverse", { + skip_if_not_installed("bit64") # R to series - testthat::skip_if_not_installed("bit64") values = c(-1, 0, 1, NA, 2^61, -2^61) s_act = pl$Series(bit64::as.integer64(values)) s_ref = pl$lit(values)$cast(pl$Int64)$to_series() expect_true(all((s_act == s_ref)$to_r(), na.rm = TRUE)) - # sereis to R - r_act = s_act$to_r() - r_ref = bit64::as.integer64(values) - expect_identical( - r_act, r_ref - ) + # series to R + expect_identical(s_act$to_r(), values) # lit scalar - expect_identical(pl$lit(bit64::as.integer64(5))$to_r(), bit64::as.integer64(5)) - expect_identical(pl$lit(bit64::as.integer64(NA))$to_r(), bit64::as.integer64(NA)) + expect_identical(pl$lit(5)$to_r(), 5) + expect_identical(pl$lit(NA)$to_r(), NA) # lit series - expect_identical(pl$lit(bit64::as.integer64(c(NA, 5)))$to_r(), bit64::as.integer64(c(NA, 5))) + expect_identical(pl$lit(c(NA, 5))$to_r(), c(NA, 5)) }) test_that("robj_to! from bit64", { - testthat::skip_if_not_installed("bit64") - + skip_if_not_installed("bit64") expect_identical( unwrap(test_robj_to_f64(bit64::as.integer64(1))), "1.0" diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R index aa063dd57..55d00cb66 100644 --- a/tests/testthat/test-dataframe.R +++ b/tests/testthat/test-dataframe.R @@ -544,8 +544,6 @@ test_that("simple translations", { test_that("null_count 64bit", { - skip_if_not_installed("bit64") - suppressPackageStartupMessages(library("bit64", quietly = TRUE)) tmp = mtcars tmp[1:2, 1:2] = NA tmp[5, 3] = NA diff --git a/tests/testthat/test-expr_datetime.R b/tests/testthat/test-expr_datetime.R index ed18856a8..cd3c9329f 100644 --- a/tests/testthat/test-expr_datetime.R +++ b/tests/testthat/test-expr_datetime.R @@ -289,7 +289,6 @@ test_that("dt$year iso_year", { test_that("dt$quarter, month, day", { - skip_if_not_installed("bit64") df = pl$DataFrame( date = pl$date_range( as.Date("2020-12-25"), @@ -324,7 +323,6 @@ test_that("dt$quarter, month, day", { test_that("hour minute", { - skip_if_not_installed("bit64") df = pl$DataFrame( date = pl$date_range( as.Date("2020-12-25"), @@ -373,7 +371,6 @@ test_that("hour minute", { test_that("second, milli, micro, nano", { - skip_if_not_installed("bit64") df = pl$DataFrame( date = pl$date_range( @@ -506,7 +503,6 @@ test_that("offset_by", { test_that("dt$epoch", { - skip_if_not_installed("bit64") df = pl$select( pl$date_range(as.Date("2022-1-1"), eager = FALSE)$dt$epoch("ns")$alias("e_ns"), @@ -538,7 +534,6 @@ test_that("dt$epoch", { test_that("dt$timestamp", { - skip_if_not_installed("bit64") df = pl$DataFrame( date = pl$date_range( @@ -742,18 +737,15 @@ test_that("replace_time_zone for ambiguous time", { test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { - skip_if_not_installed("bit64") # diff with settable units - diffy = \(x, units) bit64::as.integer64(as.numeric(diff(x), units = units)) - diffy2 = \(x, units) (as.numeric(diff(x), units = units)) - NA64 = bit64::NA_integer64_ + diffy = \(x, units) as.numeric(diff(x), units = units) # days df = pl$DataFrame(date = pl$date_range( start = as.Date("2020-3-1"), end = as.Date("2020-5-1"), interval = "1mo", eager = TRUE ))$with_columns( pl$col("date")$diff()$dt$total_days()$alias("diff") )$to_list() - expect_identical(df$diff, c(NA64, diffy(df$date, "days"))) + expect_identical(df$diff, c(NA, diffy(df$date, "days"))) # hours df = pl$DataFrame(date = pl$date_range( @@ -761,7 +753,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_hours()$alias("diff") )$to_list() - expect_identical(df$diff, c(NA64, diffy(df$date, "hours"))) + expect_identical(df$diff, c(NA, diffy(df$date, "hours"))) # minutes df = pl$DataFrame(date = pl$date_range( @@ -769,7 +761,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_minutes()$alias("diff") )$to_list() - expect_identical(df$diff, c(NA64, diffy(df$date, "mins"))) + expect_identical(df$diff, c(NA, diffy(df$date, "mins"))) # seconds df = pl$DataFrame(date = pl$date_range( @@ -778,7 +770,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_seconds()$alias("diff") )$to_list() - expect_identical(df$diff, c(NA64, diffy(df$date, "secs"))) + expect_identical(df$diff, c(NA, diffy(df$date, "secs"))) # milliseconds @@ -788,7 +780,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_milliseconds()$alias("diff") )$to_list() - expect_identical(df$diff, bit64::as.integer64(c(NA, diffy2(df$date, "secs")) * 1000)) + expect_identical(df$diff, c(NA, diffy(df$date, "secs")) * 1000) # microseconds df = pl$DataFrame(date = pl$date_range( @@ -797,7 +789,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_microseconds()$alias("diff") )$to_list() - expect_identical(df$diff, bit64::as.integer64(c(NA, diffy2(df$date, "secs")) * 1E6)) + expect_identical(df$diff, c(NA, diffy(df$date, "secs")) * 1E6) # nanoseconds df = pl$DataFrame(date = pl$date_range( @@ -806,7 +798,7 @@ test_that("dt$days, dt$hours, dt$mminutes, dt$seconds, + ms, us, ns", { ))$with_columns( pl$col("date")$diff()$dt$total_nanoseconds()$alias("diff") )$to_list() - expect_identical(df$diff, bit64::as.integer64(c(NA, diffy2(df$date, "secs")) * 1E9)) + expect_identical(df$diff, c(NA, diffy(df$date, "secs")) * 1E9) }) test_that("$dt$time()", { diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 06ae818f4..ac1f393cc 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -760,7 +760,6 @@ test_that("fetch", { ) # uszie input can be bit64 - skip_if_not_installed("bit64") expect_identical( lf$select(pl$col("a") * 2L)$fetch(bit64::as.integer64(5))$to_list(), lf$select(pl$col("a") * 2L)$fetch(5)$to_list() diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index 4a4c028c6..b7d317c4d 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -26,11 +26,11 @@ test_that("pl$options$ read-write", { # set_options() only accepts booleans ctx = pl$set_options(maintain_order = 42) |> get_err_ctx() expect_identical(ctx$BadArgument, "maintain_order") - expect_identical(ctx$PlainErrorMessage, "Input must be TRUE or FALSE") + expect_identical(ctx$PlainErrorMessage, "Input must be TRUE or FALSE.") ctx = pl$set_options(strictly_immutable = c(TRUE, TRUE)) |> get_err_ctx() expect_identical(ctx$BadArgument, "strictly_immutable") - expect_identical(ctx$PlainErrorMessage, "Input must be TRUE or FALSE") + expect_identical(ctx$PlainErrorMessage, "Input must be TRUE or FALSE.") # reset_options() works pl$reset_options() From fb3ffda6ea742ae15913a9119b42e2679dbb174d Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Thu, 18 Jan 2024 18:04:46 +0100 Subject: [PATCH 17/20] forgot to redoc --- man/Expr_to_r.Rd | 11 ++++++++++- tests/testthat/test-expr_datetime.R | 3 --- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/man/Expr_to_r.Rd b/man/Expr_to_r.Rd index 6369a2bab..e50e6ee29 100644 --- a/man/Expr_to_r.Rd +++ b/man/Expr_to_r.Rd @@ -4,7 +4,7 @@ \alias{Expr_to_r} \title{Convert an Expr to R output} \usage{ -Expr_to_r(df = NULL, i = 0) +Expr_to_r(df = NULL, i = 0, bigint_conversion = pl$options$bigint_conversion) } \arguments{ \item{df}{If \code{NULL} (default), it evaluates the Expr in an empty DataFrame. @@ -12,6 +12,15 @@ Otherwise, provide a DataFrame that the Expr should be evaluated in.} \item{i}{Numeric column to extract. Default is zero (which gives the first column).} + +\item{bigint_conversion}{How should Int64 values be handled when converting a +polars object to R? +\itemize{ +\item \code{"float"} (default) converts the values to Float64. +\item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires +the package \code{bit64} to be attached). +\item \code{"string"} converts Int64 values to character. +}} } \value{ R object diff --git a/tests/testthat/test-expr_datetime.R b/tests/testthat/test-expr_datetime.R index cd3c9329f..d6c78f27a 100644 --- a/tests/testthat/test-expr_datetime.R +++ b/tests/testthat/test-expr_datetime.R @@ -371,7 +371,6 @@ test_that("hour minute", { test_that("second, milli, micro, nano", { - df = pl$DataFrame( date = pl$date_range( as.Date("2020-12-25"), @@ -503,7 +502,6 @@ test_that("offset_by", { test_that("dt$epoch", { - df = pl$select( pl$date_range(as.Date("2022-1-1"), eager = FALSE)$dt$epoch("ns")$alias("e_ns"), pl$date_range(as.Date("2022-1-1"), eager = FALSE)$dt$epoch("us")$alias("e_us"), @@ -534,7 +532,6 @@ test_that("dt$epoch", { test_that("dt$timestamp", { - df = pl$DataFrame( date = pl$date_range( start = as.Date("2001-1-1"), end = as.Date("2001-1-3"), interval = "1d", eager = TRUE From d95a690d472c9e60649d78dceb702f7d0be17b66 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 19 Jan 2024 13:40:09 +0100 Subject: [PATCH 18/20] "float" -> "double", bigint_conversion -> int64_conversion --- NEWS.md | 4 ++-- R/dataframe__frame.R | 10 +++++----- R/expr__expr.R | 6 +++--- R/extendr-wrappers.R | 8 ++++---- R/options.R | 18 +++++++++--------- R/series__series.R | 12 ++++++------ man/DataFrame_to_data_frame.Rd | 6 +++--- man/DataFrame_to_list.Rd | 6 +++--- man/Expr_to_r.Rd | 6 +++--- man/Series_to_r.Rd | 10 +++++----- man/pl_options.Rd | 6 +++--- src/rust/src/conversion_s_to_r.rs | 16 ++++++++-------- src/rust/src/rdataframe/mod.rs | 16 ++++++++-------- src/rust/src/series.rs | 18 +++++++++--------- tests/testthat/test-options.R | 12 ++++++------ 15 files changed, 77 insertions(+), 77 deletions(-) diff --git a/NEWS.md b/NEWS.md index 4422f31ff..4a6ef11f9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,10 +14,10 @@ - New methods `$list$any()` and `$list$all()` (#709). - New function `pl$from_epoch()` to convert a Unix timestamp to a date(time) variable (#708). -- New option `bigint_conversion` to specify how Int64 columns (that don't have +- New option `int64_conversion ` to specify how Int64 columns (that don't have equivalent in base R) should be converted. This option can either be set globally with `pl$set_options()` or on a case-by-case basis, e.g with - `$to_data_frame(bigint_conversion =)` (#706). + `$to_data_frame(int64_conversion =)` (#706). ## polars 0.12.2 diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index dd209670e..92241d403 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -852,9 +852,9 @@ DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) { #' @examples #' df = pl$DataFrame(iris[1:3, ]) #' df$to_data_frame() -DataFrame_to_data_frame = function(..., bigint_conversion = pl$options$bigint_conversion) { +DataFrame_to_data_frame = function(..., int64_conversion = pl$options$int64_conversion ) { # do not unnest structs and mark with I to also preserve categoricals as is - l = lapply(self$to_list(unnest_structs = FALSE, bigint_conversion), I) + l = lapply(self$to_list(unnest_structs = FALSE, int64_conversion ), I) # similar to as.data.frame, but avoid checks, whcih would edit structs df = data.frame(seq_along(l[[1L]]), ...) @@ -883,12 +883,12 @@ DataFrame_to_data_frame = function(..., bigint_conversion = pl$options$bigint_co #' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$to_list() -DataFrame_to_list = function(unnest_structs = TRUE, bigint_conversion = pl$options$bigint_conversion) { +DataFrame_to_list = function(unnest_structs = TRUE, int64_conversion = pl$options$int64_conversion ) { if (unnest_structs) { - .pr$DataFrame$to_list(self, bigint_conversion) |> + .pr$DataFrame$to_list(self, int64_conversion ) |> unwrap("in $to_list():") } else { - .pr$DataFrame$to_list_tag_structs(self, bigint_conversion) |> + .pr$DataFrame$to_list_tag_structs(self, int64_conversion ) |> unwrap("in $to_list():") |> restruct_list() } diff --git a/R/expr__expr.R b/R/expr__expr.R index ef745eb0e..0d7cb2b28 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3181,14 +3181,14 @@ Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) { #' @return R object #' @examples #' pl$lit(1:3)$to_r() -Expr_to_r = function(df = NULL, i = 0, bigint_conversion = pl$options$bigint_conversion) { +Expr_to_r = function(df = NULL, i = 0, int64_conversion = pl$options$int64_conversion ) { if (is.null(df)) { - pl$select(self)$to_series(i)$to_r(bigint_conversion) + pl$select(self)$to_series(i)$to_r(int64_conversion ) } else { if (!inherits(df, c("RPolarsDataFrame"))) { stop("Expr_to_r: input is not NULL or a DataFrame/Lazyframe") } - df$select(self)$to_series(i)$to_r(bigint_conversion) + df$select(self)$to_series(i)$to_r(int64_conversion ) } } diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 9d9447dce..526ad8af2 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -161,11 +161,11 @@ RPolarsDataFrame$dtype_strings <- function() .Call(wrap__RPolarsDataFrame__dtype RPolarsDataFrame$schema <- function() .Call(wrap__RPolarsDataFrame__schema, self) -RPolarsDataFrame$to_list <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list, self, bigint_conversion) +RPolarsDataFrame$to_list <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list, self, int64_conversion) -RPolarsDataFrame$to_list_unwind <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list_unwind, self, bigint_conversion) +RPolarsDataFrame$to_list_unwind <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list_unwind, self, int64_conversion) -RPolarsDataFrame$to_list_tag_structs <- function(bigint_conversion) .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self, bigint_conversion) +RPolarsDataFrame$to_list_tag_structs <- function(int64_conversion) .Call(wrap__RPolarsDataFrame__to_list_tag_structs, self, int64_conversion) RPolarsDataFrame$equals <- function(other) .Call(wrap__RPolarsDataFrame__equals, self, other) @@ -1163,7 +1163,7 @@ RPolarsSeries$sleep <- function(millis) .Call(wrap__RPolarsSeries__sleep, self, RPolarsSeries$panic <- function() .Call(wrap__RPolarsSeries__panic, self) -RPolarsSeries$to_r <- function(bigint_conversion) .Call(wrap__RPolarsSeries__to_r, self, bigint_conversion) +RPolarsSeries$to_r <- function(int64_conversion) .Call(wrap__RPolarsSeries__to_r, self, int64_conversion) RPolarsSeries$rename_mut <- function(name) invisible(.Call(wrap__RPolarsSeries__rename_mut, self, name)) diff --git a/R/options.R b/R/options.R index 6b269f7b2..6cea4126d 100644 --- a/R/options.R +++ b/R/options.R @@ -25,9 +25,9 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool) # polars_optenv$rpool_cap # active binding for getting value, not for polars_optreq$rpool_cap = list() # rust-side options already check args -polars_optenv$bigint_conversion = "float" -polars_optreq$bigint_conversion = list( - acceptable_choices = function(x) !is.null(x) && x %in% c("bit64", "float", "string"), +polars_optenv$int64_conversion = "double" +polars_optreq$int64_conversion = list( + acceptable_choices = function(x) !is.null(x) && x %in% c("bit64", "double", "string"), bit64_is_attached = function(x) if (x == "bit64") x %in% .packages() else TRUE ) @@ -67,10 +67,10 @@ polars_optreq$bigint_conversion = list( #' @param no_messages Hide messages. #' @param rpool_cap The maximum number of R sessions that can be used to process #' R code in the background. See Details. -#' @param bigint_conversion How should Int64 values be handled when converting a +#' @param int64_conversion How should Int64 values be handled when converting a #' polars object to R? #' -#' * `"float"` (default) converts the values to Float64. +#' * `"double"` (default) converts the integer values to double. #' * `"bit64"` uses `bit64::as.integer64()` to do the conversion (requires #' the package `bit64` to be attached). #' * `"string"` converts Int64 values to character. @@ -105,7 +105,7 @@ pl_set_options = function( debug_polars = FALSE, no_messages = FALSE, rpool_cap = 4, - bigint_conversion = c("bit64", "float", "string")) { + int64_conversion = c("bit64", "double", "string")) { # only modify arguments that were explicitly written in the function call # (otherwise calling set_options() twice in a row would reset the args # modified in the first call) @@ -164,7 +164,7 @@ pl_reset_options = function() { assign("debug_polars", FALSE, envir = polars_optenv) assign("no_messages", FALSE, envir = polars_optenv) assign("rpool_cap", 4, envir = polars_optenv) - assign("bigint_conversion", "float", envir = polars_optenv) + assign("int64_conversion ", "double", envir = polars_optenv) } @@ -173,8 +173,8 @@ translate_failures = \(x) { "must_be_scalar" = "Input must be of length one.", "must_be_integer" = "Input must be an integer.", "must_be_bool" = "Input must be TRUE or FALSE.", - "acceptable_choices" = "`bigint_conversion` must be one of \"float\", \"string\", \"bit64\".", - "bit64_is_attached" = "Package `bit64` must be attached to use `bigint_conversion = \"bit64\"`." + "acceptable_choices" = "`int64_conversion ` must be one of \"float\", \"string\", \"bit64\".", + "bit64_is_attached" = "Package `bit64` must be attached to use `int64_conversion = \"bit64\"`." ) trans = lookups[x] trans[is.na(trans)] = x[is.na(trans)] diff --git a/R/series__series.R b/R/series__series.R index 697d1d004..d9a702837 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -275,8 +275,8 @@ Series_shape = method_as_property(function() { #' series_list$to_r() # as list because Series DataType is list #' series_list$to_r_list() # implicit call as.list(), same as to_r() as already list #' series_list$to_vector() # implicit call unlist(), append into a vector -Series_to_r = \(bigint_conversion = pl$options$bigint_conversion) { - unwrap(.pr$Series$to_r(self, bigint_conversion), "in $to_r():") +Series_to_r = \(int64_conversion = pl$options$int64_conversion ) { + unwrap(.pr$Series$to_r(self, int64_conversion ), "in $to_r():") } # TODO replace list example with Series only syntax @@ -288,8 +288,8 @@ Series_to_r = \(bigint_conversion = pl$options$bigint_conversion) { #' @keywords Series #' series_vec = pl$Series(letters[1:3]) #' series_vec$to_vector() -Series_to_vector = \(bigint_conversion = pl$options$bigint_conversion) { - unlist(unwrap(.pr$Series$to_r(self, bigint_conversion)), "in $to_vector():") +Series_to_vector = \(int64_conversion = pl$options$int64_conversion ) { + unlist(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_vector():") } #' Alias to Series_to_vector (backward compatibility) @@ -304,8 +304,8 @@ Series_to_r_vector = Series_to_vector #' @return R list #' @keywords Series #' @examples # -Series_to_r_list = \(bigint_conversion = pl$options$bigint_conversion) { - as.list(unwrap(.pr$Series$to_r(self, bigint_conversion)), "in $to_r_list():") +Series_to_r_list = \(int64_conversion = pl$options$int64_conversion ) { + as.list(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_r_list():") } diff --git a/man/DataFrame_to_data_frame.Rd b/man/DataFrame_to_data_frame.Rd index 48cc6b386..a1f4a7fba 100644 --- a/man/DataFrame_to_data_frame.Rd +++ b/man/DataFrame_to_data_frame.Rd @@ -4,15 +4,15 @@ \alias{DataFrame_to_data_frame} \title{Return Polars DataFrame as R data.frame} \usage{ -DataFrame_to_data_frame(..., bigint_conversion = pl$options$bigint_conversion) +DataFrame_to_data_frame(..., int64_conversion = pl$options$int64_conversion) } \arguments{ \item{...}{Any args pased to \code{as.data.frame()}.} -\item{bigint_conversion}{How should Int64 values be handled when converting a +\item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ -\item \code{"float"} (default) converts the values to Float64. +\item \code{"double"} (default) converts the integer values to double. \item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires the package \code{bit64} to be attached). \item \code{"string"} converts Int64 values to character. diff --git a/man/DataFrame_to_list.Rd b/man/DataFrame_to_list.Rd index ee8f9b961..409f12f05 100644 --- a/man/DataFrame_to_list.Rd +++ b/man/DataFrame_to_list.Rd @@ -6,17 +6,17 @@ \usage{ DataFrame_to_list( unnest_structs = TRUE, - bigint_conversion = pl$options$bigint_conversion + int64_conversion = pl$options$int64_conversion ) } \arguments{ \item{unnest_structs}{Boolean. If \code{TRUE} (default), then \verb{$unnest()} is applied on any struct column.} -\item{bigint_conversion}{How should Int64 values be handled when converting a +\item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ -\item \code{"float"} (default) converts the values to Float64. +\item \code{"double"} (default) converts the integer values to double. \item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires the package \code{bit64} to be attached). \item \code{"string"} converts Int64 values to character. diff --git a/man/Expr_to_r.Rd b/man/Expr_to_r.Rd index e50e6ee29..c774c6f8d 100644 --- a/man/Expr_to_r.Rd +++ b/man/Expr_to_r.Rd @@ -4,7 +4,7 @@ \alias{Expr_to_r} \title{Convert an Expr to R output} \usage{ -Expr_to_r(df = NULL, i = 0, bigint_conversion = pl$options$bigint_conversion) +Expr_to_r(df = NULL, i = 0, int64_conversion = pl$options$int64_conversion) } \arguments{ \item{df}{If \code{NULL} (default), it evaluates the Expr in an empty DataFrame. @@ -13,10 +13,10 @@ Otherwise, provide a DataFrame that the Expr should be evaluated in.} \item{i}{Numeric column to extract. Default is zero (which gives the first column).} -\item{bigint_conversion}{How should Int64 values be handled when converting a +\item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ -\item \code{"float"} (default) converts the values to Float64. +\item \code{"double"} (default) converts the integer values to double. \item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires the package \code{bit64} to be attached). \item \code{"string"} converts Int64 values to character. diff --git a/man/Series_to_r.Rd b/man/Series_to_r.Rd index ea778b510..fed09702e 100644 --- a/man/Series_to_r.Rd +++ b/man/Series_to_r.Rd @@ -6,17 +6,17 @@ \alias{Series_to_r_list} \title{Get r vector/list} \usage{ -Series_to_r(bigint_conversion = pl$options$bigint_conversion) +Series_to_r(int64_conversion = pl$options$int64_conversion) -Series_to_vector(bigint_conversion = pl$options$bigint_conversion) +Series_to_vector(int64_conversion = pl$options$int64_conversion) -Series_to_r_list(bigint_conversion = pl$options$bigint_conversion) +Series_to_r_list(int64_conversion = pl$options$int64_conversion) } \arguments{ -\item{bigint_conversion}{How should Int64 values be handled when converting a +\item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ -\item \code{"float"} (default) converts the values to Float64. +\item \code{"double"} (default) converts the integer values to double. \item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires the package \code{bit64} to be attached). \item \code{"string"} converts Int64 values to character. diff --git a/man/pl_options.Rd b/man/pl_options.Rd index fc1d23074..e28211df1 100644 --- a/man/pl_options.Rd +++ b/man/pl_options.Rd @@ -13,7 +13,7 @@ pl_set_options( debug_polars = FALSE, no_messages = FALSE, rpool_cap = 4, - bigint_conversion = c("bit64", "float", "string") + int64_conversion = c("bit64", "double", "string") ) pl_reset_options() @@ -38,10 +38,10 @@ messages. The default (\code{FALSE}) is to show them.} \item{rpool_cap}{The maximum number of R sessions that can be used to process R code in the background. See Details.} -\item{bigint_conversion}{How should Int64 values be handled when converting a +\item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ -\item \code{"float"} (default) converts the values to Float64. +\item \code{"double"} (default) converts the integer values to double. \item \code{"bit64"} uses \code{bit64::as.integer64()} to do the conversion (requires the package \code{bit64} to be attached). \item \code{"string"} converts Int64 values to character. diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index c07f35fac..694a2a0ae 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -24,13 +24,13 @@ use polars_core::datatypes::DataType; pub fn pl_series_to_list( series: &pl::Series, tag_structs: bool, - bigint_conversion: &str, + int64_conversion: &str, ) -> pl::PolarsResult { use pl::DataType::*; fn to_list_recursive( s: &pl::Series, tag_structs: bool, - bigint_conversion: &str, + int64_conversion: &str, ) -> pl::PolarsResult { match s.dtype() { Float64 => s.f64().map(|ca| ca.into_iter().collect_robj()), @@ -39,8 +39,8 @@ pub fn pl_series_to_list( Int8 => s.i8().map(|ca| ca.into_iter().collect_robj()), Int16 => s.i16().map(|ca| ca.into_iter().collect_robj()), Int32 => s.i32().map(|ca| ca.into_iter().collect_robj()), - Int64 => match bigint_conversion { - "float" => s + Int64 => match int64_conversion { + "double" => s .cast(&DataType::Float64)? .f64() .map(|ca| ca.into_iter().collect_robj()), @@ -65,7 +65,7 @@ pub fn pl_series_to_list( .set_class(&["integer64"]) .expect("internal error could not set class label 'integer64'") }), - _ => panic!("`bigint_conversion` must be one of 'float', 'string', 'bit64'"), + _ => panic!("`int64_conversion ` must be one of 'float', 'string', 'bit64'"), }, UInt8 => s.u8().map(|ca| { ca.into_iter() @@ -139,7 +139,7 @@ pub fn pl_series_to_list( let s_ref = s.as_ref(); // is safe because s is read to generate new Robj, then discarded. let inner_val = - to_list_recursive(s_ref, tag_structs, bigint_conversion)?; + to_list_recursive(s_ref, tag_structs, int64_conversion)?; v.push(inner_val); } @@ -156,7 +156,7 @@ pub fn pl_series_to_list( } Struct(_) => { let df = s.clone().into_frame().unnest([s.name()]).unwrap(); - let mut l = RPolarsDataFrame(df).to_list_result(bigint_conversion)?; + let mut l = RPolarsDataFrame(df).to_list_result(int64_conversion)?; //TODO contribute extendr_api set_attrib mutates &self, change signature to surprise anyone if tag_structs { @@ -235,5 +235,5 @@ pub fn pl_series_to_list( } } - to_list_recursive(series, tag_structs, bigint_conversion) + to_list_recursive(series, tag_structs, int64_conversion) } diff --git a/src/rust/src/rdataframe/mod.rs b/src/rust/src/rdataframe/mod.rs index 3cf9329c8..ede6262bf 100644 --- a/src/rust/src/rdataframe/mod.rs +++ b/src/rust/src/rdataframe/mod.rs @@ -225,12 +225,12 @@ impl RPolarsDataFrame { // self.0.compare // } - pub fn to_list(&self, bigint_conversion: &str) -> List { + pub fn to_list(&self, int64_conversion: &str) -> List { let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), self.0 .iter() - .map(|x| pl_series_to_list(x, false, bigint_conversion)), + .map(|x| pl_series_to_list(x, false, int64_conversion)), ); let robj_list_res = robj_vec_res @@ -245,12 +245,12 @@ impl RPolarsDataFrame { } //this methods should only be used for benchmarking - pub fn to_list_unwind(&self, bigint_conversion: &str) -> Robj { + pub fn to_list_unwind(&self, int64_conversion: &str) -> Robj { let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), self.0 .iter() - .map(|x| pl_series_to_list(x, false, bigint_conversion)), + .map(|x| pl_series_to_list(x, false, int64_conversion)), ); let robj_list_res = robj_vec_res @@ -266,13 +266,13 @@ impl RPolarsDataFrame { // to_list have this variant with set_structs = true at pl_series_to_list // does not expose this arg in to_list as it is quite niche and might be deprecated later - pub fn to_list_tag_structs(&self, bigint_conversion: &str) -> List { + pub fn to_list_tag_structs(&self, int64_conversion: &str) -> List { //convert DataFrame to Result of to R vectors, error if DataType is not supported let robj_vec_res: Result, _> = collect_hinted_result( self.0.width(), self.0 .iter() - .map(|x| pl_series_to_list(x, true, bigint_conversion)), + .map(|x| pl_series_to_list(x, true, int64_conversion)), ); //rewrap Ok(Vec) as R list @@ -514,12 +514,12 @@ impl RPolarsDataFrame { } impl RPolarsDataFrame { - pub fn to_list_result(&self, bigint_conversion: &str) -> Result { + pub fn to_list_result(&self, int64_conversion: &str) -> Result { //convert DataFrame to Result of to R vectors, error if DataType is not supported let robj_vec_res: Result, _> = self .0 .iter() - .map(|s| pl_series_to_list(s, true, bigint_conversion)) + .map(|s| pl_series_to_list(s, true, int64_conversion)) .collect(); //rewrap Ok(Vec) as R list diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 9be6b158e..f0ccc1ada 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -79,8 +79,8 @@ impl RPolarsSeries { panic!("somebody panicked on purpose"); } - pub fn to_r(&self, bigint_conversion: &str) -> std::result::Result { - pl_series_to_list(&self.0, true, bigint_conversion) + pub fn to_r(&self, int64_conversion: &str) -> std::result::Result { + pl_series_to_list(&self.0, true, int64_conversion) .map_err(|err| format!("in to_r: {:?}", err)) } //any mut method exposed in R suffixed _mut @@ -438,27 +438,27 @@ impl RPolarsSeries { } pub fn mean(&self) -> Result { - RPolarsSeries(self.0.mean_as_series()).to_r("float") + RPolarsSeries(self.0.mean_as_series()).to_r("double") } pub fn median(&self) -> Result { let s = self.0.median_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn min(&self) -> Result { let s = self.0.min_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn max(&self) -> Result { let s = self.0.max_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn sum(&self) -> Result { let s = self.0.sum_as_series().map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn std(&self, ddof: Robj) -> Result { @@ -466,7 +466,7 @@ impl RPolarsSeries { .0 .std_as_series(robj_to!(u8, ddof)?) .map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn var(&self, ddof: Robj) -> Result { @@ -474,7 +474,7 @@ impl RPolarsSeries { .0 .var_as_series(robj_to!(u8, ddof)?) .map_err(polars_to_rpolars_err)?; - RPolarsSeries(s).to_r("float") + RPolarsSeries(s).to_r("double") } pub fn ceil(&self) -> List { diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index b7d317c4d..658828f9a 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -54,7 +54,7 @@ test_that("pl$options$ read-write", { }) -test_that("option 'bigint_conversion' works", { +test_that("option 'int64_conversion ' works", { pl$reset_options() df = pl$DataFrame(a = c(1:3, NA), schema = list(a = pl$Int64)) @@ -65,7 +65,7 @@ test_that("option 'bigint_conversion' works", { ) # can convert to string - pl$set_options(bigint_conversion = "string") + pl$set_options(int64_conversion = "string") expect_identical( df$to_list(), list(a = c("1", "2", "3", NA)) @@ -74,12 +74,12 @@ test_that("option 'bigint_conversion' works", { # can convert to bit64, but *only* if bit64 is attached try(detach("package:bit64"), silent = TRUE) expect_error( - pl$set_options(bigint_conversion = "bit64"), + pl$set_options(int64_conversion = "bit64"), "must be attached" ) skip_if_not_installed("bit64") suppressPackageStartupMessages(library(bit64)) - pl$set_options(bigint_conversion = "bit64") + pl$set_options(int64_conversion = "bit64") expect_identical( df$to_list(), list(a = as.integer64(c(1, 2, 3, NA))) @@ -88,13 +88,13 @@ test_that("option 'bigint_conversion' works", { # can override the global option by passing a custom arg # option currently is "bit64" expect_identical( - df$to_list(bigint_conversion = "string"), + df$to_list(int64_conversion = "string"), list(a = c("1", "2", "3", NA)) ) # arg correctly passed from to_data_frame() to to_list() expect_identical( - df$to_data_frame(bigint_conversion = "string"), + df$to_data_frame(int64_conversion = "string"), data.frame(a = c("1", "2", "3", NA)) ) }) From 19597176cd5aa02df1fca07354f105603f7f0378 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 19 Jan 2024 18:11:45 +0100 Subject: [PATCH 19/20] make int64_conversion a named arg --- R/dataframe__frame.R | 6 +++--- R/expr__expr.R | 2 +- R/options.R | 8 ++++---- R/series__series.R | 6 +++--- man/DataFrame_to_list.Rd | 3 +++ man/Expr_to_r.Rd | 9 ++++++++- tests/testthat/test-options.R | 10 +++++----- 7 files changed, 27 insertions(+), 17 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 92241d403..d18184c04 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -852,9 +852,9 @@ DataFrame_group_by = function(..., maintain_order = pl$options$maintain_order) { #' @examples #' df = pl$DataFrame(iris[1:3, ]) #' df$to_data_frame() -DataFrame_to_data_frame = function(..., int64_conversion = pl$options$int64_conversion ) { +DataFrame_to_data_frame = function(..., int64_conversion = pl$options$int64_conversion ) { # do not unnest structs and mark with I to also preserve categoricals as is - l = lapply(self$to_list(unnest_structs = FALSE, int64_conversion ), I) + l = lapply(self$to_list(unnest_structs = FALSE, int64_conversion = int64_conversion), I) # similar to as.data.frame, but avoid checks, whcih would edit structs df = data.frame(seq_along(l[[1L]]), ...) @@ -883,7 +883,7 @@ DataFrame_to_data_frame = function(..., int64_conversion = pl$options$int64_con #' @keywords DataFrame #' @examples #' pl$DataFrame(iris)$to_list() -DataFrame_to_list = function(unnest_structs = TRUE, int64_conversion = pl$options$int64_conversion ) { +DataFrame_to_list = function(unnest_structs = TRUE, ..., int64_conversion = pl$options$int64_conversion ) { if (unnest_structs) { .pr$DataFrame$to_list(self, int64_conversion ) |> unwrap("in $to_list():") diff --git a/R/expr__expr.R b/R/expr__expr.R index 0d7cb2b28..fa5e4336c 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3181,7 +3181,7 @@ Expr_rep_extend = function(expr, n, rechunk = TRUE, upcast = TRUE) { #' @return R object #' @examples #' pl$lit(1:3)$to_r() -Expr_to_r = function(df = NULL, i = 0, int64_conversion = pl$options$int64_conversion ) { +Expr_to_r = function(df = NULL, i = 0, ..., int64_conversion = pl$options$int64_conversion ) { if (is.null(df)) { pl$select(self)$to_series(i)$to_r(int64_conversion ) } else { diff --git a/R/options.R b/R/options.R index 6cea4126d..84d2e1fc0 100644 --- a/R/options.R +++ b/R/options.R @@ -25,8 +25,8 @@ polars_optreq$debug_polars = list(must_be_bool = is_bool) # polars_optenv$rpool_cap # active binding for getting value, not for polars_optreq$rpool_cap = list() # rust-side options already check args -polars_optenv$int64_conversion = "double" -polars_optreq$int64_conversion = list( +polars_optenv$int64_conversion = "double" +polars_optreq$int64_conversion = list( acceptable_choices = function(x) !is.null(x) && x %in% c("bit64", "double", "string"), bit64_is_attached = function(x) if (x == "bit64") x %in% .packages() else TRUE ) @@ -105,7 +105,7 @@ pl_set_options = function( debug_polars = FALSE, no_messages = FALSE, rpool_cap = 4, - int64_conversion = c("bit64", "double", "string")) { + int64_conversion = c("bit64", "double", "string")) { # only modify arguments that were explicitly written in the function call # (otherwise calling set_options() twice in a row would reset the args # modified in the first call) @@ -174,7 +174,7 @@ translate_failures = \(x) { "must_be_integer" = "Input must be an integer.", "must_be_bool" = "Input must be TRUE or FALSE.", "acceptable_choices" = "`int64_conversion ` must be one of \"float\", \"string\", \"bit64\".", - "bit64_is_attached" = "Package `bit64` must be attached to use `int64_conversion = \"bit64\"`." + "bit64_is_attached" = "Package `bit64` must be attached to use `int64_conversion = \"bit64\"`." ) trans = lookups[x] trans[is.na(trans)] = x[is.na(trans)] diff --git a/R/series__series.R b/R/series__series.R index d9a702837..f9303af98 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -275,7 +275,7 @@ Series_shape = method_as_property(function() { #' series_list$to_r() # as list because Series DataType is list #' series_list$to_r_list() # implicit call as.list(), same as to_r() as already list #' series_list$to_vector() # implicit call unlist(), append into a vector -Series_to_r = \(int64_conversion = pl$options$int64_conversion ) { +Series_to_r = \(int64_conversion = pl$options$int64_conversion ) { unwrap(.pr$Series$to_r(self, int64_conversion ), "in $to_r():") } # TODO replace list example with Series only syntax @@ -288,7 +288,7 @@ Series_to_r = \(int64_conversion = pl$options$int64_conversion ) { #' @keywords Series #' series_vec = pl$Series(letters[1:3]) #' series_vec$to_vector() -Series_to_vector = \(int64_conversion = pl$options$int64_conversion ) { +Series_to_vector = \(int64_conversion = pl$options$int64_conversion ) { unlist(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_vector():") } @@ -304,7 +304,7 @@ Series_to_r_vector = Series_to_vector #' @return R list #' @keywords Series #' @examples # -Series_to_r_list = \(int64_conversion = pl$options$int64_conversion ) { +Series_to_r_list = \(int64_conversion = pl$options$int64_conversion ) { as.list(unwrap(.pr$Series$to_r(self, int64_conversion )), "in $to_r_list():") } diff --git a/man/DataFrame_to_list.Rd b/man/DataFrame_to_list.Rd index 409f12f05..721df9642 100644 --- a/man/DataFrame_to_list.Rd +++ b/man/DataFrame_to_list.Rd @@ -6,6 +6,7 @@ \usage{ DataFrame_to_list( unnest_structs = TRUE, + ..., int64_conversion = pl$options$int64_conversion ) } @@ -13,6 +14,8 @@ DataFrame_to_list( \item{unnest_structs}{Boolean. If \code{TRUE} (default), then \verb{$unnest()} is applied on any struct column.} +\item{...}{Ignored.} + \item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ diff --git a/man/Expr_to_r.Rd b/man/Expr_to_r.Rd index c774c6f8d..adf2b1240 100644 --- a/man/Expr_to_r.Rd +++ b/man/Expr_to_r.Rd @@ -4,7 +4,12 @@ \alias{Expr_to_r} \title{Convert an Expr to R output} \usage{ -Expr_to_r(df = NULL, i = 0, int64_conversion = pl$options$int64_conversion) +Expr_to_r( + df = NULL, + i = 0, + ..., + int64_conversion = pl$options$int64_conversion +) } \arguments{ \item{df}{If \code{NULL} (default), it evaluates the Expr in an empty DataFrame. @@ -13,6 +18,8 @@ Otherwise, provide a DataFrame that the Expr should be evaluated in.} \item{i}{Numeric column to extract. Default is zero (which gives the first column).} +\item{...}{Ignored.} + \item{int64_conversion}{How should Int64 values be handled when converting a polars object to R? \itemize{ diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index 658828f9a..d7716b376 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -65,7 +65,7 @@ test_that("option 'int64_conversion ' works", { ) # can convert to string - pl$set_options(int64_conversion = "string") + pl$set_options(int64_conversion = "string") expect_identical( df$to_list(), list(a = c("1", "2", "3", NA)) @@ -74,12 +74,12 @@ test_that("option 'int64_conversion ' works", { # can convert to bit64, but *only* if bit64 is attached try(detach("package:bit64"), silent = TRUE) expect_error( - pl$set_options(int64_conversion = "bit64"), + pl$set_options(int64_conversion = "bit64"), "must be attached" ) skip_if_not_installed("bit64") suppressPackageStartupMessages(library(bit64)) - pl$set_options(int64_conversion = "bit64") + pl$set_options(int64_conversion = "bit64") expect_identical( df$to_list(), list(a = as.integer64(c(1, 2, 3, NA))) @@ -88,13 +88,13 @@ test_that("option 'int64_conversion ' works", { # can override the global option by passing a custom arg # option currently is "bit64" expect_identical( - df$to_list(int64_conversion = "string"), + df$to_list(int64_conversion = "string"), list(a = c("1", "2", "3", NA)) ) # arg correctly passed from to_data_frame() to to_list() expect_identical( - df$to_data_frame(int64_conversion = "string"), + df$to_data_frame(int64_conversion = "string"), data.frame(a = c("1", "2", "3", NA)) ) }) From ca577c888b37648c29259bbcf659001c119c187b Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Fri, 19 Jan 2024 18:18:57 +0100 Subject: [PATCH 20/20] use polars error, add test for wrong input --- src/rust/src/conversion_s_to_r.rs | 4 +++- tests/testthat/test-options.R | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs index 694a2a0ae..6cb3fc84a 100644 --- a/src/rust/src/conversion_s_to_r.rs +++ b/src/rust/src/conversion_s_to_r.rs @@ -65,7 +65,9 @@ pub fn pl_series_to_list( .set_class(&["integer64"]) .expect("internal error could not set class label 'integer64'") }), - _ => panic!("`int64_conversion ` must be one of 'float', 'string', 'bit64'"), + _ => Err(pl::PolarsError::InvalidOperation( + "`int64_conversion ` must be one of 'float', 'string', 'bit64'".into(), + )), }, UInt8 => s.u8().map(|ca| { ca.into_iter() diff --git a/tests/testthat/test-options.R b/tests/testthat/test-options.R index d7716b376..8f8ebc35c 100644 --- a/tests/testthat/test-options.R +++ b/tests/testthat/test-options.R @@ -64,6 +64,12 @@ test_that("option 'int64_conversion ' works", { list(a = c(1, 2, 3, NA)) ) + # check value of int64_conversion + expect_error( + pl$set_options(int64_conversion = "foobar"), + "`int64_conversion ` must be one of" + ) + # can convert to string pl$set_options(int64_conversion = "string") expect_identical(