feat: Bump rust-polars to 0.36.2 (#659)

Co-authored-by: eitsupi <ts1s1andn@gmail.com>
pola-rs · Jan 4, 2024 · 2f631e4 · 2f631e4
1 parent accb214
commit 2f631e4
Show file tree

Hide file tree

Showing 79 changed files with 312 additions and 309 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -113,5 +113,5 @@ Collate:
     'zzz.R'
 Config/rextendr/version: 0.3.1
 VignetteBuilder: knitr
-Config/polars/LibVersion: 0.35.2
+Config/polars/LibVersion: 0.36.0
 Config/polars/RustToolchainVersion: nightly-2023-12-23
diff --git a/NEWS.md b/NEWS.md
@@ -2,17 +2,25 @@
 
 ## polars (development version)
 
+### Rust-polars update
+
+-   rust-polars is updated to 0.36.2 (#659). Most of the changes were covered
+    in 0.12.0. The main change is that `pl$Utf8` is replaced by `pl$String`.
+    `pl$Utf8` is an alias and will keep working, but `pl$String` is now preferred
+    in the documentation and in new code.
+
 ### What's changed
 
 -   New methods `$str$reverse()`, `$str$contains_any()`, and `$str$replace_many()`
     (#641).
 -   New methods `$rle()` and `$rle_id()` (#648).
 -   New functions `is_polars_df()`, `is_polars_lf()`, `is_polars_series()` (#658).
+-   `$gather()` now accepts negative indexing (#659).
 
 ### Miscellaneous
 
--   Remeve the `Makefile` in favor of `Taskfile.yml`.
-    Please use `task` instaed of `make` as a task runner (#654).
+-   Remove the `Makefile` in favor of `Taskfile.yml`.
+    Please use `task` instead of `make` as a task runner (#654).
 
 ## polars 0.12.0
 

diff --git a/R/Field.R b/R/Field.R
@@ -13,7 +13,7 @@
 #' @return An object with DataType `"RField"` containing its name and its
 #' DataType.
 #' @examples
-#' pl$Field("city_names", pl$Utf8)
+#' pl$Field("city_names", pl$String)
 pl_Field = function(name, datatype) {
   .pr$RField$new(name, datatype)
 }
@@ -71,7 +71,7 @@ RField.property_setters = new.env(parent = emptyenv())
 #'
 #' @rdname RField_name
 #' @examples
-#' field = pl$Field("Cities", pl$Utf8)
+#' field = pl$Field("Cities", pl$String)
 #' field$name
 #'
 #' field$name = "CityPoPulations" #<- is fine too
@@ -90,7 +90,7 @@ RField.property_setters$name = function(self, value) {
 #'
 #' @keywords DataFrame
 #' @examples
-#' field = pl$Field("Cities", pl$Utf8)
+#' field = pl$Field("Cities", pl$String)
 #' field$datatype
 #'
 #' field$datatype = pl$Categorical #<- is fine too

diff --git a/R/convert.R b/R/convert.R
@@ -15,7 +15,7 @@
 #' @examples
 #' pl$from_arrow(
 #'   data = arrow::arrow_table(iris),
-#'   schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+#'   schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$String)
 #' )
 #'
 #' char_schema = names(iris)

diff --git a/R/csv.R b/R/csv.R
@@ -25,7 +25,7 @@
 #' * "Float64" or "float64" for DataType::Float64,
 #' * "Int32" or "integer" for DataType::Int32,
 #' * "Int64" or "integer64" for DataType::Int64,
-#' * "Utf8" or "character" for DataType::Utf8,
+#' * "String" or "character" for DataType::String,
 #' @param null_values Values to interpret as `NA` values. Can be:
 #' * a character vector: all values that match one of the values in this vector
 #'   will be `NA`;
@@ -55,7 +55,7 @@
 #' the name is set).
 #' @param try_parse_dates Try to automatically parse dates. Most ISO8601-like
 #' formats can be inferred, as well as a handful of others. If this does not
-#' succeed, the column remains of data type `pl$Utf8`.
+#' succeed, the column remains of data type `pl$String`.
 #' @param eol_char Single byte end of line character (default: `\n`). When
 #' encountering a file with Windows line endings (`\r\n`), one can go with the
 #' default `\n`. The extra `\r` will be removed when processed.

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
@@ -141,7 +141,7 @@ NULL
 #' pl$DataFrame(mtcars)
 #'
 #' # custom schema
-#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
+#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$String))
 pl_DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
   uw = \(res) unwrap(res, "in $DataFrame():")
 
@@ -1669,9 +1669,9 @@ DataFrame_sample = function(
 #' # simple use-case
 #' pl$DataFrame(mtcars)$transpose(include_header = TRUE, column_names = rownames(mtcars))
 #'
-#' # All rows must have one shared supertype, recast Categorical to Utf8 which is a supertype
+#' # All rows must have one shared supertype. Recast Categorical to String, which is a supertype
 #' # of f64, and then the "Iris" dataset can be transposed
-#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$Utf8))$transpose()
+#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$String))$transpose()
 #'
 DataFrame_transpose = function(
     include_header = FALSE,

diff --git a/R/datatype.R b/R/datatype.R
@@ -48,15 +48,15 @@ wrap_proto_schema = function(x) {
 #' @examples
 #' print(ls(pl$dtypes))
 #' pl$dtypes$Float64
-#' pl$dtypes$Utf8
+#' pl$dtypes$String
 #'
 #' pl$List(pl$List(pl$UInt64))
 #'
-#' pl$Struct(pl$Field("CityNames", pl$Utf8))
+#' pl$Struct(pl$Field("CityNames", pl$String))
 #'
-#' # The function changes type from Integer(Int32)[Integers] to char(Utf8)[Strings]
-#' # specifying the output DataType: Utf8 solves the problem
-#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$Utf8)
+#' # The function changes type from Int32 to String
+#' # Specifying the output DataType: String solves the problem
+#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$String)
 #'
 NULL
 

diff --git a/R/expr__expr.R b/R/expr__expr.R
@@ -672,7 +672,7 @@ construct_ProtoExprArray = function(...) {
 #'   select(
 #'   pl$col("Sepal.Length")$map_batches(\(x) {
 #'     paste("cheese", as.character(x$to_vector()))
-#'   }, pl$dtypes$Utf8)
+#'   }, pl$dtypes$String)
 #' )
 #'
 #' # R parallel process example, use Sys.sleep() to imitate some CPU expensive
@@ -799,7 +799,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
 #'
 #' e_letter = my_selection$map_elements(\(x) {
 #'   letters[ceiling(x)]
-#' }, return_type = pl$dtypes$Utf8)$name$suffix("_letter")
+#' }, return_type = pl$dtypes$String)$name$suffix("_letter")
 #' pl$DataFrame(iris)$select(e_add10, e_letter)
 #'
 #'
@@ -1575,10 +1575,12 @@ Expr_sort_by = function(by, descending = FALSE) {
 #' Gather values by index
 #'
 #' @param indices R scalar/vector or Series, or Expr that leads to a Series of
-#' dtype UInt32.
+#' dtype Int64. (0-indexed)
 #' @return Expr
 #' @examples
-#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4)))
+#' df = pl$DataFrame(a = 1:10)
+#'
+#' df$select(pl$col("a")$gather(c(0, 2, 4, -1)))
 Expr_gather = function(indices) {
   .pr$Expr$gather(self, pl$lit(indices)) |>
     unwrap("in $gather():")
@@ -2034,7 +2036,7 @@ Expr_filter = function(predicate) {
 Expr_where = Expr_filter
 
 
-#' Explode a list or Utf8 Series
+#' Explode a list or String Series
 #'
 #' This means that every item is expanded to a new row.
 #'

diff --git a/R/expr__list.R b/R/expr__list.R
@@ -231,11 +231,11 @@ ExprList_contains = function(item) .pr$Expr$list_contains(self, wrap_e(item))
 #'
 #' @description
 #' Join all string items in a sublist and place a separator between them.
-#' This errors if inner type of list `!= Utf8`.
+#' This errors if inner type of list `!= String`.
 #' @param separator String to separate the items with. Can be an Expr.
 #' @keywords ExprList
 #' @format function
-#' @return Series of dtype Utf8
+#' @return Series of dtype String
 #' @aliases list_join
 #' @examples
 #' df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y"))))