Skip to content

Commit

Permalink
feat: Bump rust-polars to 0.36.2 (#659)
Browse files Browse the repository at this point in the history
Co-authored-by: eitsupi <ts1s1andn@gmail.com>
  • Loading branch information
etiennebacher and eitsupi committed Jan 4, 2024
1 parent accb214 commit 2f631e4
Show file tree
Hide file tree
Showing 79 changed files with 312 additions and 309 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,5 @@ Collate:
'zzz.R'
Config/rextendr/version: 0.3.1
VignetteBuilder: knitr
Config/polars/LibVersion: 0.35.2
Config/polars/LibVersion: 0.36.0
Config/polars/RustToolchainVersion: nightly-2023-12-23
12 changes: 10 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,25 @@

## polars (development version)

### Rust-polars update

- rust-polars is updated to 0.36.2 (#659). Most of the changes were covered
in 0.12.0. The main change is that `pl$Utf8` is replaced by `pl$String`.
`pl$Utf8` is an alias and will keep working, but `pl$String` is now preferred
in the documentation and in new code.

### What's changed

- New methods `$str$reverse()`, `$str$contains_any()`, and `$str$replace_many()`
(#641).
- New methods `$rle()` and `$rle_id()` (#648).
- New functions `is_polars_df()`, `is_polars_lf()`, `is_polars_series()` (#658).
- `$gather()` now accepts negative indexing (#659).

### Miscellaneous

- Remeve the `Makefile` in favor of `Taskfile.yml`.
Please use `task` instaed of `make` as a task runner (#654).
- Remove the `Makefile` in favor of `Taskfile.yml`.
Please use `task` instead of `make` as a task runner (#654).

## polars 0.12.0

Expand Down
6 changes: 3 additions & 3 deletions R/Field.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @return An object with DataType `"RField"` containing its name and its
#' DataType.
#' @examples
#' pl$Field("city_names", pl$Utf8)
#' pl$Field("city_names", pl$String)
pl_Field = function(name, datatype) {
# Forward both arguments unchanged to the `new` constructor of `.pr$RField`
# and return whatever it produces.
.pr$RField$new(name, datatype)
}
Expand Down Expand Up @@ -71,7 +71,7 @@ RField.property_setters = new.env(parent = emptyenv())
#'
#' @rdname RField_name
#' @examples
#' field = pl$Field("Cities", pl$Utf8)
#' field = pl$Field("Cities", pl$String)
#' field$name
#'
#' field$name = "CityPoPulations" #<- is fine too
Expand All @@ -90,7 +90,7 @@ RField.property_setters$name = function(self, value) {
#'
#' @keywords DataFrame
#' @examples
#' field = pl$Field("Cities", pl$Utf8)
#' field = pl$Field("Cities", pl$String)
#' field$datatype
#'
#' field$datatype = pl$Categorical #<- is fine too
Expand Down
2 changes: 1 addition & 1 deletion R/convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#' @examples
#' pl$from_arrow(
#' data = arrow::arrow_table(iris),
#' schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
#' schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$String)
#' )
#'
#' char_schema = names(iris)
Expand Down
4 changes: 2 additions & 2 deletions R/csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' * "Float64" or "float64" for DataType::Float64,
#' * "Int32" or "integer" for DataType::Int32,
#' * "Int64" or "integer64" for DataType::Int64,
#' * "Utf8" or "character" for DataType::Utf8,
#' * "String" or "character" for DataType::String,
#' @param null_values Values to interpret as `NA` values. Can be:
#' * a character vector: all values that match one of the values in this vector
#' will be `NA`;
Expand Down Expand Up @@ -55,7 +55,7 @@
#' the name is set).
#' @param try_parse_dates Try to automatically parse dates. Most ISO8601-like
#' formats can be inferred, as well as a handful of others. If this does not
#' succeed, the column remains of data type `pl$Utf8`.
#' succeed, the column remains of data type `pl$String`.
#' @param eol_char Single byte end of line character (default: `\n`). When
#' encountering a file with Windows line endings (`\r\n`), one can go with the
#' default `\n`. The extra `\r` will be removed when processed.
Expand Down
6 changes: 3 additions & 3 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ NULL
#' pl$DataFrame(mtcars)
#'
#' # custom schema
#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$String))
pl_DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
uw = \(res) unwrap(res, "in $DataFrame():")

Expand Down Expand Up @@ -1669,9 +1669,9 @@ DataFrame_sample = function(
#' # simple use-case
#' pl$DataFrame(mtcars)$transpose(include_header = TRUE, column_names = rownames(mtcars))
#'
#' # All rows must have one shared supertype, recast Categorical to Utf8 which is a supertype
#' # All rows must have one shared supertype, recast Categorical to String which is a supertype
#' # of f64, and then dataset "Iris" can be transposed
#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$Utf8))$transpose()
#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$String))$transpose()
#'
DataFrame_transpose = function(
include_header = FALSE,
Expand Down
10 changes: 5 additions & 5 deletions R/datatype.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ wrap_proto_schema = function(x) {
#' @examples
#' print(ls(pl$dtypes))
#' pl$dtypes$Float64
#' pl$dtypes$Utf8
#' pl$dtypes$String
#'
#' pl$List(pl$List(pl$UInt64))
#'
#' pl$Struct(pl$Field("CityNames", pl$Utf8))
#' pl$Struct(pl$Field("CityNames", pl$String))
#'
#' # The function changes type from Integer(Int32)[Integers] to char(Utf8)[Strings]
#' # specifying the output DataType: Utf8 solves the problem
#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$Utf8)
#' # The function changes type from Int32 to String
#' # Specifying the output DataType: String solves the problem
#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$String)
#'
NULL

Expand Down
12 changes: 7 additions & 5 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ construct_ProtoExprArray = function(...) {
#' select(
#' pl$col("Sepal.Length")$map_batches(\(x) {
#' paste("cheese", as.character(x$to_vector()))
#' }, pl$dtypes$Utf8)
#' }, pl$dtypes$String)
#' )
#'
#' # R parallel process example, use Sys.sleep() to imitate some CPU expensive
Expand Down Expand Up @@ -799,7 +799,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
#'
#' e_letter = my_selection$map_elements(\(x) {
#' letters[ceiling(x)]
#' }, return_type = pl$dtypes$Utf8)$name$suffix("_letter")
#' }, return_type = pl$dtypes$String)$name$suffix("_letter")
#' pl$DataFrame(iris)$select(e_add10, e_letter)
#'
#'
Expand Down Expand Up @@ -1575,10 +1575,12 @@ Expr_sort_by = function(by, descending = FALSE) {
#' Gather values by index
#'
#' @param indices R scalar/vector or Series, or Expr that leads to a Series of
#' dtype UInt32.
#' dtype Int64. (0-indexed)
#' @return Expr
#' @examples
#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4)))
#' df = pl$DataFrame(a = 1:10)
#'
#' df$select(pl$col("a")$gather(c(0, 2, 4, -1)))
Expr_gather = function(indices) {
.pr$Expr$gather(self, pl$lit(indices)) |>
unwrap("in $gather():")
Expand Down Expand Up @@ -2034,7 +2036,7 @@ Expr_filter = function(predicate) {
Expr_where = Expr_filter # alias: binds a second name to the same function as Expr_filter


#' Explode a list or Utf8 Series
#' Explode a list or String Series
#'
#' This means that every item is expanded to a new row.
#'
Expand Down
4 changes: 2 additions & 2 deletions R/expr__list.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,11 +231,11 @@ ExprList_contains = function(item) .pr$Expr$list_contains(self, wrap_e(item))
#'
#' @description
#' Join all string items in a sublist and place a separator between them.
#' This errors if inner type of list `!= Utf8`.
#' This errors if inner type of list `!= String`.
#' @param separator String to separate the items with. Can be an Expr.
#' @keywords ExprList
#' @format function
#' @return Series of dtype Utf8
#' @return Series of dtype String
#' @aliases list_join
#' @examples
#' df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y"))))
Expand Down
Loading

0 comments on commit 2f631e4

Please sign in to comment.