diff --git a/DESCRIPTION b/DESCRIPTION
index 65e1531c8..88ab9c6ba 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -113,5 +113,5 @@ Collate:
'zzz.R'
Config/rextendr/version: 0.3.1
VignetteBuilder: knitr
-Config/polars/LibVersion: 0.35.2
+Config/polars/LibVersion: 0.36.0
Config/polars/RustToolchainVersion: nightly-2023-12-23
diff --git a/NEWS.md b/NEWS.md
index d7157507d..bdcfceda2 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,17 +2,25 @@
## polars (development version)
+### Rust-polars update
+
+- rust-polars is updated to 0.36.2 (#659). Most of the changes were covered
+ in 0.12.0. The main change is that `pl$Utf8` is replaced by `pl$String`.
+ `pl$Utf8` is an alias and will keep working, but `pl$String` is now preferred
+ in the documentation and in new code.
+
### What's changed
- New methods `$str$reverse()`, `$str$contains_any()`, and `$str$replace_many()`
(#641).
- New methods `$rle()` and `$rle_id()` (#648).
- New functions `is_polars_df()`, `is_polars_lf()`, `is_polars_series()` (#658).
+- `$gather()` now accepts negative indexing (#659).
### Miscellaneous
-- Remeve the `Makefile` in favor of `Taskfile.yml`.
- Please use `task` instaed of `make` as a task runner (#654).
+- Remove the `Makefile` in favor of `Taskfile.yml`.
+ Please use `task` instead of `make` as a task runner (#654).
## polars 0.12.0
diff --git a/R/Field.R b/R/Field.R
index 5c2b9d639..62080223e 100644
--- a/R/Field.R
+++ b/R/Field.R
@@ -13,7 +13,7 @@
#' @return A object of with DataType `"RField"` containing its name and its
#' DataType.
#' @examples
-#' pl$Field("city_names", pl$Utf8)
+#' pl$Field("city_names", pl$String)
pl_Field = function(name, datatype) {
.pr$RField$new(name, datatype)
}
@@ -71,7 +71,7 @@ RField.property_setters = new.env(parent = emptyenv())
#'
#' @rdname RField_name
#' @examples
-#' field = pl$Field("Cities", pl$Utf8)
+#' field = pl$Field("Cities", pl$String)
#' field$name
#'
#' field$name = "CityPoPulations" #<- is fine too
@@ -90,7 +90,7 @@ RField.property_setters$name = function(self, value) {
#'
#' @keywords DataFrame
#' @examples
-#' field = pl$Field("Cities", pl$Utf8)
+#' field = pl$Field("Cities", pl$String)
#' field$datatype
#'
#' field$datatype = pl$Categorical #<- is fine too
diff --git a/R/convert.R b/R/convert.R
index 3a4136ad2..2e0c7c68d 100644
--- a/R/convert.R
+++ b/R/convert.R
@@ -15,7 +15,7 @@
#' @examples
#' pl$from_arrow(
#' data = arrow::arrow_table(iris),
-#' schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+#' schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$String)
#' )
#'
#' char_schema = names(iris)
diff --git a/R/csv.R b/R/csv.R
index 3ff539943..0b444e098 100644
--- a/R/csv.R
+++ b/R/csv.R
@@ -25,7 +25,7 @@
#' * "Float64" or "float64" for DataType::Float64,
#' * "Int32" or "integer" for DataType::Int32,
#' * "Int64" or "integer64" for DataType::Int64,
-#' * "Utf8" or "character" for DataType::Utf8,
+#' * "String" or "character" for DataType::String,
#' @param null_values Values to interpret as `NA` values. Can be:
#' * a character vector: all values that match one of the values in this vector
#' will be `NA`;
@@ -55,7 +55,7 @@
#' the name is set).
#' @param try_parse_dates Try to automatically parse dates. Most ISO8601-like
#' formats can be inferred, as well as a handful of others. If this does not
-#' succeed, the column remains of data type `pl$Utf8`.
+#' succeed, the column remains of data type `pl$String`.
#' @param eol_char Single byte end of line character (default: `\n`). When
#' encountering a file with Windows line endings (`\r\n`), one can go with the
#' default `\n`. The extra `\r` will be removed when processed.
diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
index d826173dd..fc431e6db 100644
--- a/R/dataframe__frame.R
+++ b/R/dataframe__frame.R
@@ -141,7 +141,7 @@ NULL
#' pl$DataFrame(mtcars)
#'
#' # custom schema
-#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
+#' pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$String))
pl_DataFrame = function(..., make_names_unique = TRUE, schema = NULL) {
uw = \(res) unwrap(res, "in $DataFrame():")
@@ -1669,9 +1669,9 @@ DataFrame_sample = function(
#' # simple use-case
#' pl$DataFrame(mtcars)$transpose(include_header = TRUE, column_names = rownames(mtcars))
#'
-#' # All rows must have one shared supertype, recast Categorical to Utf8 which is a supertype
+#' # All rows must have one shared supertype, recast Categorical to String which is a supertype
#' # of f64, and then dataset "Iris" can be transposed
-#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$Utf8))$transpose()
+#' pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$String))$transpose()
#'
DataFrame_transpose = function(
include_header = FALSE,
diff --git a/R/datatype.R b/R/datatype.R
index 48b6e2a31..9416bd767 100644
--- a/R/datatype.R
+++ b/R/datatype.R
@@ -48,15 +48,15 @@ wrap_proto_schema = function(x) {
#' @examples
#' print(ls(pl$dtypes))
#' pl$dtypes$Float64
-#' pl$dtypes$Utf8
+#' pl$dtypes$String
#'
#' pl$List(pl$List(pl$UInt64))
#'
-#' pl$Struct(pl$Field("CityNames", pl$Utf8))
+#' pl$Struct(pl$Field("CityNames", pl$String))
#'
-#' # The function changes type from Integer(Int32)[Integers] to char(Utf8)[Strings]
-#' # specifying the output DataType: Utf8 solves the problem
-#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$Utf8)
+#' # The function changes type from Int32 to String
+#' # Specifying the output DataType: String solves the problem
+#' pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$String)
#'
NULL
diff --git a/R/expr__expr.R b/R/expr__expr.R
index 9e06054d3..6741762dc 100644
--- a/R/expr__expr.R
+++ b/R/expr__expr.R
@@ -672,7 +672,7 @@ construct_ProtoExprArray = function(...) {
#' select(
#' pl$col("Sepal.Length")$map_batches(\(x) {
#' paste("cheese", as.character(x$to_vector()))
-#' }, pl$dtypes$Utf8)
+#' }, pl$dtypes$String)
#' )
#'
#' # R parallel process example, use Sys.sleep() to imitate some CPU expensive
@@ -799,7 +799,7 @@ Expr_map = function(f, output_type = NULL, agg_list = FALSE, in_background = FAL
#'
#' e_letter = my_selection$map_elements(\(x) {
#' letters[ceiling(x)]
-#' }, return_type = pl$dtypes$Utf8)$name$suffix("_letter")
+#' }, return_type = pl$dtypes$String)$name$suffix("_letter")
#' pl$DataFrame(iris)$select(e_add10, e_letter)
#'
#'
@@ -1575,10 +1575,12 @@ Expr_sort_by = function(by, descending = FALSE) {
#' Gather values by index
#'
#' @param indices R scalar/vector or Series, or Expr that leads to a Series of
-#' dtype UInt32.
+#' dtype Int64. (0-indexed)
#' @return Expr
#' @examples
-#' pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4)))
+#' df = pl$DataFrame(a = 1:10)
+#'
+#' df$select(pl$col("a")$gather(c(0, 2, 4, -1)))
Expr_gather = function(indices) {
.pr$Expr$gather(self, pl$lit(indices)) |>
unwrap("in $gather():")
@@ -2034,7 +2036,7 @@ Expr_filter = function(predicate) {
Expr_where = Expr_filter
-#' Explode a list or Utf8 Series
+#' Explode a list or String Series
#'
#' This means that every item is expanded to a new row.
#'
diff --git a/R/expr__list.R b/R/expr__list.R
index 03aa3328c..54346dd53 100644
--- a/R/expr__list.R
+++ b/R/expr__list.R
@@ -231,11 +231,11 @@ ExprList_contains = function(item) .pr$Expr$list_contains(self, wrap_e(item))
#'
#' @description
#' Join all string items in a sublist and place a separator between them.
-#' This errors if inner type of list `!= Utf8`.
+#' This errors if inner type of list `!= String`.
#' @param separator String to separate the items with. Can be an Expr.
#' @keywords ExprList
#' @format function
-#' @return Series of dtype Utf8
+#' @return Series of dtype String
#' @aliases list_join
#' @examples
#' df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y"))))
diff --git a/R/expr__string.R b/R/expr__string.R
index c7330dd07..860437682 100644
--- a/R/expr__string.R
+++ b/R/expr__string.R
@@ -4,7 +4,7 @@
# expr_str_make_sub_ns = macro_new_subnamespace("^ExprStr_", "RPolarsExprStrNameSpace")
-#' Convert a Utf8 column into a Date/Datetime/Time column.
+#' Convert a String column into a Date/Datetime/Time column.
#'
#'
#' @param datatype The data type to convert into. Can be either Date, Datetime,
@@ -96,7 +96,7 @@ ExprStr_strptime = function(
unwrap("in str$strptime():")
}
-#' Convert a Utf8 column into a Date column
+#' Convert a String column into a Date column
#'
#' @param format Format to use for conversion. See `?strptime` for possible
#' values. Example: "%Y-%m-%d". If `NULL` (default), the format is
@@ -120,7 +120,7 @@ ExprStr_to_date = function(format = NULL, strict = TRUE, exact = TRUE, cache = T
unwrap("in $str$to_date():")
}
-#' Convert a Utf8 column into a Time column
+#' Convert a String column into a Time column
#'
#' @param format Format to use for conversion. See `?strptime` for possible
#' values. Example: "%H:%M:%S". If `NULL` (default), the format is
@@ -142,7 +142,7 @@ ExprStr_to_time = function(format = NULL, strict = TRUE, cache = TRUE) {
unwrap("in $str$to_time():")
}
-#' Convert a Utf8 column into a Datetime column
+#' Convert a String column into a Datetime column
#'
#' @param format Format to use for conversion. See `?strptime` for possible
#' values. Example: "%Y-%m-%d %H:%M:%S". If `NULL` (default), the format is
@@ -223,7 +223,7 @@ ExprStr_len_chars = function() {
#' @param ignore_nulls Ignore null values. If `FALSE`, null values will be
#' propagated: if the column contains any null values, the output is null.
#' @keywords ExprStr
-#' @return Expr of Utf8 concatenated
+#' @return Expr of String concatenated
#' @examples
#' # concatenate a Series of strings to a single string
#' df = pl$DataFrame(foo = c("1", NA, 2))
@@ -242,7 +242,7 @@ ExprStr_concat = function(delimiter = "-", ignore_nulls = TRUE) {
#'
#' @description Transform to uppercase variant.
#' @keywords ExprStr
-#' @return Expr of Utf8 uppercase chars
+#' @return Expr of String uppercase chars
#' @examples
#' pl$lit(c("A", "b", "c", "1", NA))$str$to_uppercase()$to_series()
ExprStr_to_uppercase = function() {
@@ -253,7 +253,7 @@ ExprStr_to_uppercase = function() {
#'
#' @description Transform to lowercase variant.
#' @keywords ExprStr
-#' @return Expr of Utf8 lowercase chars
+#' @return Expr of String lowercase chars
#' @examples
#' pl$lit(c("A", "b", "c", "1", NA))$str$to_lowercase()$to_series()
ExprStr_to_lowercase = function() {
@@ -264,7 +264,7 @@ ExprStr_to_lowercase = function() {
#'
#' @description Transform to titlecase variant.
#' @keywords ExprStr
-#' @return Expr of Utf8 titlecase chars
+#' @return Expr of String titlecase chars
#' @details
#' This method is only available with the feature flag "simd" which can
#' be set via envvar "RPOLARS_FULL_FEATURES" and it requires
@@ -296,7 +296,7 @@ ExprStr_to_titlecase = function() {
#' `strip_chars()` removes characters at the beginning and the end of the string.
#' Use `strip_chars_start()` and `strip_chars_end()` to remove characters only
#' from left and right respectively.
-#' @return Expr of Utf8 lowercase chars
+#' @return Expr of String lowercase chars
#' @examples
#' df = pl$DataFrame(foo = c(" hello", "\tworld"))
#' df$select(pl$col("foo")$str$strip_chars())
@@ -321,7 +321,7 @@ ExprStr_strip_chars = function(matches = NULL) {
#' `strip_chars_start()` removes characters at the beginning of the string only.
#' Use `strip_chars()` and `strip_chars_end()` to remove characters from the left
#' and right or only from the right respectively.
-#' @return Expr of Utf8 lowercase chars
+#' @return Expr of String lowercase chars
#' @examples
#' df = pl$DataFrame(foo = c(" hello", "\tworld"))
#' df$select(pl$col("foo")$str$strip_chars_start(" hel rld"))
@@ -345,7 +345,7 @@ ExprStr_strip_chars_start = function(matches = NULL) {
#' `strip_chars_end()` removes characters at the end of the string only.
#' Use `strip_chars()` and `strip_chars_start()` to remove characters from the left
#' and right or only from the left respectively.
-#' @return Expr of Utf8 lowercase chars
+#' @return Expr of String lowercase chars
#' @examples
#' df = pl$DataFrame(foo = c(" hello", "\tworld"))
#' df$select(pl$col("foo")$str$strip_chars_end(" hel\trld"))
@@ -375,12 +375,12 @@ ExprStr_strip_chars_end = function(matches = NULL) {
#' @examples
#' some_floats_expr = pl$lit(c(0, 10, -5, 5))
#'
-#' # cast to Utf8 and ljust alignment = 5, and view as R char vector
-#' some_floats_expr$cast(pl$Utf8)$str$zfill(5)$to_r()
+#' # cast to String and ljust alignment = 5, and view as R char vector
+#' some_floats_expr$cast(pl$String)$str$zfill(5)$to_r()
#'
#' # cast to int and the to utf8 and then ljust alignment = 5, and view as R
#' # char vector
-#' some_floats_expr$cast(pl$Int64)$cast(pl$Utf8)$str$zfill(5)$to_r()
+#' some_floats_expr$cast(pl$Int64)$cast(pl$String)$str$zfill(5)$to_r()
ExprStr_zfill = function(alignment) {
.pr$Expr$str_zfill(self, alignment) |>
unwrap("in str$zfill():")
@@ -395,7 +395,7 @@ ExprStr_zfill = function(alignment) {
#' @param fillchar Fill with this ASCII character.
#' @details Padding is done using the specified `fillchar`. The original string
#' is returned if `width` is less than or equal to `len(s)`.
-#' @return Expr of Utf8
+#' @return Expr of String
#' @examples
#' df = pl$DataFrame(a = c("cow", "monkey", NA, "hippopotamus"))
#' df$select(pl$col("a")$str$pad_end(8, "*"))
@@ -508,10 +508,10 @@ ExprStr_json_decode = function(dtype, infer_schema_length = 100) {
#' @param json_path A valid JSON path query string.
#' @details
#' Throw errors if encounter invalid JSON strings. All return value will be
-#' cast to Utf8 regardless of the original value.
+#' cast to String regardless of the original value.
#'
#' Documentation on JSONPath standard can be found here: .
-#' @return Utf8 array. Contain null if original value is null or the json_path
+#' @return String array. Contain null if original value is null or the json_path
#' return nothing.
#' @examples
#' df = pl$DataFrame(
@@ -532,7 +532,7 @@ ExprStr_json_path_match = function(json_path) {
#' @param strict If `TRUE` (default), raise an error if the underlying value
#' cannot be decoded. Otherwise, replace it with a null value.
#'
-#' @return Utf8 array with values decoded using provided encoding
+#' @return String array with values decoded using provided encoding
#'
#' @examples
#' df = pl$DataFrame(strings = c("foo", "bar", NA))
@@ -541,14 +541,16 @@ ExprStr_json_path_match = function(json_path) {
#' pl$col("strings")$str$encode("base64")$alias("base64"), # notice DataType is not encoded
#' pl$col("strings")$str$encode("hex")$alias("hex") # ... and must restored with cast
#' )$with_columns(
-#' pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$Utf8),
-#' pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$Utf8)
+#' pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$String),
+#' pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$String)
#' )
ExprStr_decode = function(encoding, ..., strict = TRUE) {
+ uw = \(res) unwrap(res, "in $str$decode():")
+
pcase(
!is_string(encoding), stop("encoding must be a string, it was: ", encoding),
- encoding == "hex", .pr$Expr$str_hex_decode(self, strict),
- encoding == "base64", .pr$Expr$str_base64_decode(self, strict),
+ encoding == "hex", uw(.pr$Expr$str_hex_decode(self, strict)),
+ encoding == "base64", uw(.pr$Expr$str_base64_decode(self, strict)),
or_else = stop("encoding must be one of 'hex' or 'base64', got ", encoding)
)
}
@@ -557,7 +559,7 @@ ExprStr_decode = function(encoding, ..., strict = TRUE) {
#'
#' @keywords ExprStr
#' @param encoding Either 'hex' or 'base64'.
-#' @return Utf8 array with values encoded using provided encoding
+#' @return String array with values encoded using provided encoding
#'
#' @examples
#' df = pl$DataFrame(strings = c("foo", "bar", NA))
@@ -566,14 +568,16 @@ ExprStr_decode = function(encoding, ..., strict = TRUE) {
#' pl$col("strings")$str$encode("base64")$alias("base64"), # notice DataType is not encoded
#' pl$col("strings")$str$encode("hex")$alias("hex") # ... and must restored with cast
#' )$with_columns(
-#' pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$Utf8),
-#' pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$Utf8)
+#' pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$String),
+#' pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$String)
#' )
ExprStr_encode = function(encoding) {
+ uw = \(res) unwrap(res, "in $str$encode():")
+
pcase(
!is_string(encoding), stop("encoding must be a string, it was: ", encoding),
- encoding == "hex", .pr$Expr$str_hex_encode(self),
- encoding == "base64", .pr$Expr$str_base64_encode(self),
+ encoding == "hex", uw(.pr$Expr$str_hex_encode(self)),
+ encoding == "base64", uw(.pr$Expr$str_base64_encode(self)),
or_else = stop("encoding must be one of 'hex' or 'base64', got ", encoding)
)
}
@@ -587,7 +591,7 @@ ExprStr_encode = function(encoding) {
#' pattern, first group begin at index 1 (default).
#'
#' @return
-#' Utf8 array. Contains null if original value is null or regex capture nothing.
+#' String array. Contains null if original value is null or regex capture nothing.
#'
#' @examples
#' df = pl$DataFrame(
@@ -614,7 +618,7 @@ ExprStr_extract = function(pattern, group_index) {
#' @param pattern A valid regex pattern
#'
#' @return
-#' `List[Utf8]` array. Contain null if original value is null or regex capture
+#' `List[String]` array. Contain null if original value is null or regex capture
#' nothing.
#'
#' @examples
@@ -661,7 +665,7 @@ ExprStr_count_matches = function(pattern, literal = FALSE) {
#' @param inclusive If `TRUE`, include the split character/string in the results.
#'
#' @return
-#' List of Utf8 type
+#' List of String type
#'
#' @examples
#' df = pl$DataFrame(s = c("foo bar", "foo-bar", "foo bar baz"))
@@ -690,7 +694,7 @@ ExprStr_split = function(by, inclusive = FALSE) {
#' @param n Number of splits to make.
#' @param inclusive If `TRUE`, include the split character/string in the results.
#'
-#' @return Struct where each of n+1 fields is of Utf8 type
+#' @return Struct where each of n+1 fields is of String type
#'
#' @examples
#' df = pl$DataFrame(s = c("a_1", NA, "c", "d_4"))
@@ -714,7 +718,7 @@ ExprStr_split_exact = function(by, n, inclusive = FALSE) {
#' @param n Number of splits to make.
#'
#' @return
-#' Struct where each of `n` fields is of Utf8 type
+#' Struct where each of `n` fields is of String type
#'
#' @examples
#' df = pl$DataFrame(s = c("a_1", NA, "c", "d_4"))
@@ -733,7 +737,7 @@ ExprStr_splitn = function(by, n) {
#' @param value Replacement, can be an Expr.
#' @param literal Treat pattern as a literal string.
#'
-#' @return Expr of Utf8 Series
+#' @return Expr of String Series
#'
#' @seealso `$str$replace_all()`: Replace all matching regex/literal substrings.
#'
@@ -756,7 +760,7 @@ ExprStr_replace = function(pattern, value, literal = FALSE) {
#' @param value Replacement, can be an Expr.
#' @param literal Treat pattern as a literal string.
#'
-#' @return Expr of Utf8 Series
+#' @return Expr of String Series
#'
#' @seealso `$str$replace()`: Replace first matching regex/literal substring.
#'
@@ -771,14 +775,14 @@ ExprStr_replace_all = function(pattern, value, literal = FALSE) {
}
-#' Create subslices of the string values of a Utf8 Series
+#' Create subslices of the string values of a String Series
#'
#' @keywords ExprStr
#' @param offset Start index. Negative indexing is supported.
#' @param length Length of the slice. If `NULL` (default), the slice is taken to
#' the end of the string.
#'
-#' @return Expr: Series of dtype Utf8.
+#' @return Expr: Series of dtype String.
#'
#' @examples
#' df = pl$DataFrame(s = c("pear", NA, "papaya", "dragonfruit"))
@@ -793,7 +797,7 @@ ExprStr_slice = function(offset, length = NULL) {
#' Returns a column with a separate row for every string character
#'
#' @keywords ExprStr
-#' @return Expr: Series of dtype Utf8.
+#' @return Expr: Series of dtype String.
#' @examples
#' df = pl$DataFrame(a = c("foo", "bar"))
#' df$select(pl$col("a")$str$explode())
diff --git a/R/functions__lazy.R b/R/functions__lazy.R
index 9c1f1d06c..21479a610 100644
--- a/R/functions__lazy.R
+++ b/R/functions__lazy.R
@@ -62,7 +62,7 @@ pl_all = function(name = NULL) {
#' df$select(pl$col(pl$dtypes$Float64))
#'
#' # ... or an R list of DataTypes, select any column of any such DataType
-#' df$select(pl$col(list(pl$dtypes$Float64, pl$dtypes$Utf8)))
+#' df$select(pl$col(list(pl$dtypes$Float64, pl$dtypes$String)))
#'
#' # from Series of names
#' df$select(pl$col(pl$Series(c("bar", "foobar"))))
@@ -691,13 +691,13 @@ pl_concat_list = function(exprs) {
#' # wrap two columns in a struct and provide a schema to set all or some DataTypes by name
#' e1 = pl$struct(
#' pl$col(c("int", "str")),
-#' schema = list(int = pl$Int64, str = pl$Utf8)
+#' schema = list(int = pl$Int64, str = pl$String)
#' )$alias("my_struct")
#' # same result as e.g. wrapping the columns in a struct and casting afterwards
#' e2 = pl$struct(
#' list(pl$col("int"), pl$col("str"))
#' )$cast(
-#' pl$Struct(int = pl$Int64, str = pl$Utf8)
+#' pl$Struct(int = pl$Int64, str = pl$String)
#' )$alias("my_struct")
#'
#' df = pl$DataFrame(
@@ -741,7 +741,7 @@ pl_struct = function(
#'
#' @param ... Columns to concatenate into a single string column. Accepts
#' expressions. Strings are parsed as column names, other non-expression inputs
-#' are parsed as literals. Non-Utf8 columns are cast to Utf8.
+#' are parsed as literals. Non-String columns are cast to String.
#' @param separator String that will be used to separate the values of each
#' column.
#' @return Expr
diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
index 621f20706..bc95f678e 100644
--- a/R/lazyframe__lazy.R
+++ b/R/lazyframe__lazy.R
@@ -144,7 +144,7 @@ NULL
#' # custom schema
#' pl$LazyFrame(
#' iris,
-#' schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+#' schema = list(Sepal.Length = pl$Float32, Species = pl$String)
#' )$collect()
pl_LazyFrame = function(...) {
pl$DataFrame(...)$lazy()
@@ -1502,7 +1502,7 @@ LazyFrame_profile = function(
#' `"name"` is implicitly converted to `pl$col("name")`.
#'
#' @details
-#' Only columns of DataType `List` or `Utf8` can be exploded.
+#' Only columns of DataType `List` or `String` can be exploded.
#'
#' Named expressions like `$explode(a = pl$col("b"))` will not implicitly trigger
#' `$alias("a")` here, due to only variant `Expr::Column` is supported in
diff --git a/R/series__series.R b/R/series__series.R
index 8b3bff913..e26f61f4e 100644
--- a/R/series__series.R
+++ b/R/series__series.R
@@ -251,11 +251,10 @@ Series_shape = method_as_property(function() {
#'
#' @examples
#'
-#' # make polars Series_Utf8
#' series_vec = pl$Series(letters[1:3])
#'
#' # Series_non_list
-#' series_vec$to_r() # as vector because Series DataType is not list (is Utf8)
+#' series_vec$to_r() # as vector because Series DataType is not list (is String)
#' series_vec$to_r_list() # implicit call as.list(), convert to list
#' series_vec$to_vector() # implicit call unlist(), same as to_r() as already vector
#'
@@ -354,7 +353,7 @@ Series_value_counts = function(sort = TRUE, parallel = FALSE) {
#' @examples
#' s = pl$Series(letters[1:5], "ltrs")
#' f = \(x) paste(x, ":", as.integer(charToRaw(x)))
-#' s$map_elements(f, pl$Utf8)
+#' s$map_elements(f, pl$String)
#'
#' # same as
#' pl$Series(sapply(s$to_r(), f), s$name)
diff --git a/R/zzz.R b/R/zzz.R
index 388d116e8..e81a8ad04 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -111,7 +111,7 @@ move_env_elements(RPolarsExpr, pl, c("lit"), remove = FALSE)
.onLoad = function(libname, pkgname) {
# instanciate one of each DataType (it's just an enum)
- all_types = .pr$DataType$get_all_simple_type_names()
+ all_types = c(.pr$DataType$get_all_simple_type_names(), "Utf8") # Allow "Utf8" as an alias of "String"
names(all_types) = all_types
pl$dtypes = c(
lapply(all_types, DataType_new), # instanciate all simple flag-like types
diff --git a/man/DataFrame_transpose.Rd b/man/DataFrame_transpose.Rd
index b5c5b2f7a..1196a56c1 100644
--- a/man/DataFrame_transpose.Rd
+++ b/man/DataFrame_transpose.Rd
@@ -39,9 +39,9 @@ Polars transpose is currently eager only, likely because it is not trivial to de
# simple use-case
pl$DataFrame(mtcars)$transpose(include_header = TRUE, column_names = rownames(mtcars))
-# All rows must have one shared supertype, recast Categorical to Utf8 which is a supertype
+# All rows must have one shared supertype, recast Categorical to String which is a supertype
# of f64, and then dataset "Iris" can be transposed
-pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$Utf8))$transpose()
+pl$DataFrame(iris)$with_columns(pl$col("Species")$cast(pl$String))$transpose()
}
\keyword{DataFrame}
diff --git a/man/ExprList_join.Rd b/man/ExprList_join.Rd
index 61f560f92..d515b5bc5 100644
--- a/man/ExprList_join.Rd
+++ b/man/ExprList_join.Rd
@@ -14,11 +14,11 @@ ExprList_join(separator)
\item{separator}{String to separate the items with. Can be an Expr.}
}
\value{
-Series of dtype Utf8
+Series of dtype String
}
\description{
Join all string items in a sublist and place a separator between them.
-This errors if inner type of list \verb{!= Utf8}.
+This errors if inner type of list \verb{!= String}.
}
\examples{
df = pl$DataFrame(list(s = list(c("a", "b", "c"), c("x", "y"))))
diff --git a/man/ExprStr_concat.Rd b/man/ExprStr_concat.Rd
index b29a572c9..45f2e4b6d 100644
--- a/man/ExprStr_concat.Rd
+++ b/man/ExprStr_concat.Rd
@@ -13,7 +13,7 @@ ExprStr_concat(delimiter = "-", ignore_nulls = TRUE)
propagated: if the column contains any null values, the output is null.}
}
\value{
-Expr of Utf8 concatenated
+Expr of String concatenated
}
\description{
Vertically concatenate the values in the Series to a single
diff --git a/man/ExprStr_decode.Rd b/man/ExprStr_decode.Rd
index e84ded69b..4b3763885 100644
--- a/man/ExprStr_decode.Rd
+++ b/man/ExprStr_decode.Rd
@@ -15,7 +15,7 @@ ExprStr_decode(encoding, ..., strict = TRUE)
cannot be decoded. Otherwise, replace it with a null value.}
}
\value{
-Utf8 array with values decoded using provided encoding
+String array with values decoded using provided encoding
}
\description{
Decode a value using the provided encoding
@@ -27,8 +27,8 @@ df$with_columns(
pl$col("strings")$str$encode("base64")$alias("base64"), # notice DataType is not encoded
pl$col("strings")$str$encode("hex")$alias("hex") # ... and must restored with cast
)$with_columns(
- pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$Utf8),
- pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$Utf8)
+ pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$String),
+ pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$String)
)
}
\keyword{ExprStr}
diff --git a/man/ExprStr_encode.Rd b/man/ExprStr_encode.Rd
index 91272edd6..ca3692b11 100644
--- a/man/ExprStr_encode.Rd
+++ b/man/ExprStr_encode.Rd
@@ -10,7 +10,7 @@ ExprStr_encode(encoding)
\item{encoding}{Either 'hex' or 'base64'.}
}
\value{
-Utf8 array with values encoded using provided encoding
+String array with values encoded using provided encoding
}
\description{
Encode a value using the provided encoding
@@ -22,8 +22,8 @@ df$with_columns(
pl$col("strings")$str$encode("base64")$alias("base64"), # notice DataType is not encoded
pl$col("strings")$str$encode("hex")$alias("hex") # ... and must restored with cast
)$with_columns(
- pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$Utf8),
- pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$Utf8)
+ pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$String),
+ pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$String)
)
}
\keyword{ExprStr}
diff --git a/man/ExprStr_explode.Rd b/man/ExprStr_explode.Rd
index 2f419c241..18880c287 100644
--- a/man/ExprStr_explode.Rd
+++ b/man/ExprStr_explode.Rd
@@ -7,7 +7,7 @@
ExprStr_explode()
}
\value{
-Expr: Series of dtype Utf8.
+Expr: Series of dtype String.
}
\description{
Returns a column with a separate row for every string character
diff --git a/man/ExprStr_extract.Rd b/man/ExprStr_extract.Rd
index 7fc2502a8..a27009bb6 100644
--- a/man/ExprStr_extract.Rd
+++ b/man/ExprStr_extract.Rd
@@ -13,7 +13,7 @@ ExprStr_extract(pattern, group_index)
pattern, first group begin at index 1 (default).}
}
\value{
-Utf8 array. Contains null if original value is null or regex capture nothing.
+String array. Contains null if original value is null or regex capture nothing.
}
\description{
Extract the target capture group from provided patterns
diff --git a/man/ExprStr_extract_all.Rd b/man/ExprStr_extract_all.Rd
index fa31066d7..653800915 100644
--- a/man/ExprStr_extract_all.Rd
+++ b/man/ExprStr_extract_all.Rd
@@ -10,7 +10,7 @@ ExprStr_extract_all(pattern)
\item{pattern}{A valid regex pattern}
}
\value{
-\code{List[Utf8]} array. Contain null if original value is null or regex capture
+\code{List[String]} array. Contain null if original value is null or regex capture
nothing.
}
\description{
diff --git a/man/ExprStr_json_path_match.Rd b/man/ExprStr_json_path_match.Rd
index 880311430..b56c7c3be 100644
--- a/man/ExprStr_json_path_match.Rd
+++ b/man/ExprStr_json_path_match.Rd
@@ -10,7 +10,7 @@ ExprStr_json_path_match(json_path)
\item{json_path}{A valid JSON path query string.}
}
\value{
-Utf8 array. Contain null if original value is null or the json_path
+String array. Contain null if original value is null or the json_path
return nothing.
}
\description{
@@ -18,7 +18,7 @@ Extract the first match of JSON string with the provided JSONPath expression
}
\details{
Throw errors if encounter invalid JSON strings. All return value will be
-cast to Utf8 regardless of the original value.
+cast to String regardless of the original value.
Documentation on JSONPath standard can be found here: \url{https://goessner.net/articles/JsonPath/}.
}
diff --git a/man/ExprStr_pad_end.Rd b/man/ExprStr_pad_end.Rd
index 447e8f8a0..a61abeac4 100644
--- a/man/ExprStr_pad_end.Rd
+++ b/man/ExprStr_pad_end.Rd
@@ -12,7 +12,7 @@ ExprStr_pad_end(width, fillchar = " ")
\item{fillchar}{Fill with this ASCII character.}
}
\value{
-Expr of Utf8
+Expr of String
}
\description{
Return the string left justified in a string of length \code{width}.
diff --git a/man/ExprStr_pad_start.Rd b/man/ExprStr_pad_start.Rd
index 5fae7980c..8176d5059 100644
--- a/man/ExprStr_pad_start.Rd
+++ b/man/ExprStr_pad_start.Rd
@@ -12,7 +12,7 @@ ExprStr_pad_start(width, fillchar = " ")
\item{fillchar}{Fill with this ASCII character.}
}
\value{
-Expr of Utf8
+Expr of String
}
\description{
Return the string right justified in a string of length \code{width}.
diff --git a/man/ExprStr_replace.Rd b/man/ExprStr_replace.Rd
index d4ec1d8f7..d458f2ae6 100644
--- a/man/ExprStr_replace.Rd
+++ b/man/ExprStr_replace.Rd
@@ -14,7 +14,7 @@ ExprStr_replace(pattern, value, literal = FALSE)
\item{literal}{Treat pattern as a literal string.}
}
\value{
-Expr of Utf8 Series
+Expr of String Series
}
\description{
Replace first matching regex/literal substring with a new string value
diff --git a/man/ExprStr_replace_all.Rd b/man/ExprStr_replace_all.Rd
index 56d0a1d41..36e48d7e0 100644
--- a/man/ExprStr_replace_all.Rd
+++ b/man/ExprStr_replace_all.Rd
@@ -14,7 +14,7 @@ ExprStr_replace_all(pattern, value, literal = FALSE)
\item{literal}{Treat pattern as a literal string.}
}
\value{
-Expr of Utf8 Series
+Expr of String Series
}
\description{
Replace all matching regex/literal substrings with a new string value
diff --git a/man/ExprStr_slice.Rd b/man/ExprStr_slice.Rd
index 4d6bc3a4a..a8a1ef2ab 100644
--- a/man/ExprStr_slice.Rd
+++ b/man/ExprStr_slice.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__string.R
\name{ExprStr_slice}
\alias{ExprStr_slice}
-\title{Create subslices of the string values of a Utf8 Series}
+\title{Create subslices of the string values of a String Series}
\usage{
ExprStr_slice(offset, length = NULL)
}
@@ -13,10 +13,10 @@ ExprStr_slice(offset, length = NULL)
the end of the string.}
}
\value{
-Expr: Series of dtype Utf8.
+Expr: Series of dtype String.
}
\description{
-Create subslices of the string values of a Utf8 Series
+Create subslices of the string values of a String Series
}
\examples{
df = pl$DataFrame(s = c("pear", NA, "papaya", "dragonfruit"))
diff --git a/man/ExprStr_split.Rd b/man/ExprStr_split.Rd
index bdbac79dc..9cb6123d0 100644
--- a/man/ExprStr_split.Rd
+++ b/man/ExprStr_split.Rd
@@ -13,7 +13,7 @@ used to split the string.}
\item{inclusive}{If \code{TRUE}, include the split character/string in the results.}
}
\value{
-List of Utf8 type
+List of String type
}
\description{
Split the string by a substring
diff --git a/man/ExprStr_split_exact.Rd b/man/ExprStr_split_exact.Rd
index 2d7df2417..c6f3405a6 100644
--- a/man/ExprStr_split_exact.Rd
+++ b/man/ExprStr_split_exact.Rd
@@ -14,7 +14,7 @@ ExprStr_split_exact(by, n, inclusive = FALSE)
\item{inclusive}{If \code{TRUE}, include the split character/string in the results.}
}
\value{
-Struct where each of n+1 fields is of Utf8 type
+Struct where each of n+1 fields is of String type
}
\description{
This results in a struct of \code{n+1} fields. If it cannot make \code{n}
diff --git a/man/ExprStr_splitn.Rd b/man/ExprStr_splitn.Rd
index a8b9c80e8..9f504e491 100644
--- a/man/ExprStr_splitn.Rd
+++ b/man/ExprStr_splitn.Rd
@@ -12,7 +12,7 @@ ExprStr_splitn(by, n)
\item{n}{Number of splits to make.}
}
\value{
-Struct where each of \code{n} fields is of Utf8 type
+Struct where each of \code{n} fields is of String type
}
\description{
If the number of possible splits is less than \code{n-1}, the remaining field
diff --git a/man/ExprStr_strip_chars.Rd b/man/ExprStr_strip_chars.Rd
index f988f515f..8884e95c1 100644
--- a/man/ExprStr_strip_chars.Rd
+++ b/man/ExprStr_strip_chars.Rd
@@ -13,7 +13,7 @@ set of characters will be stripped. If \code{NULL} (default), all whitespace is
removed instead. This can be an Expr.}
}
\value{
-Expr of Utf8 lowercase chars
+Expr of String with leading and trailing characters removed
}
\description{
Remove leading and trailing characters.
diff --git a/man/ExprStr_strip_chars_end.Rd b/man/ExprStr_strip_chars_end.Rd
index 690fe1d05..bf7b38a07 100644
--- a/man/ExprStr_strip_chars_end.Rd
+++ b/man/ExprStr_strip_chars_end.Rd
@@ -13,7 +13,7 @@ set of characters will be stripped. If \code{NULL} (default), all whitespace is
removed instead. This can be an Expr.}
}
\value{
-Expr of Utf8 lowercase chars
+Expr of String with trailing characters removed
}
\description{
Remove trailing characters.
diff --git a/man/ExprStr_strip_chars_start.Rd b/man/ExprStr_strip_chars_start.Rd
index d9e472020..5b87bd1ab 100644
--- a/man/ExprStr_strip_chars_start.Rd
+++ b/man/ExprStr_strip_chars_start.Rd
@@ -13,7 +13,7 @@ set of characters will be stripped. If \code{NULL} (default), all whitespace is
removed instead. This can be an Expr.}
}
\value{
-Expr of Utf8 lowercase chars
+Expr of String with leading characters removed
}
\description{
Remove leading characters.
diff --git a/man/ExprStr_strptime.Rd b/man/ExprStr_strptime.Rd
index 299be8a0c..9603aebc1 100644
--- a/man/ExprStr_strptime.Rd
+++ b/man/ExprStr_strptime.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__string.R
\name{ExprStr_strptime}
\alias{ExprStr_strptime}
-\title{Convert a Utf8 column into a Date/Datetime/Time column.}
+\title{Convert a String column into a Date/Datetime/Time column.}
\usage{
ExprStr_strptime(
datatype,
@@ -42,7 +42,7 @@ conversion.}
Expr of a Date, Datetime or Time Series
}
\description{
-Convert a Utf8 column into a Date/Datetime/Time column.
+Convert a String column into a Date/Datetime/Time column.
}
\details{
When parsing a Datetime the column precision will be inferred from the format
diff --git a/man/ExprStr_to_date.Rd b/man/ExprStr_to_date.Rd
index fb6e8fc47..d1ec18061 100644
--- a/man/ExprStr_to_date.Rd
+++ b/man/ExprStr_to_date.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__string.R
\name{ExprStr_to_date}
\alias{ExprStr_to_date}
-\title{Convert a Utf8 column into a Date column}
+\title{Convert a String column into a Date column}
\usage{
ExprStr_to_date(format = NULL, strict = TRUE, exact = TRUE, cache = TRUE)
}
@@ -25,7 +25,7 @@ conversion.}
Expr
}
\description{
-Convert a Utf8 column into a Date column
+Convert a String column into a Date column
}
\examples{
pl$DataFrame(str_date = c("2009-01-02", "2009-01-03", "2009-1-4", "2009 05 01"))$
diff --git a/man/ExprStr_to_datetime.Rd b/man/ExprStr_to_datetime.Rd
index 0a2027ec5..452aed605 100644
--- a/man/ExprStr_to_datetime.Rd
+++ b/man/ExprStr_to_datetime.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__string.R
\name{ExprStr_to_datetime}
\alias{ExprStr_to_datetime}
-\title{Convert a Utf8 column into a Datetime column}
+\title{Convert a String column into a Datetime column}
\usage{
ExprStr_to_datetime(
format = NULL,
@@ -45,7 +45,7 @@ conversion.}
Expr
}
\description{
-Convert a Utf8 column into a Datetime column
+Convert a String column into a Datetime column
}
\examples{
pl$DataFrame(str_date = c("2009-01-02 01:00", "2009-01-03 02:00", "2009-1-4 3:00"))$
diff --git a/man/ExprStr_to_lowercase.Rd b/man/ExprStr_to_lowercase.Rd
index b66050884..a359c8614 100644
--- a/man/ExprStr_to_lowercase.Rd
+++ b/man/ExprStr_to_lowercase.Rd
@@ -7,7 +7,7 @@
ExprStr_to_lowercase()
}
\value{
-Expr of Utf8 lowercase chars
+Expr of String lowercase chars
}
\description{
Transform to lowercase variant.
diff --git a/man/ExprStr_to_time.Rd b/man/ExprStr_to_time.Rd
index 351f73a5a..0b63c5eae 100644
--- a/man/ExprStr_to_time.Rd
+++ b/man/ExprStr_to_time.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__string.R
\name{ExprStr_to_time}
\alias{ExprStr_to_time}
-\title{Convert a Utf8 column into a Time column}
+\title{Convert a String column into a Time column}
\usage{
ExprStr_to_time(format = NULL, strict = TRUE, cache = TRUE)
}
@@ -22,7 +22,7 @@ conversion.}
Expr
}
\description{
-Convert a Utf8 column into a Time column
+Convert a String column into a Time column
}
\examples{
pl$DataFrame(str_time = c("01:20:01", "28:00:02", "03:00:02"))$
diff --git a/man/ExprStr_to_titlecase.Rd b/man/ExprStr_to_titlecase.Rd
index d6dd51ca8..4a53880f5 100644
--- a/man/ExprStr_to_titlecase.Rd
+++ b/man/ExprStr_to_titlecase.Rd
@@ -7,7 +7,7 @@
ExprStr_to_titlecase()
}
\value{
-Expr of Utf8 titlecase chars
+Expr of String titlecase chars
}
\description{
Transform to titlecase variant.
diff --git a/man/ExprStr_to_uppercase.Rd b/man/ExprStr_to_uppercase.Rd
index fbc8c1df2..c51324cf5 100644
--- a/man/ExprStr_to_uppercase.Rd
+++ b/man/ExprStr_to_uppercase.Rd
@@ -7,7 +7,7 @@
ExprStr_to_uppercase()
}
\value{
-Expr of Utf8 uppercase chars
+Expr of String uppercase chars
}
\description{
Transform to uppercase variant.
diff --git a/man/ExprStr_zfill.Rd b/man/ExprStr_zfill.Rd
index 457d1bdb8..477a28621 100644
--- a/man/ExprStr_zfill.Rd
+++ b/man/ExprStr_zfill.Rd
@@ -28,11 +28,11 @@ less than or equal to \code{len(s)}.
\examples{
some_floats_expr = pl$lit(c(0, 10, -5, 5))
-# cast to Utf8 and ljust alignment = 5, and view as R char vector
-some_floats_expr$cast(pl$Utf8)$str$zfill(5)$to_r()
+# cast to String and zero-pad to width 5, and view as R char vector
+some_floats_expr$cast(pl$String)$str$zfill(5)$to_r()
# cast to int and the to utf8 and then ljust alignment = 5, and view as R
# char vector
-some_floats_expr$cast(pl$Int64)$cast(pl$Utf8)$str$zfill(5)$to_r()
+some_floats_expr$cast(pl$Int64)$cast(pl$String)$str$zfill(5)$to_r()
}
\keyword{ExprStr}
diff --git a/man/Expr_explode.Rd b/man/Expr_explode.Rd
index 374e784a7..1e7873dd9 100644
--- a/man/Expr_explode.Rd
+++ b/man/Expr_explode.Rd
@@ -2,7 +2,7 @@
% Please edit documentation in R/expr__expr.R
\name{Expr_explode}
\alias{Expr_explode}
-\title{Explode a list or Utf8 Series}
+\title{Explode a list or String Series}
\usage{
Expr_explode
}
diff --git a/man/Expr_flatten.Rd b/man/Expr_flatten.Rd
index 736be03d5..bc88ccd36 100644
--- a/man/Expr_flatten.Rd
+++ b/man/Expr_flatten.Rd
@@ -3,7 +3,7 @@
\docType{data}
\name{Expr_flatten}
\alias{Expr_flatten}
-\title{Explode a list or Utf8 Series}
+\title{Explode a list or String Series}
\format{
An object of class \code{character} of length 1.
}
diff --git a/man/Expr_gather.Rd b/man/Expr_gather.Rd
index c307d6c52..01d45faf4 100644
--- a/man/Expr_gather.Rd
+++ b/man/Expr_gather.Rd
@@ -8,7 +8,7 @@ Expr_gather(indices)
}
\arguments{
\item{indices}{R scalar/vector or Series, or Expr that leads to a Series of
-dtype UInt32.}
+dtype Int64 (0-indexed).}
}
\value{
Expr
@@ -17,5 +17,7 @@ Expr
Gather values by index
}
\examples{
-pl$DataFrame(a = c(1, 2, 4, 5, 8))$select(pl$col("a")$gather(c(0, 2, 4)))
+df = pl$DataFrame(a = 1:10)
+
+df$select(pl$col("a")$gather(c(0, 2, 4, -1)))
}
diff --git a/man/Expr_map_batches.Rd b/man/Expr_map_batches.Rd
index 01c138205..592b035ab 100644
--- a/man/Expr_map_batches.Rd
+++ b/man/Expr_map_batches.Rd
@@ -56,7 +56,7 @@ pl$DataFrame(iris)$
select(
pl$col("Sepal.Length")$map_batches(\(x) {
paste("cheese", as.character(x$to_vector()))
- }, pl$dtypes$Utf8)
+ }, pl$dtypes$String)
)
# R parallel process example, use Sys.sleep() to imitate some CPU expensive
diff --git a/man/Expr_map_elements.Rd b/man/Expr_map_elements.Rd
index f1194329c..eb135efed 100644
--- a/man/Expr_map_elements.Rd
+++ b/man/Expr_map_elements.Rd
@@ -92,7 +92,7 @@ e_add10 = my_selection$map_elements(\(x) {
e_letter = my_selection$map_elements(\(x) {
letters[ceiling(x)]
-}, return_type = pl$dtypes$Utf8)$name$suffix("_letter")
+}, return_type = pl$dtypes$String)$name$suffix("_letter")
pl$DataFrame(iris)$select(e_add10, e_letter)
diff --git a/man/IO_read_csv.Rd b/man/IO_read_csv.Rd
index 8b8910f71..703dbd017 100644
--- a/man/IO_read_csv.Rd
+++ b/man/IO_read_csv.Rd
@@ -59,7 +59,7 @@ list is used while reading to overwrite dtypes. Supported types so far are:
\item "Float64" or "float64" for DataType::Float64,
\item "Int32" or "integer" for DataType::Int32,
\item "Int64" or "integer64" for DataType::Int64,
-\item "Utf8" or "character" for DataType::Utf8,
+\item "String" or "character" for DataType::String,
}}
\item{null_values}{Values to interpret as \code{NA} values. Can be:
@@ -100,7 +100,7 @@ the name is set).}
\item{try_parse_dates}{Try to automatically parse dates. Most ISO8601-like
formats can be inferred, as well as a handful of others. If this does not
-succeed, the column remains of data type \code{pl$Utf8}.}
+succeed, the column remains of data type \code{pl$String}.}
\item{eol_char}{Single byte end of line character (default: \verb{\\n}). When
encountering a file with Windows line endings (\verb{\\r\\n}), one can go with the
diff --git a/man/IO_scan_csv.Rd b/man/IO_scan_csv.Rd
index 51ec750e6..10e80dc4a 100644
--- a/man/IO_scan_csv.Rd
+++ b/man/IO_scan_csv.Rd
@@ -59,7 +59,7 @@ list is used while reading to overwrite dtypes. Supported types so far are:
\item "Float64" or "float64" for DataType::Float64,
\item "Int32" or "integer" for DataType::Int32,
\item "Int64" or "integer64" for DataType::Int64,
-\item "Utf8" or "character" for DataType::Utf8,
+\item "String" or "character" for DataType::String,
}}
\item{null_values}{Values to interpret as \code{NA} values. Can be:
@@ -100,7 +100,7 @@ the name is set).}
\item{try_parse_dates}{Try to automatically parse dates. Most ISO8601-like
formats can be inferred, as well as a handful of others. If this does not
-succeed, the column remains of data type \code{pl$Utf8}.}
+succeed, the column remains of data type \code{pl$String}.}
\item{eol_char}{Single byte end of line character (default: \verb{\\n}). When
encountering a file with Windows line endings (\verb{\\r\\n}), one can go with the
diff --git a/man/LazyFrame_explode.Rd b/man/LazyFrame_explode.Rd
index 9ae4b732e..786e875cb 100644
--- a/man/LazyFrame_explode.Rd
+++ b/man/LazyFrame_explode.Rd
@@ -21,7 +21,7 @@ This will take every element of a list column and add it on an
additional row.
}
\details{
-Only columns of DataType \code{List} or \code{Utf8} can be exploded.
+Only columns of DataType \code{List} or \code{String} can be exploded.
Named expressions like \verb{$explode(a = pl$col("b"))} will not implicitly trigger
\verb{$alias("a")} here, due to only variant \code{Expr::Column} is supported in
diff --git a/man/RField_class.Rd b/man/RField_class.Rd
index 037210929..a1ca78468 100644
--- a/man/RField_class.Rd
+++ b/man/RField_class.Rd
@@ -22,5 +22,5 @@ datatypes and Schemas to represent everything of the Series/Column except the
raw values.
}
\examples{
-pl$Field("city_names", pl$Utf8)
+pl$Field("city_names", pl$String)
}
diff --git a/man/RField_datatype.Rd b/man/RField_datatype.Rd
index c7c80f1cf..d7c73ae1d 100644
--- a/man/RField_datatype.Rd
+++ b/man/RField_datatype.Rd
@@ -10,7 +10,7 @@ RField_datatype()
Get/set Field datatype
}
\examples{
-field = pl$Field("Cities", pl$Utf8)
+field = pl$Field("Cities", pl$String)
field$datatype
field$datatype = pl$Categorical #<- is fine too
diff --git a/man/RField_name.Rd b/man/RField_name.Rd
index 4753528be..2f4f931f8 100644
--- a/man/RField_name.Rd
+++ b/man/RField_name.Rd
@@ -10,7 +10,7 @@ RField_name()
Get/set Field name
}
\examples{
-field = pl$Field("Cities", pl$Utf8)
+field = pl$Field("Cities", pl$String)
field$name
field$name = "CityPoPulations" #<- is fine too
diff --git a/man/Series_map_elements.Rd b/man/Series_map_elements.Rd
index 8573358a1..08d960751 100644
--- a/man/Series_map_elements.Rd
+++ b/man/Series_map_elements.Rd
@@ -30,7 +30,7 @@ About as slow as regular non-vectorized R. Similar to using R sapply on a vector
\examples{
s = pl$Series(letters[1:5], "ltrs")
f = \(x) paste(x, ":", as.integer(charToRaw(x)))
-s$map_elements(f, pl$Utf8)
+s$map_elements(f, pl$String)
# same as
pl$Series(sapply(s$to_r(), f), s$name)
diff --git a/man/Series_to_r.Rd b/man/Series_to_r.Rd
index 2072eae90..2a8ce2fd5 100644
--- a/man/Series_to_r.Rd
+++ b/man/Series_to_r.Rd
@@ -32,11 +32,10 @@ Thus every leaf(non list type) will be placed on the same depth of the tree, and
}
\examples{
-# make polars Series_Utf8
series_vec = pl$Series(letters[1:3])
# Series_non_list
-series_vec$to_r() # as vector because Series DataType is not list (is Utf8)
+series_vec$to_r() # as vector because Series DataType is not list (is String)
series_vec$to_r_list() # implicit call as.list(), convert to list
series_vec$to_vector() # implicit call unlist(), same as to_r() as already vector
diff --git a/man/pl_DataFrame.Rd b/man/pl_DataFrame.Rd
index 9c581ae90..727e0fa46 100644
--- a/man/pl_DataFrame.Rd
+++ b/man/pl_DataFrame.Rd
@@ -49,6 +49,6 @@ pl$DataFrame(list(
pl$DataFrame(mtcars)
# custom schema
-pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8))
+pl$DataFrame(iris, schema = list(Sepal.Length = pl$Float32, Species = pl$String))
}
\keyword{DataFrame_new}
diff --git a/man/pl_LazyFrame.Rd b/man/pl_LazyFrame.Rd
index 83b0edf61..fcce955cd 100644
--- a/man/pl_LazyFrame.Rd
+++ b/man/pl_LazyFrame.Rd
@@ -36,7 +36,7 @@ pl$LazyFrame(list(
# custom schema
pl$LazyFrame(
iris,
- schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+ schema = list(Sepal.Length = pl$Float32, Species = pl$String)
)$collect()
}
\keyword{LazyFrame_new}
diff --git a/man/pl_col.Rd b/man/pl_col.Rd
index 4b52a6ee7..184e61ed1 100644
--- a/man/pl_col.Rd
+++ b/man/pl_col.Rd
@@ -49,7 +49,7 @@ df$select(pl$col("^foo.*$"))
df$select(pl$col(pl$dtypes$Float64))
# ... or an R list of DataTypes, select any column of any such DataType
-df$select(pl$col(list(pl$dtypes$Float64, pl$dtypes$Utf8)))
+df$select(pl$col(list(pl$dtypes$Float64, pl$dtypes$String)))
# from Series of names
df$select(pl$col(pl$Series(c("bar", "foobar"))))
diff --git a/man/pl_concat_str.Rd b/man/pl_concat_str.Rd
index 1362f7a63..2053bd44d 100644
--- a/man/pl_concat_str.Rd
+++ b/man/pl_concat_str.Rd
@@ -9,7 +9,7 @@ pl_concat_str(..., separator = "")
\arguments{
\item{...}{Columns to concatenate into a single string column. Accepts
expressions. Strings are parsed as column names, other non-expression inputs
-are parsed as literals. Non-Utf8 columns are cast to Utf8.}
+are parsed as literals. Non-String columns are cast to String.}
\item{separator}{String that will be used to separate the values of each
column.}
diff --git a/man/pl_dtypes.Rd b/man/pl_dtypes.Rd
index c12619594..9eff6173c 100644
--- a/man/pl_dtypes.Rd
+++ b/man/pl_dtypes.Rd
@@ -12,14 +12,14 @@ not applicable
\examples{
print(ls(pl$dtypes))
pl$dtypes$Float64
-pl$dtypes$Utf8
+pl$dtypes$String
pl$List(pl$List(pl$UInt64))
-pl$Struct(pl$Field("CityNames", pl$Utf8))
+pl$Struct(pl$Field("CityNames", pl$String))
-# The function changes type from Integer(Int32)[Integers] to char(Utf8)[Strings]
-# specifying the output DataType: Utf8 solves the problem
-pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$Utf8)
+# The function changes type from Int32 to String
+# Specifying the output DataType: String solves the problem
+pl$Series(1:4)$map_elements(\(x) letters[x], datatype = pl$dtypes$String)
}
diff --git a/man/pl_from_arrow.Rd b/man/pl_from_arrow.Rd
index ca4434913..a49f4a96a 100644
--- a/man/pl_from_arrow.Rd
+++ b/man/pl_from_arrow.Rd
@@ -37,7 +37,7 @@ import Arrow Table or Array
\examples{
pl$from_arrow(
data = arrow::arrow_table(iris),
- schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+ schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$String)
)
char_schema = names(iris)
diff --git a/man/pl_pl.Rd b/man/pl_pl.Rd
index 0314de1ab..a485ff4b7 100644
--- a/man/pl_pl.Rd
+++ b/man/pl_pl.Rd
@@ -6,7 +6,7 @@
\alias{pl}
\title{The complete polars public API.}
\format{
-An object of class \code{pl_polars_env} (inherits from \code{environment}) of length 95.
+An object of class \code{pl_polars_env} (inherits from \code{environment}) of length 96.
}
\usage{
pl
diff --git a/man/pl_struct.Rd b/man/pl_struct.Rd
index 91b392428..b66f1fc54 100644
--- a/man/pl_struct.Rd
+++ b/man/pl_struct.Rd
@@ -54,13 +54,13 @@ print(df$schema) # returns a schema, a named list containing one element a Struc
# wrap two columns in a struct and provide a schema to set all or some DataTypes by name
e1 = pl$struct(
pl$col(c("int", "str")),
- schema = list(int = pl$Int64, str = pl$Utf8)
+ schema = list(int = pl$Int64, str = pl$String)
)$alias("my_struct")
# same result as e.g. wrapping the columns in a struct and casting afterwards
e2 = pl$struct(
list(pl$col("int"), pl$col("str"))
)$cast(
- pl$Struct(int = pl$Int64, str = pl$Utf8)
+ pl$Struct(int = pl$Int64, str = pl$String)
)$alias("my_struct")
df = pl$DataFrame(
diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock
index 01c8b4e25..3500f9f33 100644
--- a/src/rust/Cargo.lock
+++ b/src/rust/Cargo.lock
@@ -1382,8 +1382,8 @@ dependencies = [
[[package]]
name = "polars"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"getrandom",
"polars-core",
@@ -1398,8 +1398,8 @@ dependencies = [
[[package]]
name = "polars-arrow"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"arrow-format",
@@ -1434,8 +1434,8 @@ dependencies = [
[[package]]
name = "polars-compute"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"bytemuck",
"num-traits",
@@ -1446,8 +1446,8 @@ dependencies = [
[[package]]
name = "polars-core"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"bitflags 2.4.1",
@@ -1481,8 +1481,8 @@ dependencies = [
[[package]]
name = "polars-error"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"arrow-format",
"avro-schema",
@@ -1493,8 +1493,8 @@ dependencies = [
[[package]]
name = "polars-io"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"async-trait",
@@ -1534,8 +1534,8 @@ dependencies = [
[[package]]
name = "polars-json"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"chrono",
@@ -1554,8 +1554,8 @@ dependencies = [
[[package]]
name = "polars-lazy"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"bitflags 2.4.1",
@@ -1577,8 +1577,8 @@ dependencies = [
[[package]]
name = "polars-ops"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"aho-corasick",
@@ -1613,8 +1613,8 @@ dependencies = [
[[package]]
name = "polars-parquet"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"async-stream",
@@ -1638,8 +1638,8 @@ dependencies = [
[[package]]
name = "polars-pipe"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"crossbeam-channel",
"crossbeam-queue",
@@ -1661,8 +1661,8 @@ dependencies = [
[[package]]
name = "polars-plan"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"bytemuck",
@@ -1688,8 +1688,8 @@ dependencies = [
[[package]]
name = "polars-row"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"polars-arrow",
"polars-error",
@@ -1698,8 +1698,8 @@ dependencies = [
[[package]]
name = "polars-sql"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"polars-arrow",
"polars-core",
@@ -1714,8 +1714,8 @@ dependencies = [
[[package]]
name = "polars-time"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"atoi",
"chrono",
@@ -1734,8 +1734,8 @@ dependencies = [
[[package]]
name = "polars-utils"
-version = "0.35.4"
-source = "git+https://github.com/pola-rs/polars.git?rev=4046c732dec0c9311294a2589590b4d017c5a02a#4046c732dec0c9311294a2589590b4d017c5a02a"
+version = "0.36.2"
+source = "git+https://github.com/pola-rs/polars.git?rev=fa59ffc1685043b44476dcb2a3f3804460ead5c5#fa59ffc1685043b44476dcb2a3f3804460ead5c5"
dependencies = [
"ahash",
"bytemuck",
@@ -1776,7 +1776,7 @@ dependencies = [
[[package]]
name = "r-polars"
-version = "0.35.2"
+version = "0.36.0"
dependencies = [
"either",
"extendr-api",
diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml
index 24149d8f5..09d2f2063 100644
--- a/src/rust/Cargo.toml
+++ b/src/rust/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "r-polars"
-version = "0.35.2"
+version = "0.36.0"
edition = "2021"
rust-version = "1.73"
publish = false
@@ -49,8 +49,8 @@ serde_json = "*"
smartstring = "1.0.1"
state = "0.6.0"
thiserror = "1.0.56"
-polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "4046c732dec0c9311294a2589590b4d017c5a02a", default-features = false }
-polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "4046c732dec0c9311294a2589590b4d017c5a02a", default-features = false }
+polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5", default-features = false }
+polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5", default-features = false }
either = "1"
#features copied from node-polars
@@ -145,4 +145,4 @@ features = [
"zip_with",
]
git = "https://github.com/pola-rs/polars.git"
-rev = "4046c732dec0c9311294a2589590b4d017c5a02a"
+rev = "fa59ffc1685043b44476dcb2a3f3804460ead5c5"
diff --git a/src/rust/src/conversion_s_to_r.rs b/src/rust/src/conversion_s_to_r.rs
index ecf8f1887..291be3aad 100644
--- a/src/rust/src/conversion_s_to_r.rs
+++ b/src/rust/src/conversion_s_to_r.rs
@@ -94,7 +94,7 @@ pub fn pl_series_to_list(
.map(|opt| opt.map(|val| val as f64))
.collect_robj()
}),
- Utf8 => s.utf8().map(|ca| ca.into_iter().collect_robj()),
+ String => s.str().map(|ca| ca.into_iter().collect_robj()),
Boolean => s.bool().map(|ca| ca.into_iter().collect_robj()),
Binary => s.binary().map(|ca| {
diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs
index f1cb66fcc..98754df7c 100644
--- a/src/rust/src/lazy/dsl.rs
+++ b/src/rust/src/lazy/dsl.rs
@@ -19,7 +19,7 @@ use extendr_api::{extendr, prelude::*, rprintln, Deref, DerefMut, Rinternals};
use pl::PolarsError as pl_error;
use pl::{
BinaryNameSpaceImpl, Duration, DurationMethods, IntoSeries, RollingGroupOptions,
- TemporalMethods, Utf8NameSpaceImpl,
+ StringNameSpaceImpl, TemporalMethods,
};
use polars::lazy::dsl;
use polars::prelude as pl;
@@ -133,7 +133,7 @@ impl RPolarsExpr {
}
(Rtype::Strings, 1) => {
if robj.is_na() {
- Ok(dsl::lit(pl::NULL).cast(pl::DataType::Utf8))
+ Ok(dsl::lit(pl::NULL).cast(pl::DataType::String))
} else {
Ok(dsl::lit(robj.as_str().unwrap()))
}
@@ -299,7 +299,11 @@ impl RPolarsExpr {
}
pub fn gather(&self, idx: Robj) -> RResult {
- Ok(self.clone().0.gather(robj_to!(PLExpr, idx)?).into())
+ Ok(self
+ .clone()
+ .0
+ .gather(robj_to!(PLExpr, idx)?.cast(pl::DataType::Int64))
+ .into())
}
pub fn sort_by(&self, by: Robj, descending: Robj) -> RResult {
@@ -919,7 +923,7 @@ impl RPolarsExpr {
move |s| {
//swap owned inline string to str as only supported and if swapped here life time is long enough
let av = match &av {
- pl::AnyValue::Utf8Owned(x) => pl::AnyValue::Utf8(x.as_str()),
+ pl::AnyValue::StringOwned(x) => pl::AnyValue::String(x.as_str()),
x => x.clone(),
};
s.extend_constant(av, n).map(Some)
@@ -1833,7 +1837,7 @@ impl RPolarsExpr {
pub fn str_len_bytes(&self) -> Self {
use pl::*;
let function = |s: pl::Series| {
- let ca = s.utf8()?;
+ let ca = s.str()?;
Ok(Some(ca.str_len_bytes().into_series()))
};
self.clone()
@@ -1845,7 +1849,7 @@ impl RPolarsExpr {
pub fn str_len_chars(&self) -> Self {
let function = |s: pl::Series| {
- let ca = s.utf8()?;
+ let ca = s.str()?;
Ok(Some(ca.str_len_chars().into_series()))
};
self.clone()
@@ -1950,7 +1954,7 @@ impl RPolarsExpr {
use pl::*;
let pat: String = robj_to!(String, pat, "in str$json_path_match: {}")?;
let function = move |s: Series| {
- let ca = s.utf8()?;
+ let ca = s.str()?;
match ca.json_path_match(&pat) {
Ok(ca) => Ok(Some(ca.into_series())),
Err(e) => Err(pl::PolarsError::ComputeError(format!("{e:?}").into())),
@@ -1959,7 +1963,7 @@ impl RPolarsExpr {
Ok(RPolarsExpr(
self.0
.clone()
- .map(function, pl::GetOutput::from_type(pl::DataType::Utf8))
+ .map(function, pl::GetOutput::from_type(pl::DataType::String))
.with_fmt("str.json_path_match"),
))
}();
@@ -1977,65 +1981,38 @@ impl RPolarsExpr {
.into())
}
- pub fn str_hex_encode(&self) -> Self {
- use pl::*;
- self.clone()
- .0
- .map(
- move |s| s.utf8().map(|s| Some(s.hex_encode().into_series())),
- pl::GetOutput::same_type(),
- )
- .with_fmt("str.hex_encode")
- .into()
+ pub fn str_hex_encode(&self) -> RResult {
+ Ok(self.0.clone().str().hex_encode().into())
}
- pub fn str_hex_decode(&self, strict: bool) -> Self {
- use pl::*;
- self.clone()
+ pub fn str_hex_decode(&self, strict: Robj) -> RResult {
+ Ok(self
.0
- .map(
- move |s| s.utf8()?.hex_decode(strict).map(|s| Some(s.into_series())),
- pl::GetOutput::same_type(),
- )
- .with_fmt("str.hex_decode")
- .into()
+ .clone()
+ .str()
+ .hex_decode(robj_to!(bool, strict)?)
+ .into())
}
- pub fn str_base64_encode(&self) -> Self {
- use pl::*;
- self.clone()
- .0
- .map(
- move |s| s.utf8().map(|s| Some(s.base64_encode().into_series())),
- pl::GetOutput::same_type(),
- )
- .with_fmt("str.base64_encode")
- .into()
+ pub fn str_base64_encode(&self) -> RResult {
+ Ok(self.0.clone().str().base64_encode().into())
}
- pub fn str_base64_decode(&self, strict: bool) -> Self {
- use pl::*;
- self.clone()
+ pub fn str_base64_decode(&self, strict: Robj) -> RResult {
+ Ok(self
.0
- .map(
- move |s| {
- s.utf8()?
- .base64_decode(strict)
- .map(|s| Some(s.into_series()))
- },
- pl::GetOutput::same_type(),
- )
- .with_fmt("str.base64_decode")
- .into()
+ .clone()
+ .str()
+ .base64_decode(robj_to!(bool, strict)?)
+ .into())
}
- pub fn str_extract(&self, pattern: Robj, group_index: Robj) -> List {
- let res = || -> Result {
- let pat = robj_to!(String, pattern)?;
- let gi = robj_to!(usize, group_index)?;
- Ok(self.0.clone().str().extract(pat.as_str(), gi).into())
- }()
- .map_err(|err| format!("in str$extract: {}", err));
- r_result_list(res)
+ pub fn str_extract(&self, pattern: Robj, group_index: Robj) -> RResult {
+ Ok(self
+ .0
+ .clone()
+ .str()
+ .extract(robj_to!(str, pattern)?, robj_to!(usize, group_index)?)
+ .into())
}
pub fn str_extract_all(&self, pattern: &RPolarsExpr) -> Self {
diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs
index 3344665d1..b4a7f91d8 100644
--- a/src/rust/src/rdatatype.rs
+++ b/src/rust/src/rdatatype.rs
@@ -73,7 +73,7 @@ impl RPolarsDataType {
"Float32" | "float32" | "double" => pl::DataType::Float32,
"Float64" | "float64" => pl::DataType::Float64,
- "Utf8" | "character" => pl::DataType::Utf8,
+ "Utf8" | "String" | "character" => pl::DataType::String,
"Binary" | "binary" => pl::DataType::Binary,
"Date" | "date" => pl::DataType::Date,
"Time" | "time" => pl::DataType::Time,
@@ -145,7 +145,7 @@ impl RPolarsDataType {
"Int64".into(),
"Float32".into(),
"Float64".into(),
- "Utf8".into(),
+ "String".into(),
"Binary".into(),
"Date".into(),
"Time".into(),
@@ -329,11 +329,11 @@ pub fn literal_to_any_value(litval: pl::LiteralValue) -> RResult Ok(av::UInt64(x)),
lv::UInt8(x) => Ok(av::UInt8(x)),
// lv::Utf8(x) => Ok(av::Utf8(x.as_str())),
- lv::Utf8(x) => {
+ lv::String(x) => {
let mut s = SString::new();
s.push_str(x.as_str());
- Ok(av::Utf8Owned(s))
+ Ok(av::StringOwned(s))
}
x => rerr().bad_val(format!("cannot convert LiteralValue {:?} to AnyValue", x)),
}
diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs
index 67997bb2e..66c054d5d 100644
--- a/src/rust/src/series.rs
+++ b/src/rust/src/series.rs
@@ -166,7 +166,7 @@ impl RPolarsSeries {
"{}",
self.0.get(index.try_into().expect("usize>u32")).unwrap()
);
- if let DataType::Utf8 | DataType::Categorical(_, _) = self.0.dtype() {
+ if let DataType::String | DataType::Categorical(_, _) = self.0.dtype() {
let v_trunc = &val[..val
.char_indices()
.take(str_length.try_into().expect("usize>u32"))
@@ -222,7 +222,7 @@ impl RPolarsSeries {
Int64 => comp!(self, other, i64, op),
Float64 => comp!(self, other, f64, op),
Boolean => comp!(self, other, bool, op),
- Utf8 => comp!(self, other, utf8, op),
+ String => comp!(self, other, str, op),
_ => Err(format!(
"oups this type: {} is not supported yet, but easily could be",
dtype
@@ -365,7 +365,7 @@ impl RPolarsSeries {
Int32 => apply_input!(self.0, i32, rfun, na_fun),
Int16 => apply_input!(self.0, i16, rfun, na_fun),
Int8 => apply_input!(self.0, i8, rfun, na_fun),
- Utf8 => apply_input!(self.0, utf8, rfun, na_fun),
+ String => apply_input!(self.0, str, rfun, na_fun),
Boolean => apply_input!(self.0, bool, rfun, na_fun),
//List(..) => apply_input!(self.0, list, rfun, na_fun),
List(..) => {
@@ -392,7 +392,7 @@ impl RPolarsSeries {
match out_type {
Float64 => apply_output!(r_iter, strict, allow_fail_eval, Doubles, Float64Chunked),
Int32 => apply_output!(r_iter, strict, allow_fail_eval, Integers, Int32Chunked),
- Utf8 => apply_output!(r_iter, strict, allow_fail_eval, Strings, Utf8Chunked),
+ String => apply_output!(r_iter, strict, allow_fail_eval, Strings, StringChunked),
Boolean => apply_output!(r_iter, strict, allow_fail_eval, Logicals, BooleanChunked),
List(..) => {
//ierate over R return values, opt if never run (no values), err if fail
diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs
index 9da0308aa..f77cac379 100644
--- a/src/rust/src/utils/mod.rs
+++ b/src/rust/src/utils/mod.rs
@@ -53,7 +53,7 @@ macro_rules! make_r_na_fun {
(i8 $rfun:expr) => {make_r_na_fun!(i32 $rfun)};
(f32 $rfun:expr) => {make_r_na_fun!(f64 $rfun)};
- (utf8 $rfun:expr) => {
+ (str $rfun:expr) => {
R!("function(f) {function() f(NA_character_)}")
.unwrap()
.as_function()
diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md
index 3ae4c5e6c..5489a9b97 100644
--- a/tests/testthat/_snaps/after-wrappers.md
+++ b/tests/testthat/_snaps/after-wrappers.md
@@ -12,45 +12,45 @@
[13] "Int8" "LazyFrame"
[15] "List" "Null"
[17] "PTime" "SQLContext"
- [19] "Series" "Struct"
- [21] "Time" "UInt16"
- [23] "UInt32" "UInt64"
- [25] "UInt8" "Unknown"
- [27] "Utf8" "all"
- [29] "all_horizontal" "any_horizontal"
- [31] "approx_n_unique" "class_names"
- [33] "coalesce" "col"
- [35] "concat" "concat_list"
- [37] "concat_str" "corr"
- [39] "count" "cov"
- [41] "date_range" "disable_string_cache"
- [43] "dtypes" "element"
- [45] "enable_string_cache" "expr_to_r"
- [47] "extra_auto_completion" "first"
- [49] "fold" "from_arrow"
- [51] "get_global_rpool_cap" "head"
- [53] "implode" "is_schema"
- [55] "last" "lit"
- [57] "max" "max_horizontal"
- [59] "mean" "median"
- [61] "mem_address" "min"
- [63] "min_horizontal" "n_unique"
- [65] "numeric_dtypes" "options"
- [67] "polars_info" "raw_list"
- [69] "read_csv" "read_ndjson"
- [71] "read_parquet" "reduce"
- [73] "reset_options" "rolling_corr"
- [75] "rolling_cov" "same_outer_dt"
- [77] "scan_csv" "scan_ipc"
- [79] "scan_ndjson" "scan_parquet"
- [81] "select" "set_global_rpool_cap"
- [83] "set_options" "show_all_public_functions"
- [85] "show_all_public_methods" "std"
- [87] "struct" "sum"
- [89] "sum_horizontal" "tail"
- [91] "threadpool_size" "using_string_cache"
- [93] "var" "when"
- [95] "with_string_cache"
+ [19] "Series" "String"
+ [21] "Struct" "Time"
+ [23] "UInt16" "UInt32"
+ [25] "UInt64" "UInt8"
+ [27] "Unknown" "Utf8"
+ [29] "all" "all_horizontal"
+ [31] "any_horizontal" "approx_n_unique"
+ [33] "class_names" "coalesce"
+ [35] "col" "concat"
+ [37] "concat_list" "concat_str"
+ [39] "corr" "count"
+ [41] "cov" "date_range"
+ [43] "disable_string_cache" "dtypes"
+ [45] "element" "enable_string_cache"
+ [47] "expr_to_r" "extra_auto_completion"
+ [49] "first" "fold"
+ [51] "from_arrow" "get_global_rpool_cap"
+ [53] "head" "implode"
+ [55] "is_schema" "last"
+ [57] "lit" "max"
+ [59] "max_horizontal" "mean"
+ [61] "median" "mem_address"
+ [63] "min" "min_horizontal"
+ [65] "n_unique" "numeric_dtypes"
+ [67] "options" "polars_info"
+ [69] "raw_list" "read_csv"
+ [71] "read_ndjson" "read_parquet"
+ [73] "reduce" "reset_options"
+ [75] "rolling_corr" "rolling_cov"
+ [77] "same_outer_dt" "scan_csv"
+ [79] "scan_ipc" "scan_ndjson"
+ [81] "scan_parquet" "select"
+ [83] "set_global_rpool_cap" "set_options"
+ [85] "show_all_public_functions" "show_all_public_methods"
+ [87] "std" "struct"
+ [89] "sum" "sum_horizontal"
+ [91] "tail" "threadpool_size"
+ [93] "using_string_cache" "var"
+ [95] "when" "with_string_cache"
---
diff --git a/tests/testthat/test-dataframe.R b/tests/testthat/test-dataframe.R
index 9a59e2c8e..b34c2da30 100644
--- a/tests/testthat/test-dataframe.R
+++ b/tests/testthat/test-dataframe.R
@@ -144,13 +144,13 @@ test_that("get set properties", {
test_that("DataFrame, custom schema", {
df = pl$DataFrame(
iris,
- schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+ schema = list(Sepal.Length = pl$Float32, Species = pl$String)
)
# dtypes from object are as expected
expect_true(
all(mapply(
df$dtypes,
- pl$dtypes[c("Float32", rep("Float64", 3), "Utf8")],
+ pl$dtypes[c("Float32", rep("Float64", 3), "String")],
FUN = "=="
))
)
@@ -1217,7 +1217,7 @@ test_that("transpose", {
expect_identical(
pl$DataFrame(iris)$
with_columns(pl$col("Species")$
- cast(pl$Utf8))$
+ cast(pl$String))$
transpose(FALSE)$
to_data_frame(),
df_expected
diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R
index 8f3041ffc..85fa5b247 100644
--- a/tests/testthat/test-datatype.R
+++ b/tests/testthat/test-datatype.R
@@ -66,3 +66,9 @@ test_that("POSIXct data conversion", {
# POSIXct is converted to datetime[ms], so sub-ms precision is lost
expect_identical(pl$lit(x)$to_r(), as.POSIXct(c("2020-01-01 13:45:48.343", "2020-01-01 13:45:48.343"), tz = "UTC"))
})
+
+test_that("String and Utf8 are identical", {
+ string = pl$DataFrame(x = "a", schema = list(x = pl$String))$to_data_frame()
+ utf8 = pl$DataFrame(x = "a", schema = list(x = pl$Utf8))$to_data_frame()
+ expect_identical(string, utf8)
+})
diff --git a/tests/testthat/test-expr_binary.R b/tests/testthat/test-expr_binary.R
index 19789cabd..d1b56f93f 100644
--- a/tests/testthat/test-expr_binary.R
+++ b/tests/testthat/test-expr_binary.R
@@ -61,7 +61,7 @@ test_that("bin$encode and bin$decode", {
c("hex_decoded")
)$select(
pl$lit(
- pl$col("hex_decoded")$cast(pl$Utf8)
+ pl$col("hex_decoded")$cast(pl$String)
)
)$to_list()
@@ -71,7 +71,7 @@ test_that("bin$encode and bin$decode", {
c("base64_decoded")
)$select(
pl$lit(
- pl$col("base64_decoded")$cast(pl$Utf8)
+ pl$col("base64_decoded")$cast(pl$String)
)
)$to_list()
diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R
index 58e44f3be..fe4c15819 100644
--- a/tests/testthat/test-expr_expr.R
+++ b/tests/testthat/test-expr_expr.R
@@ -501,10 +501,10 @@ test_that("to_physical + cast", {
df
- # cast error raised for Utf8 to Boolean
+ # cast error raised for String to Boolean
expect_error(
pl$DataFrame(iris)$with_columns(
- pl$col("Species")$cast(pl$dtypes$Utf8)$cast(pl$dtypes$Boolean)
+ pl$col("Species")$cast(pl$dtypes$String)$cast(pl$dtypes$Boolean)
)
)
@@ -1054,12 +1054,18 @@ test_that("gather that", {
c(1L, 3L, 5L, NA_integer_)
)
+ expect_identical(
+ pl$select(pl$lit(1:6)$gather(c(0, -1)))$to_list()[[1L]],
+ c(1L, 6L)
+ )
+
expect_error(
pl$select(pl$lit(0:10)$gather(11))$to_list()[[1L]]
)
- expect_error(
- pl$select(pl$lit(0:10)$gather(-5))$to_list()[[1L]]
+ expect_identical(
+ pl$select(pl$lit(0:10)$gather(-5))$to_list()[[1L]],
+ 6L
)
})
@@ -1522,7 +1528,7 @@ test_that("hash + reinterpret", {
hash_values1 = unname(unlist(df$select(pl$col(c("Sepal.Width", "Species"))$unique()$hash()$implode())$to_list()))
hash_values2 = unname(unlist(df$select(pl$col(c("Sepal.Width", "Species"))$unique()$hash(1, 2, 3, 4)$implode())$to_list()))
- hash_values3 = unname((df$select(pl$col(c("Sepal.Width", "Species"))$unique()$hash(1, 2, 3, 4)$implode()$cast(pl$List(pl$Utf8)))$to_list()))
+ hash_values3 = unname((df$select(pl$col(c("Sepal.Width", "Species"))$unique()$hash(1, 2, 3, 4)$implode()$cast(pl$List(pl$String)))$to_list()))
expect_true(!any(duplicated(hash_values1)))
expect_true(!any(sapply(hash_values3, \(x) any(duplicated(x)))))
@@ -2112,7 +2118,7 @@ test_that("ewm_", {
test_that("extend_constant", {
expect_identical(
pl$lit(c("5", "Bob_is_not_a_number"))
- $cast(pl$dtypes$Utf8, strict = FALSE)
+ $cast(pl$dtypes$String, strict = FALSE)
$extend_constant("chuchu", 2)$to_r(),
c("5", "Bob_is_not_a_number", "chuchu", "chuchu")
)
@@ -2276,7 +2282,7 @@ test_that("shrink_dtype", {
expect_true(all(mapply(
df$dtypes,
- pl$dtypes[c("Int8", "Int64", "Int32", "Int8", "Int16", "Utf8", "Float32", "Boolean")],
+ pl$dtypes[c("Int8", "Int64", "Int32", "Int8", "Int16", "String", "Float32", "Boolean")],
FUN = function(actual, expected) actual == expected
)))
})
diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R
index ba283ca8f..05270eb0d 100644
--- a/tests/testthat/test-expr_string.R
+++ b/tests/testthat/test-expr_string.R
@@ -400,15 +400,15 @@ test_that("encode decode", {
pl$col("strings")$str$encode("base64")$alias("base64"), # notice DataType is not encoded
pl$col("strings")$str$encode("hex")$alias("hex") # ... and must restored with cast
)$with_columns(
- pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$Utf8),
- pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$Utf8)
+ pl$col("base64")$str$decode("base64")$alias("base64_decoded")$cast(pl$String),
+ pl$col("hex")$str$decode("hex")$alias("hex_decoded")$cast(pl$String)
)$to_list()
expect_identical(l$strings, l$base64_decoded)
expect_identical(l$strings, l$hex_decoded)
expect_identical(
- pl$lit("?")$str$decode("base64", strict = FALSE)$cast(pl$Utf8)$to_r(),
+ pl$lit("?")$str$decode("base64", strict = FALSE)$cast(pl$String)$to_r(),
NA_character_
)
@@ -442,7 +442,7 @@ test_that("str$extract", {
expect_grepl_error(
pl$lit("abc")$str$extract(42, 42),
- "String"
+ "str"
)
expect_true(
diff --git a/tests/testthat/test-from_arrow.R b/tests/testthat/test-from_arrow.R
index c83d6de04..1707242c7 100644
--- a/tests/testthat/test-from_arrow.R
+++ b/tests/testthat/test-from_arrow.R
@@ -72,7 +72,7 @@ test_that("from_arrow", {
# use schema override
df = pl$from_arrow(
arrow::arrow_table(iris),
- schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+ schema_overrides = list(Sepal.Length = pl$Float32, Species = pl$String)
)
iris_str = iris
iris_str$Species = as.character(iris_str$Species)
diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R
index f421c88e9..c8423c983 100644
--- a/tests/testthat/test-lazy.R
+++ b/tests/testthat/test-lazy.R
@@ -33,14 +33,14 @@ test_that("create LazyFrame", {
test_that("LazyFrame, custom schema", {
df = pl$LazyFrame(
iris,
- schema = list(Sepal.Length = pl$Float32, Species = pl$Utf8)
+ schema = list(Sepal.Length = pl$Float32, Species = pl$String)
)$collect()
# dtypes from object are as expected
expect_true(
all(mapply(
df$dtypes,
- pl$dtypes[c("Float32", rep("Float64", 3), "Utf8")],
+ pl$dtypes[c("Float32", rep("Float64", 3), "String")],
FUN = "=="
))
)
diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R
index 9f7c4d1b1..70a330338 100644
--- a/tests/testthat/test-series.R
+++ b/tests/testthat/test-series.R
@@ -457,7 +457,7 @@ test_that("Series list", {
s = pl$Series(l)
# check data_type
- expect_true(s$dtype == with(pl, List(List(List(Utf8)))))
+ expect_true(s$dtype == with(pl, List(List(List(String)))))
# flatten 3-levels and return to R
# TODO CONTRIBUTE POLARS this is a bug, when flattening an empty list, it should not give a null
@@ -532,5 +532,5 @@ patrick::with_parameters_test_that("mean, median, std, var",
test_that("n_unique", {
x = c(1:4, NA, NaN, 1) # 6 unique one repeated
expect_identical(pl$Series(x)$n_unique(), 6)
- expect_grepl_error(pl$Series(c())$n_unique(), "operation not supported for dtype")
+ expect_identical(pl$Series(c())$n_unique(), 0)
})