diff --git a/NEWS.md b/NEWS.md index 9a1ecedf7..56ae7ce5b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,7 @@ ### Breaking changes -- Updated rust-polars to unreleased version (> 0.40.0) (#1104, #1110): +- Updated rust-polars to unreleased version (> 0.40.0) (#1104, #1110, #1117): - In `$join()`, there is a new argument `coalesce` and the `how` options now accept `"full"` instead of `"outer"` and `"outer_coalesce"`. - `$top_k()` and `$bottom_k()` gain three arguments `nulls_last`, @@ -26,7 +26,10 @@ - In all functions accepting optimization parameter (such as `projection_pushdown`), there is a new parameter `cluster_with_columns` to combine sequential independent calls to `$with_columns()`. - + - `$str$explode()` is removed. + - The `check_sorted` argument is removed from `$rolling()` and `$group_by_dynamic()`. + Sortedness is now verified in a quick manner, so this argument is no longer needed + (pola-rs/polars#16494). - As warned in v0.16.0, the order of arguments in `pl$Series` is changed (#1071). The first argument is now `name`, and the second argument is `values`. - `$to_struct()` on an Expr is removed. 
This method is now only available for diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 95a065f98..18e613589 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -2127,11 +2127,10 @@ DataFrame_rolling = function( period, offset = NULL, closed = "right", - group_by = NULL, - check_sorted = TRUE) { + group_by = NULL) { period = parse_as_polars_duration_string(period) offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(period) - construct_rolling_group_by(self, index_column, period, offset, closed, group_by, check_sorted) + construct_rolling_group_by(self, index_column, period, offset, closed, group_by) } #' @inherit LazyFrame_group_by_dynamic title description details params @@ -2211,14 +2210,13 @@ DataFrame_group_by_dynamic = function( closed = "left", label = "left", group_by = NULL, - start_by = "window", - check_sorted = TRUE) { + start_by = "window") { every = parse_as_polars_duration_string(every) offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(every) period = parse_as_polars_duration_string(period) %||% every construct_group_by_dynamic( self, index_column, every, period, offset, include_boundaries, closed, label, - group_by, start_by, check_sorted + group_by, start_by ) } diff --git a/R/expr__expr.R b/R/expr__expr.R index ae4708d85..c68d56442 100644 --- a/R/expr__expr.R +++ b/R/expr__expr.R @@ -3282,10 +3282,6 @@ Expr_peak_max = function() { #' See the `Polars duration string language` section for details. #' @param closed Define which sides of the temporal interval are closed #' (inclusive). This can be either `"left"`, `"right"`, `"both"` or `"none"`. -#' @param check_sorted Check whether data is actually sorted. Checking it is -#' expensive so if you are sure the data within the `index_column` is sorted, you -#' can set this to `FALSE` but note that if the data actually is unsorted, it -#' will lead to incorrect output. 
#' #' @inheritSection polars_duration_string Polars duration string language #' @return Expr @@ -3319,11 +3315,10 @@ Expr_rolling = function( ..., period, offset = NULL, - closed = "right", - check_sorted = TRUE) { + closed = "right") { period = parse_as_polars_duration_string(period) offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(period) - .pr$Expr$rolling(self, index_column, period, offset, closed, check_sorted) |> + .pr$Expr$rolling(self, index_column, period, offset, closed) |> unwrap("in $rolling():") } diff --git a/R/expr__string.R b/R/expr__string.R index 8df916eba..2d858d9a5 100644 --- a/R/expr__string.R +++ b/R/expr__string.R @@ -853,18 +853,6 @@ ExprStr_slice = function(offset, length = NULL) { unwrap("in $str$slice():") } -#' Returns a column with a separate row for every string character -#' -#' @keywords ExprStr -#' @return Expr: Series of dtype String. -#' @examples -#' df = pl$DataFrame(a = c("foo", "bar")) -#' df$select(pl$col("a")$str$explode()) -ExprStr_explode = function() { - .pr$Expr$str_explode(self) |> - unwrap("in $str$explode():") -} - #' Convert a String column into an Int64 column with base radix #' diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index a89c7c583..b961e6840 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1084,8 +1084,6 @@ RPolarsExpr$str_replace_all <- function(pat, value, literal) .Call(wrap__RPolars RPolarsExpr$str_slice <- function(offset, length) .Call(wrap__RPolarsExpr__str_slice, self, offset, length) -RPolarsExpr$str_explode <- function() .Call(wrap__RPolarsExpr__str_explode, self) - RPolarsExpr$str_to_integer <- function(base, strict) .Call(wrap__RPolarsExpr__str_to_integer, self, base, strict) RPolarsExpr$str_reverse <- function() .Call(wrap__RPolarsExpr__str_reverse, self) @@ -1154,7 +1152,7 @@ RPolarsExpr$corr <- function(a, b, method, ddof, propagate_nans) .Call(wrap__RPo RPolarsExpr$rolling_corr <- function(a, b, window_size, min_periods, ddof) 
.Call(wrap__RPolarsExpr__rolling_corr, a, b, window_size, min_periods, ddof) -RPolarsExpr$rolling <- function(index_column, period, offset, closed, check_sorted) .Call(wrap__RPolarsExpr__rolling, self, index_column, period, offset, closed, check_sorted) +RPolarsExpr$rolling <- function(index_column, period, offset, closed) .Call(wrap__RPolarsExpr__rolling, self, index_column, period, offset, closed) #' @export `$.RPolarsExpr` <- function (self, name) { func <- RPolarsExpr[[name]]; environment(func) <- environment(); func } @@ -1270,9 +1268,9 @@ RPolarsLazyFrame$clone_in_rust <- function() .Call(wrap__RPolarsLazyFrame__clone RPolarsLazyFrame$with_context <- function(contexts) .Call(wrap__RPolarsLazyFrame__with_context, self, contexts) -RPolarsLazyFrame$rolling <- function(index_column, period, offset, closed, group_by, check_sorted) .Call(wrap__RPolarsLazyFrame__rolling, self, index_column, period, offset, closed, group_by, check_sorted) +RPolarsLazyFrame$rolling <- function(index_column, period, offset, closed, group_by) .Call(wrap__RPolarsLazyFrame__rolling, self, index_column, period, offset, closed, group_by) -RPolarsLazyFrame$group_by_dynamic <- function(index_column, every, period, offset, label, include_boundaries, closed, by, start_by, check_sorted) .Call(wrap__RPolarsLazyFrame__group_by_dynamic, self, index_column, every, period, offset, label, include_boundaries, closed, by, start_by, check_sorted) +RPolarsLazyFrame$group_by_dynamic <- function(index_column, every, period, offset, label, include_boundaries, closed, by, start_by) .Call(wrap__RPolarsLazyFrame__group_by_dynamic, self, index_column, every, period, offset, label, include_boundaries, closed, by, start_by) RPolarsLazyFrame$to_dot <- function(optimized) .Call(wrap__RPolarsLazyFrame__to_dot, self, optimized) diff --git a/R/group_by_dynamic.R b/R/group_by_dynamic.R index f107d74c6..54cd85cef 100644 --- a/R/group_by_dynamic.R +++ b/R/group_by_dynamic.R @@ -40,7 +40,7 @@ RPolarsDynamicGroupBy = 
new.env(parent = emptyenv()) #' @noRd construct_group_by_dynamic = function( df, index_column, every, period, offset, include_boundaries, closed, label, - group_by, start_by, check_sorted) { + group_by, start_by) { if (!inherits(df, "RPolarsDataFrame")) { stop("internal error: construct_group called not on DataFrame") } @@ -58,8 +58,7 @@ construct_group_by_dynamic = function( closed = closed, label = label, group_by = group_by, - start_by = start_by, - check_sorted = check_sorted + start_by = start_by ) class(out) = "RPolarsDynamicGroupBy" out @@ -96,8 +95,7 @@ DynamicGroupBy_agg = function(...) { closed = prv$closed, label = prv$label, group_by = prv$group_by, - start_by = prv$start_by, - check_sorted = prv$check_sorted + start_by = prv$start_by )$ agg(unpack_list(..., .context = "in $agg():"))$ collect(no_optimization = TRUE) diff --git a/R/group_by_rolling.R b/R/group_by_rolling.R index 636030be5..802237e76 100644 --- a/R/group_by_rolling.R +++ b/R/group_by_rolling.R @@ -36,7 +36,7 @@ RPolarsRollingGroupBy = new.env(parent = emptyenv()) #' The internal RollingGroupBy constructor #' @return The input as grouped DataFrame #' @noRd -construct_rolling_group_by = function(df, index_column, period, offset, closed, group_by, check_sorted) { +construct_rolling_group_by = function(df, index_column, period, offset, closed, group_by) { if (!inherits(df, "RPolarsDataFrame")) { stop("internal error: construct_group called not on DataFrame") } @@ -50,8 +50,7 @@ construct_rolling_group_by = function(df, index_column, period, offset, closed, period = period, offset = offset, closed = closed, - group_by = group_by, - check_sorted = check_sorted + group_by = group_by ) class(out) = "RPolarsRollingGroupBy" out @@ -96,8 +95,7 @@ RollingGroupBy_agg = function(...) 
{ period = prv$period, offset = prv$offset, closed = prv$closed, - group_by = prv$group_by, - check_sorted = prv$check_sorted + group_by = prv$group_by )$ agg(unpack_list(..., .context = "in $agg():"))$ collect(no_optimization = TRUE) diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 3772c6afa..80cf8fed8 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1924,13 +1924,12 @@ LazyFrame_rolling = function( period, offset = NULL, closed = "right", - group_by = NULL, - check_sorted = TRUE) { + group_by = NULL) { period = parse_as_polars_duration_string(period) offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(period) .pr$LazyFrame$rolling( self, index_column, period, offset, closed, - wrap_elist_result(group_by, str_to_lit = FALSE), check_sorted + wrap_elist_result(group_by, str_to_lit = FALSE) ) |> unwrap("in $rolling():") } @@ -2034,15 +2033,14 @@ LazyFrame_group_by_dynamic = function( closed = "left", label = "left", group_by = NULL, - start_by = "window", - check_sorted = TRUE) { + start_by = "window") { every = parse_as_polars_duration_string(every) offset = parse_as_polars_duration_string(offset) %||% negate_duration_string(every) period = parse_as_polars_duration_string(period) %||% every .pr$LazyFrame$group_by_dynamic( self, index_column, every, period, offset, label, include_boundaries, closed, - wrap_elist_result(group_by, str_to_lit = FALSE), start_by, check_sorted + wrap_elist_result(group_by, str_to_lit = FALSE), start_by ) |> unwrap("in $group_by_dynamic():") } diff --git a/man/DataFrame_group_by_dynamic.Rd b/man/DataFrame_group_by_dynamic.Rd index b8504eeed..79985a206 100644 --- a/man/DataFrame_group_by_dynamic.Rd +++ b/man/DataFrame_group_by_dynamic.Rd @@ -14,8 +14,7 @@ DataFrame_group_by_dynamic( closed = "left", label = "left", group_by = NULL, - start_by = "window", - check_sorted = TRUE + start_by = "window" ) } \arguments{ @@ -64,11 +63,6 @@ and then adding \code{offset}. 
Note that weekly windows start on Monday. \item a day of the week (only takes effect if \code{every} contains \code{"w"}): \code{"monday"} starts the window on the Monday before the first data point, etc. }} - -\item{check_sorted}{Check whether data is actually sorted. Checking it is -expensive so if you are sure the data within the \code{index_column} is sorted, you -can set this to \code{FALSE} but note that if the data actually is unsorted, it -will lead to incorrect output.} } \value{ A \link[=GroupBy_class]{GroupBy} object diff --git a/man/DataFrame_rolling.Rd b/man/DataFrame_rolling.Rd index 511eabc76..4d78a361d 100644 --- a/man/DataFrame_rolling.Rd +++ b/man/DataFrame_rolling.Rd @@ -10,8 +10,7 @@ DataFrame_rolling( period, offset = NULL, closed = "right", - group_by = NULL, - check_sorted = TRUE + group_by = NULL ) } \arguments{ @@ -36,11 +35,6 @@ See the \verb{Polars duration string language} section for details.} (inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} \item{group_by}{Also group by this column/these columns.} - -\item{check_sorted}{Check whether data is actually sorted. Checking it is -expensive so if you are sure the data within the \code{index_column} is sorted, you -can set this to \code{FALSE} but note that if the data actually is unsorted, it -will lead to incorrect output.} } \value{ A \link[=RollingGroupBy_class]{RollingGroupBy} object diff --git a/man/ExprStr_explode.Rd b/man/ExprStr_explode.Rd deleted file mode 100644 index 18880c287..000000000 --- a/man/ExprStr_explode.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/expr__string.R -\name{ExprStr_explode} -\alias{ExprStr_explode} -\title{Returns a column with a separate row for every string character} -\usage{ -ExprStr_explode() -} -\value{ -Expr: Series of dtype String. 
-} -\description{ -Returns a column with a separate row for every string character -} -\examples{ -df = pl$DataFrame(a = c("foo", "bar")) -df$select(pl$col("a")$str$explode()) -} -\keyword{ExprStr} diff --git a/man/Expr_rolling.Rd b/man/Expr_rolling.Rd index f28556532..263166009 100644 --- a/man/Expr_rolling.Rd +++ b/man/Expr_rolling.Rd @@ -4,14 +4,7 @@ \alias{Expr_rolling} \title{Create rolling groups based on a time or numeric column} \usage{ -Expr_rolling( - index_column, - ..., - period, - offset = NULL, - closed = "right", - check_sorted = TRUE -) +Expr_rolling(index_column, ..., period, offset = NULL, closed = "right") } \arguments{ \item{index_column}{Column used to group based on the time window. Often of @@ -32,11 +25,6 @@ See the \verb{Polars duration string language} section for details.} \item{closed}{Define which sides of the temporal interval are closed (inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} - -\item{check_sorted}{Check whether data is actually sorted. Checking it is -expensive so if you are sure the data within the \code{index_column} is sorted, you -can set this to \code{FALSE} but note that if the data actually is unsorted, it -will lead to incorrect output.} } \value{ Expr diff --git a/man/LazyFrame_group_by_dynamic.Rd b/man/LazyFrame_group_by_dynamic.Rd index e2300d346..f0177d440 100644 --- a/man/LazyFrame_group_by_dynamic.Rd +++ b/man/LazyFrame_group_by_dynamic.Rd @@ -14,8 +14,7 @@ LazyFrame_group_by_dynamic( closed = "left", label = "left", group_by = NULL, - start_by = "window", - check_sorted = TRUE + start_by = "window" ) } \arguments{ @@ -64,11 +63,6 @@ and then adding \code{offset}. Note that weekly windows start on Monday. \item a day of the week (only takes effect if \code{every} contains \code{"w"}): \code{"monday"} starts the window on the Monday before the first data point, etc. }} - -\item{check_sorted}{Check whether data is actually sorted. 
Checking it is -expensive so if you are sure the data within the \code{index_column} is sorted, you -can set this to \code{FALSE} but note that if the data actually is unsorted, it -will lead to incorrect output.} } \value{ A \link[=LazyGroupBy_class]{LazyGroupBy} object diff --git a/man/LazyFrame_rolling.Rd b/man/LazyFrame_rolling.Rd index 193fa30c4..099eb5500 100644 --- a/man/LazyFrame_rolling.Rd +++ b/man/LazyFrame_rolling.Rd @@ -10,8 +10,7 @@ LazyFrame_rolling( period, offset = NULL, closed = "right", - group_by = NULL, - check_sorted = TRUE + group_by = NULL ) } \arguments{ @@ -36,11 +35,6 @@ See the \verb{Polars duration string language} section for details.} (inclusive). This can be either \code{"left"}, \code{"right"}, \code{"both"} or \code{"none"}.} \item{group_by}{Also group by this column/these columns.} - -\item{check_sorted}{Check whether data is actually sorted. Checking it is -expensive so if you are sure the data within the \code{index_column} is sorted, you -can set this to \code{FALSE} but note that if the data actually is unsorted, it -will lead to incorrect output.} } \value{ A \link[=LazyGroupBy_class]{LazyGroupBy} object diff --git a/src/rust/Cargo.lock b/src/rust/Cargo.lock index 255168294..09762d5f8 100644 --- a/src/rust/Cargo.lock +++ b/src/rust/Cargo.lock @@ -1613,7 +1613,7 @@ dependencies = [ [[package]] name = "polars" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "getrandom", "polars-arrow", @@ -1633,7 +1633,7 @@ dependencies = [ [[package]] name = "polars-arrow" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = 
"git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "atoi", @@ -1680,7 +1680,7 @@ dependencies = [ [[package]] name = "polars-compute" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "bytemuck", "either", @@ -1695,7 +1695,7 @@ dependencies = [ [[package]] name = "polars-core" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "bitflags 2.4.2", @@ -1729,7 +1729,7 @@ dependencies = [ [[package]] name = "polars-error" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "avro-schema", "object_store", @@ -1742,7 +1742,7 @@ dependencies = [ [[package]] name = "polars-expr" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "bitflags 2.4.2", @@ -1761,7 +1761,7 @@ dependencies = [ [[package]] name = "polars-io" version = "0.40.0" -source = 
"git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "async-trait", @@ -1805,7 +1805,7 @@ dependencies = [ [[package]] name = "polars-json" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "chrono", @@ -1825,7 +1825,7 @@ dependencies = [ [[package]] name = "polars-lazy" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "bitflags 2.4.2", @@ -1851,7 +1851,7 @@ dependencies = [ [[package]] name = "polars-ops" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "aho-corasick", @@ -1887,7 +1887,7 @@ dependencies = [ [[package]] name = "polars-parquet" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "async-stream", @@ -1912,7 +1912,7 @@ dependencies = [ 
[[package]] name = "polars-pipe" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "crossbeam-channel", "crossbeam-queue", @@ -1939,7 +1939,7 @@ dependencies = [ [[package]] name = "polars-plan" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "bytemuck", @@ -1970,7 +1970,7 @@ dependencies = [ [[package]] name = "polars-row" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "bytemuck", "polars-arrow", @@ -1981,7 +1981,7 @@ dependencies = [ [[package]] name = "polars-sql" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "hex", "once_cell", @@ -1989,6 +1989,7 @@ dependencies = [ "polars-core", "polars-error", "polars-lazy", + "polars-ops", "polars-plan", "rand", "serde", @@ -1999,7 +2000,7 @@ dependencies = [ [[package]] name = "polars-time" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = 
"git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "atoi", "bytemuck", @@ -2020,7 +2021,7 @@ dependencies = [ [[package]] name = "polars-utils" version = "0.40.0" -source = "git+https://github.com/pola-rs/polars.git?rev=3e8e6a50552f9a8e9570724da2d5ce072a7381cb#3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +source = "git+https://github.com/pola-rs/polars.git?rev=d190e02693ead521cf38a603b31722db258ef491#d190e02693ead521cf38a603b31722db258ef491" dependencies = [ "ahash", "bytemuck", diff --git a/src/rust/Cargo.toml b/src/rust/Cargo.toml index 7c8542a31..31504513e 100644 --- a/src/rust/Cargo.toml +++ b/src/rust/Cargo.toml @@ -50,8 +50,8 @@ serde_json = "*" smartstring = "1.0.1" state = "0.6.0" thiserror = "1.0.61" -polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "3e8e6a50552f9a8e9570724da2d5ce072a7381cb", default-features = false } -polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "3e8e6a50552f9a8e9570724da2d5ce072a7381cb", default-features = false } +polars-core = { git = "https://github.com/pola-rs/polars.git", rev = "d190e02693ead521cf38a603b31722db258ef491", default-features = false } +polars-lazy = { git = "https://github.com/pola-rs/polars.git", rev = "d190e02693ead521cf38a603b31722db258ef491", default-features = false } either = "1" [dependencies.polars] @@ -157,4 +157,4 @@ features = [ "zip_with", ] git = "https://github.com/pola-rs/polars.git" -rev = "3e8e6a50552f9a8e9570724da2d5ce072a7381cb" +rev = "d190e02693ead521cf38a603b31722db258ef491" diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 8d56d865d..0ab5f527b 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -528,7 +528,7 @@ impl RPolarsLazyFrame { .into()) } - fn schema(&self) -> RResult { + fn schema(&mut self) -> RResult { let schema = self .0 .schema() @@ -636,14 +636,12 @@ impl RPolarsLazyFrame { offset: 
Robj, closed: Robj, group_by: Robj, - check_sorted: Robj, ) -> RResult { let index_column = robj_to!(PLExprCol, index_column)?; let period = Duration::parse(robj_to!(str, period)?); let offset = Duration::parse(robj_to!(str, offset)?); let closed_window = robj_to!(ClosedWindow, closed)?; let group_by = robj_to!(VecPLExprCol, group_by)?; - let check_sorted = robj_to!(bool, check_sorted)?; let lazy_gb = self.0.clone().rolling( index_column, @@ -653,7 +651,6 @@ impl RPolarsLazyFrame { period, offset, closed_window, - check_sorted, }, ); @@ -675,7 +672,6 @@ impl RPolarsLazyFrame { closed: Robj, by: Robj, start_by: Robj, - check_sorted: Robj, ) -> RResult { let closed_window = robj_to!(ClosedWindow, closed)?; let by = robj_to!(VecPLExprCol, by)?; @@ -691,7 +687,6 @@ impl RPolarsLazyFrame { include_boundaries: robj_to!(bool, include_boundaries)?, closed_window, start_by: robj_to!(StartBy, start_by)?, - check_sorted: robj_to!(bool, check_sorted)?, ..Default::default() }, ); diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs index 97025657f..68506b30c 100644 --- a/src/rust/src/lazy/dsl.rs +++ b/src/rust/src/lazy/dsl.rs @@ -2389,10 +2389,6 @@ impl RPolarsExpr { Ok(self.clone().0.str().slice(offset, length).into()) } - pub fn str_explode(&self) -> RResult { - Ok(self.0.clone().str().explode().into()) - } - pub fn str_to_integer(&self, base: Robj, strict: Robj) -> RResult { let base = robj_to!(PLExprCol, base)?; let strict = robj_to!(bool, strict)?; @@ -2669,20 +2665,17 @@ impl RPolarsExpr { period: Robj, offset: Robj, closed: Robj, - check_sorted: Robj, ) -> RResult { let index_column = robj_to!(String, index_column)?.into(); let period = Duration::parse(robj_to!(str, period)?); let offset = Duration::parse(robj_to!(str, offset)?); let closed_window = robj_to!(ClosedWindow, closed)?; - let check_sorted = robj_to!(bool, check_sorted)?; let options = RollingGroupOptions { index_column, period, offset, closed_window, - check_sorted, }; 
Ok(self.0.clone().rolling(options).into()) diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index b75623a78..85e296b96 100644 --- a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -20,8 +20,8 @@ use extendr_api::{extendr, prelude::*, rprintln}; use pl::SeriesMethods; use polars::datatypes::*; use polars::prelude as pl; -use polars::prelude::ArgAgg; -use polars::prelude::IntoSeries; +use polars::prelude::{ArgAgg, IntoSeries}; +use polars_core::series::IsSorted; pub const R_INT_NA_ENC: i32 = -2147483648; use crate::rpolarserr::polars_to_rpolars_err; use std::result::Result; @@ -181,22 +181,22 @@ impl RPolarsSeries { self.0.arg_max() } + // TODO: rename to `can_fast_explode_flag` pub fn fast_explode_flag(&self) -> bool { - self.0 - .get_flags() - .contains(polars::chunked_array::Settings::FAST_EXPLODE_LIST) + match self.0.list() { + Err(_) => false, + Ok(list) => list._can_fast_explode(), + } } + // TODO: rename to `is_sorted_ascending_flag` pub fn is_sorted_flag(&self) -> bool { - self.0 - .get_flags() - .contains(polars::chunked_array::Settings::SORTED_ASC) + matches!(self.0.is_sorted_flag(), IsSorted::Ascending) } + // TODO: rename to `is_sorted_descending_flag` pub fn is_sorted_reverse_flag(&self) -> bool { - self.0 - .get_flags() - .contains(polars::chunked_array::Settings::SORTED_DSC) + matches!(self.0.is_sorted_flag(), IsSorted::Descending) } pub fn is_sorted(&self, descending: Robj) -> RResult { diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index 1c61db7da..e4e599b88 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -419,32 +419,32 @@ [275] "str_base64_encode" "str_concat" [277] "str_contains" "str_contains_any" [279] "str_count_matches" "str_ends_with" - [281] "str_explode" "str_extract" - [283] "str_extract_all" "str_extract_groups" - [285] "str_find" "str_head" - [287] "str_hex_decode" "str_hex_encode" - [289] "str_json_decode" 
"str_json_path_match" - [291] "str_len_bytes" "str_len_chars" - [293] "str_pad_end" "str_pad_start" - [295] "str_replace" "str_replace_all" - [297] "str_replace_many" "str_reverse" - [299] "str_slice" "str_split" - [301] "str_split_exact" "str_splitn" - [303] "str_starts_with" "str_strip_chars" - [305] "str_strip_chars_end" "str_strip_chars_start" - [307] "str_tail" "str_to_date" - [309] "str_to_datetime" "str_to_integer" - [311] "str_to_lowercase" "str_to_time" - [313] "str_to_titlecase" "str_to_uppercase" - [315] "str_zfill" "struct_field_by_name" - [317] "struct_rename_fields" "struct_with_fields" - [319] "sub" "sum" - [321] "tail" "tan" - [323] "tanh" "to_physical" - [325] "top_k" "unique" - [327] "unique_counts" "unique_stable" - [329] "upper_bound" "value_counts" - [331] "var" "xor" + [281] "str_extract" "str_extract_all" + [283] "str_extract_groups" "str_find" + [285] "str_head" "str_hex_decode" + [287] "str_hex_encode" "str_json_decode" + [289] "str_json_path_match" "str_len_bytes" + [291] "str_len_chars" "str_pad_end" + [293] "str_pad_start" "str_replace" + [295] "str_replace_all" "str_replace_many" + [297] "str_reverse" "str_slice" + [299] "str_split" "str_split_exact" + [301] "str_splitn" "str_starts_with" + [303] "str_strip_chars" "str_strip_chars_end" + [305] "str_strip_chars_start" "str_tail" + [307] "str_to_date" "str_to_datetime" + [309] "str_to_integer" "str_to_lowercase" + [311] "str_to_time" "str_to_titlecase" + [313] "str_to_uppercase" "str_zfill" + [315] "struct_field_by_name" "struct_rename_fields" + [317] "struct_with_fields" "sub" + [319] "sum" "tail" + [321] "tan" "tanh" + [323] "to_physical" "top_k" + [325] "unique" "unique_counts" + [327] "unique_stable" "upper_bound" + [329] "value_counts" "var" + [331] "xor" # public and private methods of each class When diff --git a/tests/testthat/test-expr_expr.R b/tests/testthat/test-expr_expr.R index edcd5d5b0..a2d54e962 100644 --- a/tests/testthat/test-expr_expr.R +++ 
b/tests/testthat/test-expr_expr.R @@ -2659,35 +2659,6 @@ test_that("rolling: passing a difftime as period works", { ) }) -test_that("rolling, arg check_sorted", { - dates = c( - "2020-01-02 18:12:48", "2020-01-03 19:45:32", "2020-01-08 23:16:43", - "2020-01-01 13:45:48", "2020-01-01 16:42:13", "2020-01-01 16:45:09" - ) - - df = pl$DataFrame(dt = dates, a = c(3, 7, 5, 9, 2, 1))$ - with_columns( - pl$col("dt")$str$strptime(pl$Datetime("us"), format = "%Y-%m-%d %H:%M:%S") - ) - - expect_grepl_error( - df$with_columns( - sum_a_offset1 = pl$sum("a")$rolling(index_column = "dt", period = "2d") - ), - "is not explicitly sorted" - ) - - # no error message but wrong output - expect_no_error( - df$with_columns(pl$col("dt")$set_sorted())$with_columns( - sum_a_offset1 = pl$sum("a")$rolling( - index_column = "dt", period = "2d", - check_sorted = FALSE - ) - ) - ) -}) - test_that("eq_missing and ne_missing", { x = c(rep(TRUE, 3), rep(FALSE, 3), rep(NA, 3)) y = c(rep(c(TRUE, FALSE, NA), 3)) diff --git a/tests/testthat/test-expr_string.R b/tests/testthat/test-expr_string.R index fe9043908..c4dec7167 100644 --- a/tests/testthat/test-expr_string.R +++ b/tests/testthat/test-expr_string.R @@ -676,15 +676,6 @@ test_that("str$slice", { }) -test_that("str$str_explode", { - s = c("64", "255", "9", "11", "16", "2.5", NA, "not number") - expect_identical( - pl$lit(s)$str$explode()$to_r(), - unlist(strsplit(s, split = "")) - ) -}) - - test_that("str$to_integer", { expect_identical( pl$lit(c("110", "101", "010"))$str$to_integer(base = 2)$to_r(), diff --git a/tests/testthat/test-groupby.R b/tests/testthat/test-groupby.R index cbf9ee1d3..15466dc8e 100644 --- a/tests/testthat/test-groupby.R +++ b/tests/testthat/test-groupby.R @@ -263,17 +263,6 @@ test_that("group_by_dynamic for LazyFrame: integer variable", { ) }) -test_that("group_by_dynamic for LazyFrame: error if not explicitly sorted", { - df = pl$LazyFrame( - index = c(1L, 2L, 3L, 4L, 8L, 9L), - a = c(3, 7, 5, 9, 2, 1) - ) - 
expect_grepl_error( - df$group_by_dynamic(index_column = "index", every = "2i")$agg(pl$col("a"))$collect(), - "not explicitly sorted" - ) -}) - test_that("group_by_dynamic for LazyFrame: error if every is negative", { df = pl$LazyFrame( idx = 0:5, @@ -414,25 +403,6 @@ test_that("group_by_dynamic for LazyFrame: argument 'by' works", { ) }) -test_that("group_by_dynamic for LazyFrame: argument 'check_sorted' works", { - df = pl$LazyFrame( - index = c(2L, 1L, 3L, 4L, 9L, 8L), # unsorted index - grp = c("a", "a", rep("b", 4)), - a = c(3, 7, 5, 9, 2, 1) - ) - expect_grepl_error( - df$group_by_dynamic(index_column = "index", every = "2i", group_by = "grp")$agg( - pl$sum("a")$alias("sum_a") - )$collect(), - "not sorted" - ) - expect_no_error( - df$group_by_dynamic(index_column = "index", every = "2i", group_by = "grp", check_sorted = FALSE)$agg( - pl$sum("a")$alias("sum_a") - )$collect() - ) -}) - test_that("group_by_dynamic for LazyFrame: error if index not int or date/time", { df = pl$LazyFrame( index = c(1:5, 6.0), diff --git a/tests/testthat/test-lazy.R b/tests/testthat/test-lazy.R index 65c16c4eb..bdcc29a31 100644 --- a/tests/testthat/test-lazy.R +++ b/tests/testthat/test-lazy.R @@ -962,17 +962,6 @@ test_that("rolling for LazyFrame: using difftime as period", { ) }) -test_that("rolling for LazyFrame: error if not explicitly sorted", { - df = pl$LazyFrame( - index = c(1L, 2L, 3L, 4L, 8L, 9L), - a = c(3, 7, 5, 9, 2, 1) - ) - expect_grepl_error( - df$rolling(index_column = "index", period = "2i")$agg(pl$col("a"))$collect(), - "not explicitly sorted" - ) -}) - test_that("rolling for LazyFrame: error if period is negative", { df = pl$LazyFrame( index = c(1L, 2L, 3L, 4L, 8L, 9L), @@ -1016,25 +1005,6 @@ test_that("rolling for LazyFrame: argument 'group_by' works", { ) }) -test_that("rolling for LazyFrame: argument 'check_sorted' works", { - df = pl$LazyFrame( - index = c(2L, 1L, 3L, 4L, 9L, 8L), # unsorted index - grp = c("a", "a", rep("b", 4)), - a = c(3, 7, 5, 9, 2, 1) - ) 
- expect_grepl_error( - df$rolling(index_column = "index", period = "2i", group_by = "grp")$agg( - pl$sum("a")$alias("sum_a") - )$collect(), - "not sorted" - ) - expect_no_error( - df$rolling(index_column = "index", period = "2i", group_by = "grp", check_sorted = FALSE)$agg( - pl$sum("a")$alias("sum_a") - )$collect() - ) -}) - test_that("rolling for LazyFrame: error if index not int or date/time", { df = pl$LazyFrame( index = c(1:5, 6.0), diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R index c6283a39c..08db1004d 100644 --- a/tests/testthat/test-series.R +++ b/tests/testthat/test-series.R @@ -505,15 +505,14 @@ test_that("Series list", { # Note: flattening an empty list returns null in polars # https://github.com/pola-rs/polars/issues/6723 # https://github.com/pola-rs/polars/issues/14381 - # TODO: panicked with Rust Polars 0.40.0 - # ul = pl$DataFrame(s)$select(pl$col("")$flatten()$flatten()$flatten())$to_list() |> - # unlist() - - # expect_identical( - # lapply(ul, \(x) if (length(x) == 0) NA_character_ else x) |> - # unlist(), - # ul - # ) + ul = pl$DataFrame(s)$select(pl$col("")$flatten()$flatten()$flatten())$to_list() |> + unlist() + + expect_identical( + lapply(ul, \(x) if (length(x) == 0) NA_character_ else x) |> + unlist(), + ul + ) }) diff --git a/tests/testthat/test-sql.R b/tests/testthat/test-sql.R index 491604f5a..993ee320c 100644 --- a/tests/testthat/test-sql.R +++ b/tests/testthat/test-sql.R @@ -97,7 +97,7 @@ test_that("sql method for DataFrame and LazyFrame", { # Test the envir argument works correctly func1 = function(data) { df1 = pl$DataFrame(foo = "bar") - data$sql("select * from self join df1 using (x)", envir = parent.frame()) + data$sql("select x from self join df1 using (x)", envir = parent.frame()) } expect_true(df1$equals( diff --git a/vignettes/userguide.Rmd b/vignettes/userguide.Rmd index f1d4b0937..0e7857b82 100755 --- a/vignettes/userguide.Rmd +++ b/vignettes/userguide.Rmd @@ -571,7 +571,7 @@ df$select( # Single 
valued `Series` are broadcasted to the shape of the `DataFrame` df$select( pl$all(), - pl$all()$sum()$name$suffix("_sum") # This is a single valued Series broadcasted to the shape of the DataFrame + pl$col(pl$Float64)$sum()$name$suffix("_sum") # This is a single valued Series broadcasted to the shape of the DataFrame ) # Filters can also be applied within an expression