Bump rust-polars to 0.41 (#1147)

pola-rs · Jul 1, 2024 · b9437f3 · b9437f3
1 parent 2096838
commit b9437f3
Show file tree

Hide file tree

Showing 75 changed files with 1,526 additions and 932 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -2,21 +2,62 @@
 
 ## Polars R Package (development version)
 
+Updated rust-polars to 0.41.2 (#1147).
+
 ### Breaking changes
 
 - In `$n_chunks()`, the default value of `strategy` is now `"first"` (#1137).
--`$sample()` for Expr and DataFrame (#1136):
+- `$sample()` for Expr and DataFrame (#1136):
   - the argument `frac` is renamed `fraction`;
   - all the arguments except `n` must be named;
   - for the Expr method only, the first argument is now `n` (it was already the
     case for the DataFrame method);
   - for the Expr method only, the default value for `with_replacement` is now
     `FALSE` (it was already the case for the DataFrame method).
+- `$melt()` had several changes (#1147):
+  - `$melt()` is renamed `$unpivot()`.
+  - Some arguments were renamed: `id_vars` is now `index`, `value_vars` is now
+    `on`.
+  - The order of arguments has changed: `on` is now first, then `index`. The
+    order of the other arguments hasn't changed. Note that `on` can be unnamed
+    but all the other arguments must be named.
+- `$pivot()` had several changes (#1147):
+  - The argument `columns` is renamed `on`.
+  - The order of arguments has changed: `on` is now first, then `index` and
+    `values`. The order of the other arguments hasn't changed. Note that `on`
+    can be unnamed but all the other arguments must be named.
+- In `$write_parquet()` and `$sink_parquet()`, the default value of argument
+  `statistics` is now `TRUE` and can take values other than `TRUE`/`FALSE` (#1147).
+- In `$dt$truncate()` and `$dt$round()`, the argument `offset` has been removed.
+  Use `$dt$offset_by()` after those functions instead (#1147).
+- In `$top_k()` and `$bottom_k()` for `Expr`, the arguments `nulls_last`,
+  `maintain_order` and `multithreaded` have been removed. If any `null` values
+  are in the top/bottom `k` values, they will always be positioned last (#1147).
+- `$replace()` has been split into two functions depending on the desired
+  behaviour (#1147):
+  - `$replace()` recodes some values in the column, leaving all other values
+    unchanged. Compared to the previous version, it doesn't use the arguments
+    `default` and `return_dtype` anymore.
+  - `$replace_strict()` replaces all values by different values. If a value
+    doesn't have a specific mapping, it is replaced by the `default` value.
+- `$str$concat()` is deprecated, use `$str$join()` (with the same arguments)
+  instead (#1147).
+- In `pl$date_range()` and `pl$date_ranges()`, the arguments `time_unit` and
+  `time_zone` have been removed. They were deprecated in previous versions
+  (#1147).
+- In `$join()`, when `how = "cross"`, `on`, `left_on` and `right_on` must be
+  `NULL` (#1147).
+
 
 ### New features
 
 - New method `$has_nulls()` (#1133).
 - New method `$list$explode()` (#1139).
+- `$over()` gains a new argument `order_by` to specify the order of values
+  within each group. This is useful when the operation depends on the order of
+  values, such as `$shift()` (#1147).
+- `$value_counts()` gains an argument `normalize` to give relative frequencies
+  of unique values instead of their count (#1147).
 
 ## Polars R Package 0.17.0
 

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
@@ -1070,7 +1070,7 @@ DataFrame_to_list = function(unnest_structs = TRUE, ..., int64_conversion = pola
 DataFrame_join = function(
     other,
     on = NULL,
-    how = c("inner", "left", "full", "semi", "anti", "cross"),
+    how = "inner",
     ...,
     left_on = NULL,
     right_on = NULL,
@@ -1490,7 +1490,7 @@ DataFrame_join_asof = function(
 
 
 
-#' @inherit LazyFrame_melt
+#' @inherit LazyFrame_unpivot
 #' @keywords DataFrame
 #'
 #' @return A new `DataFrame`
@@ -1502,25 +1502,26 @@ DataFrame_join_asof = function(
 #'   c = c(2, 4, 6),
 #'   d = c(7, 8, 9)
 #' )
-#' df$melt(id_vars = "a", value_vars = c("b", "c", "d"))
-DataFrame_melt = function(
-    id_vars = NULL,
-    value_vars = NULL,
+#' df$unpivot(index = "a", on = c("b", "c", "d"))
+DataFrame_unpivot = function(
+    on = NULL,
+    ...,
+    index = NULL,
     variable_name = NULL,
     value_name = NULL) {
-  .pr$DataFrame$melt(
-    self, id_vars %||% character(), value_vars %||% character(),
+  .pr$DataFrame$unpivot(
+    self, on %||% character(), index %||% character(),
     value_name, variable_name
-  ) |> unwrap("in $melt( ): ")
+  ) |> unwrap("in $unpivot( ): ")
 }
 
 
 
 #' Pivot data from long to wide
 #' @param values Column values to aggregate. Can be multiple columns if the
-#' `columns` arguments contains multiple columns as well.
+#' `on` argument contains multiple columns as well.
 #' @param index  One or multiple keys to group by.
-#' @param columns  Name of the column(s) whose values will be used as the header
+#' @param on  Name of the column(s) whose values will be used as the header
 #' of the output DataFrame.
 #' @param ... Not used.
 #' @param aggregate_function One of:
@@ -1544,7 +1545,7 @@ DataFrame_melt = function(
 #' df
 #'
 #' df$pivot(
-#'   values = "baz", index = "foo", columns = "bar"
+#'   values = "baz", index = "foo", on = "bar"
 #' )
 #'
 #' # Run an expression as aggregation function
@@ -1557,15 +1558,15 @@ DataFrame_melt = function(
 #'
 #' df$pivot(
 #'   index = "col1",
-#'   columns = "col2",
+#'   on = "col2",
 #'   values = "col3",
 #'   aggregate_function = pl$element()$tanh()$mean()
 #' )
 DataFrame_pivot = function(
-    values,
-    index,
-    columns,
+    on,
     ...,
+    index,
+    values,
     aggregate_function = NULL,
     maintain_order = TRUE,
     sort_columns = FALSE,
@@ -1586,7 +1587,7 @@ DataFrame_pivot = function(
     )) |>
     # run pivot when valid aggregate_expr
     and_then(\(aggregate_expr) .pr$DataFrame$pivot_expr(
-      self, index, columns, values, maintain_order, sort_columns, aggregate_expr, separator
+      self, on, index, values, maintain_order, sort_columns, aggregate_expr, separator
     )) |>
     # unwrap and add method context name
     unwrap("in $pivot():")
@@ -1736,7 +1737,7 @@ DataFrame_describe = function(percentiles = c(.25, .75), interpolation = "neares
     )$
       unnest("fields")$
       drop("column")$
-      pivot(index = "statistic", columns = "variable", values = "column_0")$
+      pivot(index = "statistic", on = "variable", values = "column_0")$
       with_columns(statistic = pl$lit(metrics))
   }) |>
     uw()
@@ -2031,9 +2032,11 @@ DataFrame_write_parquet = function(
     ...,
     compression = "zstd",
     compression_level = 3,
-    statistics = FALSE,
+    statistics = TRUE,
     row_group_size = NULL,
     data_pagesize_limit = NULL) {
+  statistics = translate_statistics(statistics) |>
+    unwrap("in $write_parquet():")
   .pr$DataFrame$write_parquet(
     self,
     file,