pola-rs · eitsupi · Jul 3, 2023 · Jun 27, 2023 · Jun 28, 2023 · Jun 28, 2023
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -53,8 +53,10 @@ Collate:
     'dataframe__frame.R'
     'datatype.R'
     'docs.R'
+    'error__rpolarserr.R'
+    'error__string.R'
+    'error__trait.R'
     'error_conversion.R'
-    'error_trait.R'
     'expr__binary.R'
     'expr__categorical.R'
     'expr__datetime.R'
@@ -78,11 +80,9 @@ Collate:
     'pkg-knitr.R'
     'pkg-nanoarrow.R'
     'rlang.R'
-    'rpolarserr.R'
     'rust_result.R'
     's3_methods.R'
     'series__series.R'
-    'string_error.R'
     'translation.R'
     'vctrs.R'
     'zzz.R'

diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R
@@ -708,19 +708,20 @@ DataFrame_sort = function(
 #'   (pl$col("Sepal.Length") + 2)$alias("add_2_SL")
 #' )
 DataFrame_select = function(...) {
-  args = list2(...)
-  exprs = do.call(construct_ProtoExprArray, args)
-  df = unwrap(.pr$DataFrame$select(self, exprs))
-
-  expr_names = names(args)
-  if (!is.null(expr_names)) {
-    old_names = df$columns
-    new_names = old_names
-    has_expr_name = nchar(expr_names) >= 1L
-    new_names[has_expr_name] = expr_names[has_expr_name]
-    df$columns = new_names
-  }
-  df
+  args = unpack_list(...)
+  .pr$DataFrame$select(self, args) |>
+    and_then(\(df) result(msg = "internal error while renaming columns", {
+      expr_names = names(args)
+      if (!is.null(expr_names)) {
+        old_names = df$columns
+        new_names = old_names
+        has_expr_name = nchar(expr_names) >= 1L
+        new_names[has_expr_name] = expr_names[has_expr_name]
+        df$columns = new_names
+      }
+      df
+    })) |>
+    unwrap("in $select()")
 }
 
 #' Drop in place
@@ -1389,7 +1390,72 @@ DataFrame_rename = function(...) {
   self$lazy()$rename(...)$collect()
 }
 
+#' @title Summary statistics for a DataFrame
+#' @param percentiles One or more percentiles to include in the summary statistics.
+#' All values must be in the range `[0; 1]`.
+#' @keywords DataFrame
+#' @return DataFrame
+#' @examples
+#' pl$DataFrame(iris)$describe()
+DataFrame_describe = function(percentiles = c(.25, .75)) {
+  perc = percentiles
 
+  # guard input
+  # styler: off
+  pcase(
+    is.null(perc), Ok(numeric()),
+    !is.numeric(perc), Err(bad_robj(perc)$mistyped("numeric")),
+    isFALSE(all(perc > 0) && all(perc < 1)), {
+      Err(bad_robj(perc)$misvalued("has all vector elements within 0 and 1"))
+    },
+    or_else = Ok(perc)
+    # styler: on
+  ) |>
+    map_err(
+      \(err)  err$bad_arg("percentiles")
+    ) |>
+    and_then(
+      \(perc) {
+        # this polars query should always succeed else flag as ...
+        result(msg = "internal error", {
+          # make percentile expressions
+          perc_exprs = lapply(
+            perc, \(x) pl$all()$quantile(x)$prefix(paste0(as.character(x * 100), "pct:"))
+          )
+
+          # bundle all expressions
+          largs = c(
+            list(
+              pl$all()$count()$prefix("count:"),
+              pl$all()$null_count()$prefix("null_count:"),
+              pl$all()$mean()$prefix("mean:"),
+              pl$all()$std()$prefix("std:"),
+              pl$all()$min()$prefix("min:"),
+              pl$all()$max()$prefix("max:"),
+              pl$all()$median()$prefix("median:")
+            ),
+            perc_exprs
+          )
+
+          # compute aggregates
+          df_aggs = do.call(self$select, largs)
+          e_col_row_names = pl$lit(df_aggs$columns)$str$split(":")
+
+          # pivotize
+          df_pivot = pl$select(
+            e_col_row_names$arr$first()$alias("rowname"),
+            e_col_row_names$arr$last()$alias("colname"),
+            pl$lit(unlist(as.data.frame(df_aggs)))$alias("value")
+          )$pivot(
+            values = "value", index = "rowname", columns = "colname"
+          )
+          df_pivot$columns[1] = "describe"
+          df_pivot
+        })
+      }
+    ) |>
+    unwrap("in $describe():")
+}
 
 #' @title Glimpse values in a DataFrame
 #' @keywords DataFrame
@@ -1428,25 +1494,27 @@ DataFrame_glimpse = function(..., return_as_string = FALSE) {
   }
 
   # construct print, flag any error as internal
-  output = result({
-    schema = self$schema
-    data = lapply(seq_along(schema), \(i) parse_column_(names(schema)[i], schema[[i]]))
-    max_col_name = max(sapply(data, \(x) nchar(x$col_name)))
-    max_col_dtyp = max(sapply(data, \(x) nchar(x$dtype)))
-    max_col_vals = 100 - max_col_name - max_col_dtyp - 3
-
-    sapply(data, \(x) {
-      name_filler = paste(rep(" ", max_col_name - nchar(x$col_name)), collapse = "")
-      dtyp_filler = paste(rep(" ", max_col_dtyp - nchar(x$dtype_str)), collapse = "")
-      vals_filler = paste(rep(" ", max_col_dtyp - nchar(x$dtype_str)), collapse = "")
-      paste0(
-        "& ", x$col_name, name_filler, x$dtype_str, dtyp_filler, " ",
-        substr(x$val_str, 1, max_col_vals), "\n"
-      )
-    }) |>
-      paste0(collapse = "")
-
-  }, msg = "internal error") |>
+  output = result(
+    {
+      schema = self$schema
+      data = lapply(seq_along(schema), \(i) parse_column_(names(schema)[i], schema[[i]]))
+      max_col_name = max(sapply(data, \(x) nchar(x$col_name)))
+      max_col_dtyp = max(sapply(data, \(x) nchar(x$dtype)))
+      max_col_vals = 100 - max_col_name - max_col_dtyp - 3
+
+      sapply(data, \(x) {
+        name_filler = paste(rep(" ", max_col_name - nchar(x$col_name)), collapse = "")
+        dtyp_filler = paste(rep(" ", max_col_dtyp - nchar(x$dtype_str)), collapse = "")
+        vals_filler = paste(rep(" ", max_col_dtyp - nchar(x$dtype_str)), collapse = "")
+        paste0(
+          "& ", x$col_name, name_filler, x$dtype_str, dtyp_filler, " ",
+          substr(x$val_str, 1, max_col_vals), "\n"
+        )
+      }) |>
+        paste0(collapse = "")
+    },
+    msg = "internal error"
+  ) |>
     unwrap("in $glimpse() :")
 
   # chose return type

diff --git a/R/rpolarserr.R → R/error__rpolarserr.R b/R/rpolarserr.R → R/error__rpolarserr.R
@@ -34,5 +34,19 @@ to_condition.RPolarsErr = function(err) {
   )
 }
 plain.RPolarsErr = function(err, msg) {
-  err$value$plain(msg)
+  err$plain(msg)
+}
+
+upgrade_err.RPolarsErr = function(err) { # already RPolarsErr pass through
+  err
+}
+
+
+
+
+#### ---- rpolarserr utils
+
+# short hand  for starting new error with a bad robj input
+bad_robj = function(r) {
+  .pr$RPolarsErr$new()$bad_robj(r)
 }
diff --git a/R/string_error.R → R/error__string.R b/R/string_error.R → R/error__string.R
@@ -14,3 +14,6 @@ to_condition.character = function(err) {
 plain.character = function(err, msg) {
   NextMethod("plain", err)
 }
+upgrade_err.character = function(err) {
+  .pr$RPolarsErr$new()$plain(err)
+}
diff --git a/R/error_trait.R → R/error__trait.R b/R/error_trait.R → R/error__trait.R
@@ -1,5 +1,5 @@
 # ANY NEW ERROR MUST IMPLEMENT THESE S3 METHODS, these are the "trait" of a polars error
-# ALSO MUST IMPLEMENT BASE THESE METHODS: print
+# ALSO MUST IMPLEMENT THESE BASE METHODS: print
 
 #' Internal generic method to add call to error
 #' @param err any type which impl as.character
@@ -86,3 +86,28 @@ plain = function(err, msg) {
 plain.default = function(err, msg) {
   paste0(msg, ": ", err)
 }
+
+
+## TODO refactor upgrade_err into as.RPolarsErr
+#' Internal generic method to add plain text to error message
+#' @details
+#' polars converts any other error types to RPolarsErr.
+#' An error type can choose to implement this to improve the translation.
+#' As fall back the error will be deparsed into a string with rust Debug, see rdbg()
+#' @param err some error type object
+#' @param msg string to add
+#' @keywords internal
+#' @return condition
+upgrade_err = function(err) {
+  UseMethod("upgrade_err", err)
+}
+upgrade_err.default = function(err) {
+  err # no upgrade found pass as is
+}
+
+# call upgrade error from internalsnamespace
+# error_trait methods are internal and do not work correctly
+# when called directly by user e.g. polars:::upgrade_err(polars:::RPolarsErr$new())
+# calling R from rust via R! but it is  a "user" call in .GlobalEnv
+# by calling a package function the parent env is the internal pacakge env.
+upgrade_err_internal_ns = \(x) upgrade_err(x)
diff --git a/R/error_conversion.R b/R/error_conversion.R
@@ -81,8 +81,22 @@ unwrap_err = function(result) {
 result = function(expr, msg = NULL) {
   tryCatch(
     Ok(expr),
-    error = \(cond) cond$value %||% cond$message |>
-      plain(msg) |>
-      Err()
+    error = \(cond) {
+      cond$value %||% cond$message |>
+        upgrade_err() |>
+        plain(msg) |>
+        Err()
+    }
+  )
+}
+
+
+raw_result = function(expr) {
+  tryCatch(
+    Ok(expr),
+    error = \(cond) {
+      cond$value %||% cond$message |>
+        Err()
+    }
   )
 }
diff --git a/R/expr__expr.R b/R/expr__expr.R
@@ -120,8 +120,9 @@ wrap_e_result = function(e, str_to_lit = TRUE, argname = NULL) {
   expr_result
 }
 
-#' wrap_elist_result
-#' @description make sure all elementsof a list is wrapped as Expr
+#' internal wrap_elist_result
+#' @description make sure all elements of a list is wrapped as Expr
+#' DEPRECATED:  prefer robj_to!(VecPlExpr) on rust side
 #' Capture any conversion error in the result
 #' @param elist a list Expr or any R object Into<Expr> (passable to pl$lit)
 #' @details
@@ -144,7 +145,7 @@ wrap_elist_result = function(elist, str_to_lit = TRUE) {
     msg = if (element_i >= 1L) {
       paste0("element [[", element_i, "]] of sequence not convertable into an Expr, error in:\n")
     } else {
-      "not convertable into a list of Expr, error in:\n"
+      paste0(str_string(elist), " was not convertable into a list of Expr, error in:\n")
     }
   )
 }

diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R
@@ -319,7 +319,7 @@ Expr$search_sorted <- function(element) .Call(wrap__Expr__search_sorted, self, e
 
 Expr$take <- function(idx) .Call(wrap__Expr__take, self, idx)
 
-Expr$sort_by <- function(by, descending) .Call(wrap__Expr__sort_by, self, by, descending)
+Expr$sort_by <- function(by, reverse) .Call(wrap__Expr__sort_by, self, by, reverse)
 
 Expr$backward_fill <- function(limit) .Call(wrap__Expr__backward_fill, self, limit)
 
@@ -401,7 +401,7 @@ Expr$rolling_skew <- function(window_size_f, bias) .Call(wrap__Expr__rolling_ske
 
 Expr$abs <- function() .Call(wrap__Expr__abs, self)
 
-Expr$rank <- function(method, descending) .Call(wrap__Expr__rank, self, method, descending)
+Expr$rank <- function(method, reverse) .Call(wrap__Expr__rank, self, method, reverse)
 
 Expr$diff <- function(n_float, null_behavior) .Call(wrap__Expr__diff, self, n_float, null_behavior)
 
@@ -489,7 +489,7 @@ Expr$lst_sum <- function() .Call(wrap__Expr__lst_sum, self)
 
 Expr$lst_mean <- function() .Call(wrap__Expr__lst_mean, self)
 
-Expr$lst_sort <- function(descending) .Call(wrap__Expr__lst_sort, self, descending)
+Expr$lst_sort <- function(reverse) .Call(wrap__Expr__lst_sort, self, reverse)
 
 Expr$lst_reverse <- function() .Call(wrap__Expr__lst_reverse, self)
 
@@ -985,7 +985,7 @@ Series$n_unique <- function() .Call(wrap__Series__n_unique, self)
 
 Series$name <- function() .Call(wrap__Series__name, self)
 
-Series$sort_mut <- function(descending) .Call(wrap__Series__sort_mut, self, descending)
+Series$sort_mut <- function(reverse) .Call(wrap__Series__sort_mut, self, reverse)
 
 Series$value_counts <- function(multithreaded, sorted) .Call(wrap__Series__value_counts, self, multithreaded, sorted)
 
@@ -997,7 +997,7 @@ Series$is_sorted_flag <- function() .Call(wrap__Series__is_sorted_flag, self)
 
 Series$is_sorted_reverse_flag <- function() .Call(wrap__Series__is_sorted_reverse_flag, self)
 
-Series$is_sorted <- function(descending, nulls_last) .Call(wrap__Series__is_sorted, self, descending, nulls_last)
+Series$is_sorted <- function(reverse, nulls_last) .Call(wrap__Series__is_sorted, self, reverse, nulls_last)
 
 Series$series_equal <- function(other, null_equal, strict) .Call(wrap__Series__series_equal, self, other, null_equal, strict)
 
@@ -1061,7 +1061,7 @@ Series$cumsum <- function(reverse) .Call(wrap__Series__cumsum, self, reverse)
 
 Series$to_frame <- function() .Call(wrap__Series__to_frame, self)
 
-Series$set_sorted_mut <- function(descending) invisible(.Call(wrap__Series__set_sorted_mut, self, descending))
+Series$set_sorted_mut <- function(reverse) invisible(.Call(wrap__Series__set_sorted_mut, self, reverse))
 
 Series$from_arrow <- function(name, array) .Call(wrap__Series__from_arrow, name, array)
 

diff --git a/R/functions__lazy.R b/R/functions__lazy.R
@@ -103,7 +103,7 @@ pl$col = function(name = "", ...) {
     }
   }
   # TODO implement series, DataType
-  stopf("not supported implement input")
+  stopf(paste("cannot make a column expression from:", str_string(name)))
 }
 
 #' an element in 'eval'-expr

diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R
@@ -215,8 +215,9 @@ LazyFrame_describe_plan = "use_extendr_wrapper"
 #' @param ... any single Expr or string naming a column
 #' @return A new `LazyFrame` object with applied filter.
 LazyFrame_select = function(...) {
-  pra = construct_ProtoExprArray(...)
-  .pr$LazyFrame$select(self, pra)
+  args = unpack_list(...)
+  .pr$LazyFrame$select(self, args) |>
+    unwrap("in $select()")
 }
 
 #' @title Lazy with columns
@@ -255,7 +256,7 @@ LazyFrame_filter = "use_extendr_wrapper"
 #' @return collected `DataFrame`
 #' @examples pl$DataFrame(iris)$lazy()$filter(pl$col("Species") == "setosa")$collect()
 LazyFrame_collect = function() {
-  unwrap(.pr$LazyFrame$collect(self), "in $collect():")
+  unwrap(.pr$LazyFrame$collect_handled(self), "in $collect():")
 }
 
 #' @title New DataFrame from LazyFrame_object$collect()

diff --git a/R/utils.R b/R/utils.R
@@ -110,8 +110,6 @@ unpack_list = function(...) {
   }
 }
 
-
-
 #' Simple SQL CASE WHEN implementation for R
 #'
 #' @description Inspired by data.table::fcase + dplyr::case_when.