pola-rs · etiennebacher · May 12, 2023 · May 4, 2023 · May 4, 2023 · May 4, 2023
diff --git a/R/expr__expr.R b/R/expr__expr.R
@@ -2240,9 +2240,8 @@ Expr_take_every = function(n) {
 #' @examples
 #' #get 3 first elements
 #' pl$DataFrame(list(x=1:11))$select(pl$col("x")$head(3))
-Expr_head = function(n=10) {
-  if(!is.numeric(n)) stopf("n must be numeric")
-  unwrap(.pr$Expr$head(self,n=n))
+Expr_head = function(n = 10) {
+  unwrap(.pr$Expr$head(self, n = n), "in $head():")
 }
 
 #' Tail
@@ -2256,9 +2255,8 @@ Expr_head = function(n=10) {
 #' @examples
 #' #get 3 last elements
 #' pl$DataFrame(list(x=1:11))$select(pl$col("x")$tail(3))
-Expr_tail = function(n=10) {
-  if(!is.numeric(n)) stopf("n must be numeric")
-  unwrap(.pr$Expr$tail(self,n=n))
+Expr_tail = function(n = 10) {
+  unwrap(.pr$Expr$tail(self, n = n), "in $tail():")
 }
 
 

diff --git a/R/functions__lazy.R b/R/functions__lazy.R
@@ -253,6 +253,70 @@ pl$last = function(column = NULL) {#-> Expr | Any:
 }
 
 
+#' Get the first `n` rows.
+#' @name pl_head
+#' @param column if dtype is:
+#' - Series: take the first `n` values of the `Series`
+#' - character or Expr: syntactic sugar for `pl$col(column)$head(n)` or `column$head(n)`
+#' @param n number of rows to take, defaults to 10
+#' @keywords Expr_new
+#' @return Expr or head value of input Series
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 8, 3),
+#'   b = c(4, 5, 2),
+#'   c = c("foo", "bar", "foo")
+#' )
+#' df$select(pl$head("a"))
+#'
+#' df$select(pl$head("a",2))
+#'
+#' pl$head(df$get_column("a"),2)
+pl$head = function(column = NULL, n = 10) {#-> Expr | Any:
+  pcase(
+    inherits(column,"Series"), result(column$expr$head(n)),
+    is.character(column), result(pl$col(column)$head(n)),
+    inherits(column,"Expr"), result(column$head(n)),
+    or_else = Err(paste0(
+      "param [column] type is neither Series, charvec nor Expr, but ",
+      str_string(column)
+    ))
+  ) |>
+    unwrap("in pl$head():")
+}
+
+
+#' Get the last `n` rows.
+#' @name pl_tail
+#' @param column if dtype is:
+#' - Series: take the last `n` values of the `Series`
+#' - character or Expr: syntactic sugar for `pl$col(column)$tail(n)` or `column$tail(n)`
+#' @param n number of rows to take, defaults to 10
+#' @return Expr or tail value of input Series
+#' @examples
+#' df = pl$DataFrame(
+#'   a = c(1, 8, 3),
+#'   b = c(4, 5, 2),
+#'   c = c("foo", "bar", "foo")
+#' )
+#' df$select(pl$tail("a"))
+#'
+#' df$select(pl$tail("a",2))
+#'
+#' pl$tail(df$get_column("a"),2)
+pl$tail = function(column = NULL, n = 10) {#-> Expr | Any:
+  pcase(
+    inherits(column,"Series"), result(column$expr$tail(n)),
+    is.character(column), result(pl$col(column)$tail(n)),
+    inherits(column,"Expr"), result(column$tail(n)),
+    or_else = Err(paste0(
+      "param [column] type is neither Series, charvec nor Expr, but ",
+      str_string(column)
+    ))
+  ) |>
+    unwrap("in pl$tail():")
+}
+
 #' pl$mean
 #' @name pl_mean
 #' @description Depending on the input type this function does different things:
@@ -343,7 +407,7 @@ pl$median = function(...) { #-> Expr | Any:
 }
 
 #' count n unique values
-#' @name pl_unique
+#' @name pl_n_unique
 #' @description Depending on the input type this function does different things:
 #' @param column if dtype is:
 #' - Series: call method n_unique() to return value of unique values.
@@ -375,24 +439,24 @@ pl$n_unique = function(column) { #-> int or Expr
 }
 
 #' Approx count unique values.
-#' @name pl_unique
+#' @name pl_approx_unique
 #' @description This is done using the HyperLogLog++ algorithm for cardinality estimation.
 #' @param column if dtype is:
 #' - String: syntactic sugar for `pl$col(column)$approx_unique()`, returns Expr
 #' - Expr: syntactic sugar for `column$approx_unique()`, returns Expr
 #'
 #' @keywords Expr_new
 #'
-#' @return Expr or value
+#' @return Expr
 #'
 #' @examples
 #' #column as Series
-#' pl$approx_unique(pl$Series(1:4)) == 4
+#' pl$approx_unique(pl$lit(1:4)) == 4
 #'
 #' #column as String
 #' expr = pl$approx_unique("bob")
 #' print(expr)
-#' pl$DataFrame(bob = 1:4)$select(expr)
+#' pl$DataFrame(bob = 1:80)$select(expr)
 #'
 #' #colum as Expr
 #' pl$DataFrame(bob = 1:4)$select(pl$approx_unique(pl$col("bob")))
@@ -405,6 +469,7 @@ pl$approx_unique = function(column) { #-> int or Expr
     unwrap("in pl$approx_unique():")
 }
 
+
 #' sum across expressions / literals / Series
 #' @description  syntactic sugar for starting a expression with sum
 #' @name pl_sum

diff --git a/man/pl_unique.Rd → man/pl_approx_unique.Rd b/man/pl_unique.Rd → man/pl_approx_unique.Rd
diff --git a/man/pl_head.Rd b/man/pl_head.Rd
diff --git a/man/pl_n_unique.Rd b/man/pl_n_unique.Rd
diff --git a/man/pl_pl.Rd b/man/pl_pl.Rd
diff --git a/man/pl_tail.Rd b/man/pl_tail.Rd
diff --git a/src/rust/src/lazy/dsl.rs b/src/rust/src/lazy/dsl.rs
@@ -1541,18 +1541,12 @@ impl Expr {
         self.0.clone().last().into()
     }
 
-    pub fn head(&self, n: f64) -> List {
-        let res = try_f64_into_usize(n)
-            .map_err(|err| format!("in head: {}", err))
-            .map(|n| Expr(self.0.clone().head(Some(n))));
-        r_result_list(res)
+    pub fn head(&self, n: Robj) -> Result<Self, String> {
+        Ok(self.0.clone().head(Some(robj_to!(usize, n)?)).into())
     }
 
-    pub fn tail(&self, n: f64) -> List {
-        let res = try_f64_into_usize(n)
-            .map_err(|err| format!("in tail: {}", err))
-            .map(|n| Expr(self.0.clone().tail(Some(n))));
-        r_result_list(res)
+    pub fn tail(&self, n: Robj) -> Result<Self, String> {
+        Ok(self.0.clone().tail(Some(robj_to!(usize, n)?)).into())
     }
 
     //chaining methods

diff --git a/tests/testthat/test-lazy_functions.R b/tests/testthat/test-lazy_functions.R
@@ -216,3 +216,71 @@ test_that("pl$approx_unique", {
 
   expect_grepl_error(pl$approx_unique(1:99),c("in pl\\$approx_unique","is neither","1 2 3"))
 })
+
+
+test_that("pl$head", {
+  df = pl$DataFrame(
+    a = c(1, 8, 3),
+    b = c(4, 5, 2),
+    c = c("foo", "bar", "foo")
+  )
+  expect_identical(
+    df$select(pl$head("a"))$to_data_frame()$a,
+    head(df$to_data_frame())$a
+  )
+
+  expect_identical(
+    df$select(pl$head("a",2))$to_data_frame()$a,
+    head(df$to_data_frame(),2)$a
+  )
+
+  expect_identical(
+    df$select(pl$head(pl$col("a"),2))$to_data_frame()$a,
+    head(df$to_data_frame(),2)$a
+  )
+
+  expect_identical(
+    pl$head(df$get_column("a"),2)$to_r(),
+    head(df$to_list()$a,2)
+  )
+
+  expect_grepl_error(
+    pl$head(df$get_column("a"),-2),
+    "the arg \\[n\\] the value -2 cannot be less than zero"
+  )
+
+})
+
+
+test_that("pl$tail", {
+  df = pl$DataFrame(
+    a = c(1, 8, 3),
+    b = c(4, 5, 2),
+    c = c("foo", "bar", "foo")
+  )
+  expect_identical(
+    df$select(pl$tail("a"))$to_data_frame()$a,
+    tail(df$to_data_frame())$a
+  )
+
+  expect_identical(
+    df$select(pl$tail("a",2))$to_data_frame()$a,
+    tail(df$to_data_frame(),2)$a
+  )
+
+  expect_identical(
+    df$select(pl$tail(pl$col("a"),2))$to_data_frame()$a,
+    tail(df$to_data_frame(),2)$a
+  )
+
+  expect_identical(
+    pl$tail(df$get_column("a"),2)$to_r(),
+    tail(df$to_list()$a,2)
+  )
+
+  expect_grepl_error(
+    pl$tail(df$get_column("a"),-2),
+    "the arg \\[n\\] the value -2 cannot be less than zero"
+  )
+
+})