Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More lazy functions #196

Merged
merged 12 commits into from
May 12, 2023
10 changes: 4 additions & 6 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -2240,9 +2240,8 @@ Expr_take_every = function(n) {
#' @examples
#' #get 3 first elements
#' pl$DataFrame(list(x=1:11))$select(pl$col("x")$head(3))
Expr_head = function(n=10) {
if(!is.numeric(n)) stopf("n must be numeric")
unwrap(.pr$Expr$head(self,n=n))
Expr_head = function(n = 10) {
  # `n` is forwarded untouched to the Rust-side `head` binding (no R-side type
  # check here); the returned result is then handed to unwrap() together with
  # the "in $head():" context string.
  head_result = .pr$Expr$head(self, n = n)
  unwrap(head_result, "in $head():")
}

#' Tail
Expand All @@ -2256,9 +2255,8 @@ Expr_head = function(n=10) {
#' @examples
#' #get 3 last elements
#' pl$DataFrame(list(x=1:11))$select(pl$col("x")$tail(3))
Expr_tail = function(n=10) {
if(!is.numeric(n)) stopf("n must be numeric")
unwrap(.pr$Expr$tail(self,n=n))
Expr_tail = function(n = 10) {
  # `n` is forwarded untouched to the Rust-side `tail` binding (no R-side type
  # check here); the returned result is then handed to unwrap() together with
  # the "in $tail():" context string.
  tail_result = .pr$Expr$tail(self, n = n)
  unwrap(tail_result, "in $tail():")
}


Expand Down
75 changes: 70 additions & 5 deletions R/functions__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,70 @@ pl$last = function(column = NULL) {#-> Expr | Any:
}


#' Get the first `n` rows.
#' @name pl_head
#' @param column if dtype is:
#' - Series: Take the first `n` values of the `Series`
#' - str: syntactic sugar for `pl$col(column)$head(n)`; Expr: syntactic sugar for `column$head(n)`
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' @param n number of rows to take, default 10
#' @keywords Expr_new
#' @return Expr or head value of input Series
#' @examples
#' df = pl$DataFrame(
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' a = c(1, 8, 3),
#' b = c(4, 5, 2),
#' c = c("foo", "bar", "foo")
#' )
#' df$select(pl$head("a"))
#'
#' df$select(pl$head("a",2))
#'
#' pl$head(df$get_column("a"),2)
pl$head = function(column = NULL, n = 10) {#-> Expr | Any:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If NULL is not an acceptable input, then we may want to leave the argument without default and include an is.missing() check instead.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oups good catch :)

# Dispatch on the class of `column`: a Series goes through column$expr$head(n),
# a character value through pl$col(column)$head(n), an Expr through
# column$head(n). Presumably pcase() picks the first case whose condition
# holds; anything else (including the default NULL) ends in the `or_else` Err,
# which names the offending input via str_string().
pcase(
inherits(column,"Series"), result(column$expr$head(n)),
is.character(column), result(pl$col(column)$head(n)),
inherits(column,"Expr"), result(column$head(n)),
or_else = Err(paste0(
"param [column] type is neither Series, charvec nor Expr, but ",
str_string(column)
))
) |>
unwrap("in pl$head():")
}


#' Get the last `n` rows.
#' @name pl_tail
#' @param column if dtype is:
#' - Series: Take the last `n` values of the `Series`
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' - str: syntactic sugar for `pl$col(column)$tail(n)`; Expr: syntactic sugar for `column$tail(n)`
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' @param n number of rows to take, default 10
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
#' @return Expr or tail value of input Series
#' @examples
#' df = pl$DataFrame(
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' a = c(1, 8, 3),
#' b = c(4, 5, 2),
#' c = c("foo", "bar", "foo")
#' )
#' df$select(pl$tail("a"))
#'
#' df$select(pl$tail("a",2))
#'
#' pl$tail(df$get_column("a"),2)
pl$tail = function(column = NULL, n = 10) {#-> Expr | Any:
  # Dispatch on the class of `column`: a Series goes through
  # column$expr$tail(n), a character value through pl$col(column)$tail(n), an
  # Expr through column$tail(n). Presumably pcase() picks the first case whose
  # condition holds; anything else (including the default NULL) ends in the
  # `or_else` Err, which names the offending input via str_string().
  tail_result = pcase(
    inherits(column, "Series"), result(column$expr$tail(n)),
    is.character(column), result(pl$col(column)$tail(n)),
    inherits(column, "Expr"), result(column$tail(n)),
    or_else = Err(paste0(
      "param [column] type is neither Series, charvec nor Expr, but ",
      str_string(column)
    ))
  )

  # Hand the collected result to unwrap() with the calling context attached.
  unwrap(tail_result, "in pl$tail():")
}

#' pl$mean
#' @name pl_mean
#' @description Depending on the input type this function does different things:
Expand Down Expand Up @@ -343,7 +407,7 @@ pl$median = function(...) { #-> Expr | Any:
}

#' count n unique values
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' @name pl_unique
#' @name pl_n_unique
#' @description Depending on the input type this function does different things:
#' @param column if dtype is:
#' - Series: call method n_unique() to return the number of unique values.
Expand Down Expand Up @@ -375,24 +439,24 @@ pl$n_unique = function(column) { #-> int or Expr
}

#' Approx count unique values.
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' @name pl_unique
#' @name pl_approx_unique
#' @description This is done using the HyperLogLog++ algorithm for cardinality estimation.
sorhawell marked this conversation as resolved.
Show resolved Hide resolved
#' @param column if dtype is:
#' - String: syntactic sugar for `pl$col(column)$approx_unique()`, returns Expr
#' - Expr: syntactic sugar for `column$approx_unique()`, returns Expr
#'
#' @keywords Expr_new
#'
#' @return Expr or value
#' @return Expr
#'
#' @examples
#' #column as Series
#' pl$approx_unique(pl$Series(1:4)) == 4
#' pl$approx_unique(pl$lit(1:4)) == 4
#'
#' #column as String
#' expr = pl$approx_unique("bob")
#' print(expr)
#' pl$DataFrame(bob = 1:4)$select(expr)
#' pl$DataFrame(bob = 1:80)$select(expr)
#'
#' #column as Expr
#' pl$DataFrame(bob = 1:4)$select(pl$approx_unique(pl$col("bob")))
Expand All @@ -405,6 +469,7 @@ pl$approx_unique = function(column) { #-> int or Expr
unwrap("in pl$approx_unique():")
}


#' sum across expressions / literals / Series
#' @description syntactic sugar for starting an expression with sum
#' @name pl_sum
Expand Down
26 changes: 6 additions & 20 deletions man/pl_unique.Rd → man/pl_approx_unique.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions man/pl_head.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions man/pl_n_unique.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/pl_pl.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions man/pl_tail.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 4 additions & 10 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1541,18 +1541,12 @@ impl Expr {
self.0.clone().last().into()
}

pub fn head(&self, n: f64) -> List {
let res = try_f64_into_usize(n)
.map_err(|err| format!("in head: {}", err))
.map(|n| Expr(self.0.clone().head(Some(n))));
r_result_list(res)
/// Build an expression that keeps the first `n` rows of this expression.
///
/// `n` arrives as a raw R object and is converted to `usize` by `robj_to!`.
///
/// # Errors
///
/// Returns the conversion error as a `String` when `n` cannot be turned
/// into a `usize`.
pub fn head(&self, n: Robj) -> Result<Self, String> {
    let n_rows = robj_to!(usize, n)?;
    let limited = self.0.clone().head(Some(n_rows));
    Ok(limited.into())
}

pub fn tail(&self, n: f64) -> List {
let res = try_f64_into_usize(n)
.map_err(|err| format!("in tail: {}", err))
.map(|n| Expr(self.0.clone().tail(Some(n))));
r_result_list(res)
/// Build an expression that keeps the last `n` rows of this expression.
///
/// `n` arrives as a raw R object and is converted to `usize` by `robj_to!`.
///
/// # Errors
///
/// Returns the conversion error as a `String` when `n` cannot be turned
/// into a `usize`.
pub fn tail(&self, n: Robj) -> Result<Self, String> {
    let n_rows = robj_to!(usize, n)?;
    let limited = self.0.clone().tail(Some(n_rows));
    Ok(limited.into())
}

//chaining methods
Expand Down
68 changes: 68 additions & 0 deletions tests/testthat/test-lazy_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,71 @@ test_that("pl$approx_unique", {

expect_grepl_error(pl$approx_unique(1:99),c("in pl\\$approx_unique","is neither","1 2 3"))
})


test_that("pl$head", {
  df = pl$DataFrame(
    a = c(1, 8, 3),
    b = c(4, 5, 2),
    c = c("foo", "bar", "foo")
  )
  # base-R copy of the same data; every polars result is compared against
  # what base head() yields on it
  r_df = df$to_data_frame()

  # column given by name, default n
  got_default = df$select(pl$head("a"))$to_data_frame()$a
  expect_identical(got_default, head(r_df)$a)

  # column given by name, explicit n
  got_by_name = df$select(pl$head("a", 2))$to_data_frame()$a
  expect_identical(got_by_name, head(r_df, 2)$a)

  # column given as an Expr
  got_by_expr = df$select(pl$head(pl$col("a"), 2))$to_data_frame()$a
  expect_identical(got_by_expr, head(r_df, 2)$a)

  # column given as a Series: the result is converted with $to_r()
  got_by_series = pl$head(df$get_column("a"), 2)$to_r()
  expect_identical(got_by_series, head(df$to_list()$a, 2))

  # a negative n must be rejected with the conversion error message
  expect_grepl_error(
    pl$head(df$get_column("a"), -2),
    "the arg \\[n\\] the value -2 cannot be less than zero"
  )
})


test_that("pl$tail", {
  df = pl$DataFrame(
    a = c(1, 8, 3),
    b = c(4, 5, 2),
    c = c("foo", "bar", "foo")
  )
  # base-R copy of the same data; every polars result is compared against
  # what base tail() yields on it
  r_df = df$to_data_frame()

  # column given by name, default n
  got_default = df$select(pl$tail("a"))$to_data_frame()$a
  expect_identical(got_default, tail(r_df)$a)

  # column given by name, explicit n
  got_by_name = df$select(pl$tail("a", 2))$to_data_frame()$a
  expect_identical(got_by_name, tail(r_df, 2)$a)

  # column given as an Expr
  got_by_expr = df$select(pl$tail(pl$col("a"), 2))$to_data_frame()$a
  expect_identical(got_by_expr, tail(r_df, 2)$a)

  # column given as a Series: the result is converted with $to_r()
  got_by_series = pl$tail(df$get_column("a"), 2)$to_r()
  expect_identical(got_by_series, tail(df$to_list()$a, 2))

  # a negative n must be rejected with the conversion error message
  expect_grepl_error(
    pl$tail(df$get_column("a"), -2),
    "the arg \\[n\\] the value -2 cannot be less than zero"
  )
})