Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: rename $not_ -> $not, $first -> $first_distinct, and add $last_distinct #531

Merged
merged 7 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@
columns to these functions will now compute the min/max/sum in each column
separately. Use `pl$min_horizontal()` `pl$max_horizontal()`, and
`pl$sum_horizontal()` instead for rowwise computation (#508).
- `$is_not()` is deprecated and will be removed in 0.12.0. Use `$not_()` instead
(#511).
- `$is_not()` is deprecated and will be removed in 0.12.0. Use `$not()` instead
(#511, #531).
- `$is_first()` is deprecated and will be removed in 0.12.0. Use `$is_first_distinct()`
instead (#531).
- In `pl$concat()`, the argument `to_supertypes` is removed. Use the suffix
`"_relaxed"` in the `how` argument to cast columns to their shared supertypes
(#523).
Expand Down Expand Up @@ -56,6 +58,7 @@
date/datetime/numeric indices (#470).
- New methods `$name$to_lowercase()` and `$name$to_uppercase()` to transform
variable names (#529).
- New method `$is_last_distinct()` (#531).
- New methods of the Expressions class, `$floor_div()`, `$mod()`, `$eq_missing()`
and `$neq_missing()`. The base R operators `%/%` and `%%` for Expressions are
now translated to `$floor_div()` and `$mod()` (#523).
Expand Down
39 changes: 28 additions & 11 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -285,17 +285,17 @@ Expr_mul = Expr_mul = function(other) {
#' @format NULL
#' @examples
#' # two syntaxes same result
#' pl$lit(TRUE)$not_()
#' pl$lit(TRUE)$not()
#' !pl$lit(TRUE)
Expr_not_ = "use_extendr_wrapper"
Expr_not = "use_extendr_wrapper"
#' @export
#' @rdname Expr_not
#' @param x Expr
"!.Expr" = function(x) {
  # Operator form of negation: `!x` forwards to the `$not()` method of `x`.
  x$not()
}

# Deprecated spelling of `$not()`; the warning text announces removal in 0.12.0.
# Emits exactly one deprecation warning, then delegates to the `not` binding,
# passing along `self`.
Expr_is_not = function() {
  warning("`$is_not()` is deprecated and will be removed in 0.12.0. Use `$not()` instead.")
  .pr$Expr$not(self)
}

#' Check strictly lower inequality
Expand Down Expand Up @@ -564,7 +564,7 @@ Expr_is_null = "use_extendr_wrapper"
#' Check if elements are not NULL
#'
#' Returns a boolean Series indicating which values are not null. Syntactic sugar
#' for `$is_null()$not_()`.
#' for `$is_null()$not()`.
#' @return Expr
#' @docType NULL
#' @format NULL
Expand Down Expand Up @@ -1130,7 +1130,7 @@ Expr_is_nan = "use_extendr_wrapper"
#' Check if elements are not NaN
#'
#' Returns a boolean Series indicating which values are not NaN. Syntactic sugar
#' for `$is_nan()$not_()`.
#' for `$is_nan()$not()`.
#' @return Expr
#' @docType NULL
#' @format NULL
Expand Down Expand Up @@ -1915,13 +1915,30 @@ Expr_is_unique = "use_extendr_wrapper"
#'
#' @examples
#' pl$DataFrame(head(mtcars[, 1:2]))$
#' with_columns(is_ufirst = pl$col("mpg")$is_first())
Expr_is_first = "use_extendr_wrapper"
#' with_columns(is_ufirst = pl$col("mpg")$is_first_distinct())
Expr_is_first_distinct = "use_extendr_wrapper"

# Deprecated spelling of `$is_first_distinct()`: warn the caller, then hand
# `self` to the renamed binding and return whatever it returns.
Expr_is_first = function() {
  deprecation_msg = "`$is_first()` is deprecated and will be removed in 0.12.0. Use `$is_first_distinct()` instead."
  warning(deprecation_msg)
  .pr$Expr$is_first_distinct(self)
}


#' Check whether each value is the last occurrence
#'
#' Marks, for every distinct value, the position where it appears for the last
#' time. See `$is_first_distinct()` for the first-occurrence counterpart.
#'
#' @return Expr
#' @docType NULL
#' @format NULL
#'
#' @examples
#' # `is_ulast` flags the last occurrence of each distinct `mpg` value
#' pl$DataFrame(head(mtcars[, 1:2]))$
#' with_columns(is_ulast = pl$col("mpg")$is_last_distinct())
Expr_is_last_distinct = "use_extendr_wrapper"


#' Check whether each value is duplicated
#'
#' This is syntactic sugar for `$is_unique()$not_()`.
#' This is syntactic sugar for `$is_unique()$not()`.
#' @return Expr
#' @docType NULL
#' @format NULL
Expand Down
6 changes: 4 additions & 2 deletions R/extendr-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -793,12 +793,14 @@ Expr$is_duplicated <- function() .Call(wrap__Expr__is_duplicated, self)

Expr$is_finite <- function() .Call(wrap__Expr__is_finite, self)

Expr$is_first <- function() .Call(wrap__Expr__is_first, self)
Expr$is_first_distinct <- function() .Call(wrap__Expr__is_first_distinct, self)

# Thin bindings: each one forwards `self` (plus any declared argument) through
# `.Call()` to the routine named `wrap__Expr__<method>`. `self` is not a
# parameter of these functions, so it has to be resolved from the environment
# they are evaluated in.
Expr$is_in <- function(other) .Call(wrap__Expr__is_in, self, other)

Expr$is_infinite <- function() .Call(wrap__Expr__is_infinite, self)

Expr$is_last_distinct <- function() .Call(wrap__Expr__is_last_distinct, self)

Expr$is_nan <- function() .Call(wrap__Expr__is_nan, self)

Expr$is_not_null <- function() .Call(wrap__Expr__is_not_null, self)
Expand All @@ -809,7 +811,7 @@ Expr$is_null <- function() .Call(wrap__Expr__is_null, self)

Expr$is_unique <- function() .Call(wrap__Expr__is_unique, self)

Expr$not_ <- function() .Call(wrap__Expr__not_, self)
Expr$not <- function() .Call(wrap__Expr__not, self)

Expr$count <- function() .Call(wrap__Expr__count, self)

Expand Down
2 changes: 1 addition & 1 deletion man/Expr_is_duplicated.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/Expr_is_first.Rd → man/Expr_is_first_distinct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/Expr_is_last_distinct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/Expr_is_not_nan.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/Expr_is_not_null.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/Expr_not_.Rd → man/Expr_not.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions src/rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ features = [
"interpolate",
"ipc",
"is_first_distinct",
"is_last_distinct",
"is_in",
"is_unique",
"json",
Expand Down
9 changes: 5 additions & 4 deletions src/rust/src/lazy/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1600,8 +1600,7 @@ impl Expr {
self.0.clone().is_finite().into()
}

// TODO: rename to is_first_distinct
pub fn is_first(&self) -> Self {
pub fn is_first_distinct(&self) -> Self {
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
self.clone().0.is_first_distinct().into()
}

Expand All @@ -1613,7 +1612,9 @@ impl Expr {
self.0.clone().is_infinite().into()
}

// Applies `is_last_distinct` to a clone of the wrapped expression and
// converts the result back into this wrapper type. The inner value is cloned
// directly (`self.0.clone()`), matching the sibling methods in this impl.
pub fn is_last_distinct(&self) -> Self {
    self.0.clone().is_last_distinct().into()
}

pub fn is_nan(&self) -> Self {
self.0.clone().is_nan().into()
Expand All @@ -1632,7 +1633,7 @@ impl Expr {
// Clones the wrapped expression, applies `is_unique` to the clone, and
// converts the result back into this wrapper type.
pub fn is_unique(&self) -> Self {
    let inner = self.0.clone();
    inner.is_unique().into()
}
pub fn not_(&self) -> Self {
pub fn not(&self) -> Self {
self.0.clone().not().into()
}

Expand Down
30 changes: 16 additions & 14 deletions tests/testthat/test-expr_expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,20 @@ test_that("expression boolean operators", {

cmp_operators_df = pl$DataFrame(list())$with_columns(
(pl$lit(1) < 2)$alias("1 lt 2"),
(pl$lit(1) < 1)$alias("1 lt 1 not")$not_(),
(pl$lit(1) < 1)$alias("1 lt 1 not")$not(),
(pl$lit(2) > 1)$alias("2 gt 1"),
(pl$lit(1) > 1)$alias("1 gt 1 not")$not_(),
(pl$lit(1) > 1)$alias("1 gt 1 not")$not(),
(pl$lit(1) == 1)$alias("1 eq 1"),
(pl$lit(1) == 2)$alias("1 eq 2 not")$not_(),
(pl$lit(1) == 2)$alias("1 eq 2 not")$not(),
(pl$lit(1) <= 1)$alias("1 lt_eq 1"),
(pl$lit(2) <= 1)$alias("2 lt_eq 1 not")$not_(),
(pl$lit(2) <= 1)$alias("2 lt_eq 1 not")$not(),
(pl$lit(2) >= 2)$alias("2 gt_eq 2"),
(pl$lit(1) >= 2)$alias("1 gt_eq 2 not")$not_(),
(pl$lit(1) >= 2)$alias("1 gt_eq 2 not")$not(),
(pl$lit(2) != 1)$alias("2 not eq 1"),
(pl$lit(2) != 2)$alias("2 not eq 1 not")$not_(),
(pl$lit(TRUE)$not_() == pl$lit(FALSE))$alias("not true == false"),
(pl$lit(2) != 2)$alias("2 not eq 1 not")$not(),
(pl$lit(TRUE)$not() == pl$lit(FALSE))$alias("not true == false"),
(pl$lit(TRUE) != pl$lit(FALSE))$alias("true != false"),
(pl$lit(TRUE)$not_() == FALSE)$alias("not true == false wrap"),
(pl$lit(TRUE)$not() == FALSE)$alias("not true == false wrap"),
(pl$lit(TRUE) != FALSE)$alias("true != false wrap")
)

Expand Down Expand Up @@ -253,7 +253,7 @@ test_that("is_null", {

expect_equal(
df$with_columns(pl$all()$is_not_null()$name$suffix("_isnull"))$to_data_frame(),
df$with_columns(pl$all()$is_null()$not_()$name$suffix("_isnull"))$to_data_frame()
df$with_columns(pl$all()$is_null()$not()$name$suffix("_isnull"))$to_data_frame()
)
})

Expand Down Expand Up @@ -471,7 +471,7 @@ test_that("and or is_in xor", {
pl$lit(NA_real_)$is_in(pl$lit(NULL))$alias("NULL typed is in NULL")

# anymore from rust-polars 0.30-0.32
# pl$lit(NULL)$is_in(pl$lit(NULL))$not_()$alias("NULL is in NULL, NOY")
# pl$lit(NULL)$is_in(pl$lit(NULL))$not()$alias("NULL is in NULL, NOY")
)$to_data_frame() |> unlist() |> all(na.rm = TRUE)
)
})
Expand Down Expand Up @@ -1266,18 +1266,20 @@ test_that("std var", {
})


test_that("is_unique is_first is_duplicated", {
test_that("is_unique is_first_distinct is_last_distinct is_duplicated", {
v = c(1, 1, 2, 2, 3, NA, NaN, Inf)
expect_identical(
pl$select(
pl$lit(v)$is_unique()$alias("is_unique"),
pl$lit(v)$is_first()$alias("is_first"),
pl$lit(v)$is_first_distinct()$alias("is_first_distinct"),
pl$lit(v)$is_last_distinct()$alias("is_last_distinct"),
pl$lit(v)$is_duplicated()$alias("is_duplicated"),
pl$lit(v)$is_first()$not_()$alias("R_duplicated")
pl$lit(v)$is_first_distinct()$not()$alias("R_duplicated")
)$to_list(),
list(
is_unique = !v %in% v[duplicated(v)],
is_first = !duplicated(v),
is_first_distinct = !duplicated(v),
is_last_distinct = !xor(v %in% v[duplicated(v)], duplicated(v)),
is_duplicated = v %in% v[duplicated(v)],
R_duplicated = duplicated(v)
)
Expand Down
Loading