Skip to content

Commit

Permalink
Enable how = "cross" in $join() (#310)
Browse files Browse the repository at this point in the history
  • Loading branch information
etiennebacher committed Jul 4, 2023
1 parent 59a9af1 commit 9921df3
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 9 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ for demonstration purposes (#240).
- `<DataFrame>$glimpse()` is a fast `str()`-like view of a `DataFrame` (#277).
- `$over()` now accepts a vector of column names (#287).
- New method `<DataFrame>$describe()` (#268).
- Cross joining is now possible with `how = "cross"` in `$join()` (#310).

# polars 0.6.1

Expand Down
11 changes: 9 additions & 2 deletions R/dataframe__frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -983,9 +983,16 @@ DataFrame_to_list = function(unnest_structs = TRUE) {
#' @return DataFrame
#' @keywords DataFrame
#' @examples
#' print(df1 <- pl$DataFrame(list(key = 1:3, payload = c("f", "i", NA))))
#' print(df2 <- pl$DataFrame(list(key = c(3L, 4L, 5L, NA_integer_))))
#' # inner join by default
#' df1 <- pl$DataFrame(list(key = 1:3, payload = c("f", "i", NA)))
#' df2 <- pl$DataFrame(list(key = c(3L, 4L, 5L, NA_integer_)))
#' df1$join(other = df2, on = "key")
#'
#' # cross join
#' df1 <- pl$DataFrame(x = letters[1:3])
#' df2 <- pl$DataFrame(y = 1:4)
#' df1$join(other = df2, how = "cross")
#'
DataFrame_join = function(
other, # : LazyFrame or DataFrame,
left_on = NULL, # : str | pli.Expr | Sequence[str | pli.Expr] | None = None,
Expand Down
9 changes: 4 additions & 5 deletions R/lazyframe__lazy.R
Original file line number Diff line number Diff line change
Expand Up @@ -586,19 +586,18 @@ LazyFrame_join = function(
how_opts = c("inner", "left", "outer", "semi", "anti", "cross")
how = match.arg(how[1L], how_opts)

if (how == "cross") {
stopf("not implemented how == cross")
}

if (!is.null(on)) {
rexprs = do.call(construct_ProtoExprArray, as.list(on))
rexprs_left = rexprs
rexprs_right = rexprs
} else if ((!is.null(left_on) && !is.null(right_on))) {
rexprs_left = do.call(construct_ProtoExprArray, as.list(left_on))
rexprs_right = do.call(construct_ProtoExprArray, as.list(right_on))
} else {
} else if (how != "cross") {
stopf("must specify `on` OR ( `left_on` AND `right_on` ) ")
} else {
rexprs_left = do.call(construct_ProtoExprArray, as.list(self$columns))
rexprs_right = do.call(construct_ProtoExprArray, as.list(other$columns))
}

.pr$LazyFrame$join(
Expand Down
11 changes: 9 additions & 2 deletions man/DataFrame_join.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

38 changes: 38 additions & 0 deletions tests/testthat/test-joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,41 @@ test_that("test_semi_anti_join", {
data.frame(a = 3L, b = "c", payload = 30L)
)
})


test_that("cross join, DataFrame", {
  # Fixtures: a 3-row character frame and a 4-row integer frame.
  # No join keys are supplied for any of the cross joins below.
  dat = pl$DataFrame(x = letters[1:3])
  dat2 = pl$DataFrame(y = 1:4)

  # Every row of `dat` is paired with every row of `dat2`: the left-hand
  # values are repeated in runs while the right-hand values cycle.
  crossed = dat$join(dat2, how = "cross")$to_data_frame()
  expected_crossed = data.frame(
    x = rep(letters[1:3], each = 4),
    y = rep(1:4, 3)
  )
  expect_identical(crossed, expected_crossed)

  # one empty dataframe: the result has zero rows whichever side is empty,
  # and the columns follow the left-then-right order of the call
  dat_empty = pl$DataFrame(y = character())

  empty_on_right = dat$join(dat_empty, how = "cross")$to_data_frame()
  expect_identical(
    empty_on_right,
    data.frame(x = character(), y = character())
  )

  empty_on_left = dat_empty$join(dat, how = "cross")$to_data_frame()
  expect_identical(
    empty_on_left,
    data.frame(y = character(), x = character())
  )

  # suffix works: crossing a frame with itself keeps the left column name
  # and yields `x_right` for the clashing right-hand column
  self_crossed = dat$join(dat, how = "cross")$to_data_frame()
  expected_self = data.frame(
    x = rep(letters[1:3], each = 3),
    x_right = rep(letters[1:3], 3)
  )
  expect_identical(self_crossed, expected_self)
})

0 comments on commit 9921df3

Please sign in to comment.