Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: pass join_nulls and validate to the lazy method for $join() #949

Merged
merged 8 commits into from
Mar 21, 2024
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@
graphviz dot syntax (#928).
- Argument `ambiguous` can now take the value `"null"` to convert ambiguous
datetimes to null values (#937).

george-wood marked this conversation as resolved.
Show resolved Hide resolved
### Bug fixes

- The `join_nulls` and `validate` arguments of `<DataFrame>$join()` now work
correctly (#945).
- Export the `Duration` datatype (#955).

## Polars R Package 0.15.1
Expand Down
11 changes: 5 additions & 6 deletions R/dataframe__frame.R
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,8 @@ DataFrame_to_list = function(unnest_structs = TRUE, ..., int64_conversion = pola
DataFrame_join = function(
other,
on = NULL,
how = c("inner", "left", "outer", "semi", "anti", "cross", "outer_coalesce"),
how = c("inner", "left", "outer", "semi", "anti", "cross",
"outer_coalesce"),
...,
left_on = NULL,
right_on = NULL,
Expand All @@ -1021,11 +1022,9 @@ DataFrame_join = function(
Err_plain("`other` must be a DataFrame.") |>
unwrap("in $join():")
}
.pr$DataFrame$lazy(self)$join(
other = other$lazy(), left_on = left_on, right_on = right_on,
on = on, how = how, suffix = suffix, allow_parallel = allow_parallel,
force_parallel = force_parallel
)$collect()
other = other$lazy()
.args = as.list(environment())
do.call(.pr$DataFrame$lazy(self)$join, .args)$collect()
}

#' Convert DataFrame to a Series of type "struct"
Expand Down
87 changes: 69 additions & 18 deletions tests/testthat/test-joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -140,53 +140,104 @@ test_that("'other' must be a LazyFrame", {
})

test_that("argument 'validate' works", {
df1 = pl$LazyFrame(x = letters[1:5], y = 1:5)
df2 = pl$LazyFrame(x = c("a", letters[1:4]), y2 = 6:10)
df1 = pl$DataFrame(x = letters[1:5], y = 1:5)
df2 = pl$DataFrame(x = c("a", letters[1:4]), y2 = 6:10)

# 1:1
# eager 1:1
expect_error(
df1$join(df2, on = "x", validate = "1:1")$collect(),
df1$join(df2, on = "x", validate = "1:1"),
"join keys did not fulfil 1:1 validation"
)

# m:1
# lazy 1:1
expect_error(
df1$join(df2, on = "x", validate = "m:1")$collect(),
df1$lazy()$join(df2$lazy(), on = "x", validate = "1:1")$collect(),
"join keys did not fulfil 1:1 validation"
)

# eager m:1
expect_error(
df1$join(df2, on = "x", validate = "m:1"),
"join keys did not fulfil m:1 validation"
)

# 1:m
# lazy m:1
expect_error(
df1$lazy()$join(df2$lazy(), on = "x", validate = "m:1")$collect(),
"join keys did not fulfil m:1 validation"
)

# eager 1:m
expect_error(
df2$join(df1, on = "x", validate = "1:m"),
"join keys did not fulfil 1:m validation"
)

# lazy 1:m
expect_error(
df2$join(df1, on = "x", validate = "1:m")$collect(),
df2$lazy()$join(df1$lazy(), on = "x", validate = "1:m")$collect(),
"join keys did not fulfil 1:m validation"
)

# eager error on unknown validate choice
expect_error(
df2$join(df1, on = "x", validate = "foobar")$collect(),
df2$join(df1, on = "x", validate = "foobar"),
"should be one of"
)

# lazy error on unknown validate choice
expect_error(
df2$lazy()$join(df1$lazy(), on = "x", validate = "foobar")$collect(),
"should be one of"
)
})

test_that("argument 'join_nulls' works", {
df1 = pl$LazyFrame(x = c(NA, letters[1:2]), y = 1:3)
df2 = pl$LazyFrame(x = c(NA, letters[2:3]), y2 = 4:6)
df1 = pl$DataFrame(x = c(NA, letters[1:2]), y = 1:3)
df2 = pl$DataFrame(x = c(NA, letters[2:3]), y2 = 4:6)

# discard nulls by default
# 1 discard nulls by default

# eager1
expect_identical(
df1$join(df2, on = "x")$collect()$to_data_frame(),
df1$join(df2, on = "x")$to_data_frame(),
data.frame(x = "b", y = 3L, y2 = 5L)
)

# consider nulls as a valid key
# lazy1
expect_identical(
df1$join(df2, on = "x", join_nulls = TRUE)$collect()$to_data_frame(),
df1$lazy()$join(df2$lazy(), on = "x")$collect()$to_data_frame(),
data.frame(x = "b", y = 3L, y2 = 5L)
)

# 2 consider nulls as a valid key

# eager2
expect_identical(
df1$join(df2, on = "x", join_nulls = TRUE)$to_data_frame(),
data.frame(x = c(NA, "b"), y = c(1L, 3L), y2 = c(4L, 5L))
)

# several nulls
df2 = pl$LazyFrame(x = c(NA, letters[2:3], NA), y2 = 4:7)
# lazy2
expect_identical(
df1$lazy()$join(df2$lazy(), on = "x", join_nulls = TRUE)$collect()$
to_data_frame(),
data.frame(x = c(NA, "b"), y = c(1L, 3L), y2 = c(4L, 5L))
)

# 3 several nulls
df3 = pl$DataFrame(x = c(NA, letters[2:3], NA), y2 = 4:7)

# eager3
expect_identical(
df1$join(df3, on = "x", join_nulls = TRUE)$to_data_frame(),
data.frame(x = c(NA, "b", NA), y = c(1L, 3L, 1L), y2 = c(4L, 5L, 7L))
)

# lazy3
expect_identical(
df1$join(df2, on = "x", join_nulls = TRUE)$collect()$to_data_frame(),
df1$lazy()$join(df3$lazy(), on = "x", join_nulls = TRUE)$collect()$
to_data_frame(),
data.frame(x = c(NA, "b", NA), y = c(1L, 3L, 1L), y2 = c(4L, 5L, 7L))
)
})
Loading