Skip to content

Commit

Permalink
Merge branches 'b-tidyverse#3085-cbind-error', 'b-tidyverse#3071-summ…
Browse files Browse the repository at this point in the history
…arize-zero-columns', 'b-tidyverse#3266-join-clash' and 'b-tidyverse#3258-named' into r-0.7.5
  • Loading branch information
krlmlr committed Dec 31, 2017
5 parents 889ceb4 + 8445c14 + b145ce5 + add0ccb + dc577e7 commit dd5dc23
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 8 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,14 +45,20 @@
input in mutating operations and `mutate(df, "foo")` creates a new column by
recycling "foo" to the number of rows.

* Fixed rare column name clash in joins with non-join columns of the same name in both tables (#3266).

* `select()` and `vars()` now treat `NULL` as empty inputs (#3023).

* Fix `summarise()` for empty data frames with zero columns (#3071).

* Add error for `distinct()` if any of the selected columns are of type `list` (#3088, @foo-bar-baz-qux).

* `sample_n()` and `sample_frac()` on grouped data frame are now faster especially for those with large number of groups (#3193, @saurfang).

* Better error message if dbplyr is not installed when accessing database backends (#3225).

* Corrected error message when calling `cbind()` with an object of wrong length (#3085).

* Fix `row_number()` and `ntile()` ordering to use the locale-dependent ordering functions in R when dealing with character vectors, rather than always using the C-locale ordering function in C (#2792, @foo-bar-baz-qux).

* Better error message when joining data frames with duplicate column names. Joining such data frames with a semi- or anti-join now gives a warning, which may be converted to an error in future versions (#3243).
Expand Down
6 changes: 4 additions & 2 deletions src/bind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ static
void cbind_vector_check(SEXP x, R_xlen_t nrows, SEXP contr, int arg) {
if (is_atomic(x) && !has_name_at(contr, arg))
bad_pos_arg(arg + 1, "must have names");
if (rows_length(x, false) != nrows) {

const R_xlen_t actual_nrows = rows_length(x, false);
if (actual_nrows != nrows) {
bad_pos_arg(arg + 1, "must be length {expected_size}, not {actual_size}",
_["expected_size"] = rows_length(x, true), _["actual_size"] = nrows);
_["expected_size"] = nrows, _["actual_size"] = actual_nrows);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/join_exports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ DataFrame subset_join(DataFrame x, DataFrame y,
// we suffix by .x if this column is in y_columns (and if the suffix is not empty)
if (suffix_x.length() > 0) {
while (
(std::find(y_columns.begin(), y_columns.end(), col_name.get_sexp()) != y_columns.end()) ||
(std::find(all_y_columns.begin(), all_y_columns.end(), col_name.get_sexp()) != all_y_columns.end()) ||
(std::find(names.begin(), names.begin() + i, col_name.get_sexp()) != names.begin() + i)
) {
col_name += suffix_x;
Expand Down
1 change: 0 additions & 1 deletion src/summarise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ DataFrame summarise_not_grouped(DataFrame df, const QuosureList& dots) {

// [[Rcpp::export]]
SEXP summarise_impl(DataFrame df, QuosureList dots) {
if (df.size() == 0) return df;
check_valid_colnames(df);
if (is<RowwiseDataFrame>(df)) {
return summarise_grouped<RowwiseDataFrame, LazyRowwiseSubsets>(df, dots);
Expand Down
2 changes: 1 addition & 1 deletion src/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ void assert_all_white_list(const DataFrame& data) {
}

SEXP shared_SEXP(SEXP x) {
SET_NAMED(x, 2);
MARK_NOT_MUTABLE(x);
return x;
}

Expand Down
6 changes: 3 additions & 3 deletions tests/testthat/test-binds.R
Original file line number Diff line number Diff line change
Expand Up @@ -582,12 +582,12 @@ test_that("accepts named columns", {
test_that("uncompatible sizes fail", {
expect_error(
bind_cols(a = 1, mtcars),
"Argument 2 must be length 32, not 1",
"Argument 2 must be length 1, not 32",
fixed = TRUE
)
expect_error(
bind_cols(mtcars, a = 1),
"Argument 2 must be length 1, not 32",
bind_cols(mtcars, a = 1:3),
"Argument 2 must be length 32, not 3",
fixed = TRUE
)
})
Expand Down
36 changes: 36 additions & 0 deletions tests/testthat/test-joins.r
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,33 @@ test_that("disallow empty string in both sides of suffix argument (#2228)", {
)
})

g <- data.frame(A = 1, A.x = 2)
h <- data.frame(B = 3, A.x = 4, A = 5)

test_that("can handle 'by' columns with suffix (#3266)", {
j1 <- inner_join(g, h, "A.x")
j2 <- left_join(g, h, "A.x")
j3 <- right_join(g, h, "A.x")
j4 <- full_join(g, h, "A.x")

expect_named(j1, c("A.x.x", "A.x", "B", "A.y"))
expect_named(j2, c("A.x.x", "A.x", "B", "A.y"))
expect_named(j3, c("A.x.x", "A.x", "B", "A.y"))
expect_named(j4, c("A.x.x", "A.x", "B", "A.y"))
})

test_that("can handle 'by' columns with suffix, reverse (#3266)", {
j1 <- inner_join(h, g, "A.x")
j2 <- left_join(h, g, "A.x")
j3 <- right_join(h, g, "A.x")
j4 <- full_join(h, g, "A.x")

expect_named(j1, c("B", "A.x", "A.x.x", "A.y"))
expect_named(j2, c("B", "A.x", "A.x.x", "A.y"))
expect_named(j3, c("B", "A.x", "A.x.x", "A.y"))
expect_named(j4, c("B", "A.x", "A.x.x", "A.y"))
})

test_that("check suffix input", {
expect_error(
inner_join(e, f, "x", suffix = letters[1:3]),
Expand All @@ -216,6 +243,9 @@ test_that("check suffix input", {
)
})


# Misc --------------------------------------------------------------------

test_that("inner_join does not segfault on NA in factors (#306)", {
a <- data.frame(x = c("p", "q", NA), y = c(1, 2, 3), stringsAsFactors = TRUE)
b <- data.frame(x = c("p", "q", "r"), z = c(4, 5, 6), stringsAsFactors = TRUE)
Expand Down Expand Up @@ -721,6 +751,9 @@ test_that("inner join not crashing (#1559)", {
for (i in 2:100) expect_equal(res[, 1], res[, i])
})


# Encoding ----------------------------------------------------------------

test_that("join handles mix of encodings in data (#1885, #2118, #2271)", {
with_non_utf8_encoding({
special <- get_native_lang_string()
Expand Down Expand Up @@ -803,8 +836,11 @@ test_that("left_join handles mix of encodings in column names (#1571)", {
})
})

# Misc --------------------------------------------------------------------

test_that("NAs match in joins only with na_matches = 'na' (#2033)", {
df1 <- data_frame(a = NA)

df2 <- data_frame(a = NA, b = 1:3)
for (na_matches in c("na", "never")) {
accept_na_match <- (na_matches == "na")
Expand Down
10 changes: 10 additions & 0 deletions tests/testthat/test-summarise.r
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,16 @@ test_that("summarise creates an empty data frame when no parameters are used", {
expect_equal(res, data.frame())
})

test_that("summarise works with zero-row data frames", {
res <- summarise(mtcars[0, ], n = n())
expect_equal(res, data.frame(n = 0L))
})

test_that("summarise works with zero-column data frames (#3071)", {
res <- summarise(mtcars[0], n = n())
expect_equal(res, data.frame(n = nrow(mtcars)))
})

test_that("integer overflow (#304)", {
groups <- rep(c("A", "B"), each = 3)
values <- rep(1e9, 6)
Expand Down

0 comments on commit dd5dc23

Please sign in to comment.