Merge branches 'b-tidyverse#3085-cbind-error', 'b-tidyverse#3071-summ…

…arize-zero-columns', 'b-tidyverse#3266-join-clash' and 'b-tidyverse#3258-named' into r-0.7.5
krlmlr · Dec 31, 2017 · dd5dc23 · dd5dc23
5 parents 889ceb4 + 8445c14 + b145ce5 + add0ccb + dc577e7
commit dd5dc23
Show file tree

Hide file tree

Showing 8 changed files with 61 additions and 8 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -45,14 +45,20 @@
   input in mutating operations and `mutate(df, "foo")` creates a new column by
   recycling "foo" to the number of rows.
 
+* Fixed rare column name clash in joins with non-join columns of the same name in both tables (#3266).
+
 * `select()` and `vars()` now treat `NULL` as empty inputs (#3023).
 
+* Fix `summarise()` for empty data frames with zero columns (#3071).
+
 * Add error for `distinct()` if any of the selected columns are of type `list` (#3088, @foo-bar-baz-qux).
 
 * `sample_n()` and `sample_frac()` on grouped data frame are now faster especially for those with large number of groups (#3193, @saurfang).
 
 * Better error message if dbplyr is not installed when accessing database backends (#3225).
 
+* Corrected error message when calling `cbind()` with an object of wrong length (#3085).
+
 *  Fix `row_number()` and `ntile()` ordering to use the locale-dependent ordering functions in R when dealing with character vectors, rather than always using the C-locale ordering function in C (#2792, @foo-bar-baz-qux).
 
 * Better error message when joining data frames with duplicate column names. Joining such data frames with a semi- or anti-join now gives a warning, which may be converted to an error in future versions (#3243).

diff --git a/src/bind.cpp b/src/bind.cpp
@@ -106,9 +106,11 @@ static
 void cbind_vector_check(SEXP x, R_xlen_t nrows, SEXP contr, int arg) {
   if (is_atomic(x) && !has_name_at(contr, arg))
     bad_pos_arg(arg + 1, "must have names");
-  if (rows_length(x, false) != nrows) {
+
+  const R_xlen_t actual_nrows = rows_length(x, false);
+  if (actual_nrows != nrows) {
     bad_pos_arg(arg + 1, "must be length {expected_size}, not {actual_size}",
-                _["expected_size"] = rows_length(x, true), _["actual_size"] = nrows);
+                _["expected_size"] = nrows, _["actual_size"] = actual_nrows);
   }
 }
 

diff --git a/src/join_exports.cpp b/src/join_exports.cpp
@@ -81,7 +81,7 @@ DataFrame subset_join(DataFrame x, DataFrame y,
       // we suffix by .x if this column is in y_columns (and if the suffix is not empty)
       if (suffix_x.length() > 0) {
         while (
-          (std::find(y_columns.begin(), y_columns.end(), col_name.get_sexp()) != y_columns.end()) ||
+          (std::find(all_y_columns.begin(), all_y_columns.end(), col_name.get_sexp()) != all_y_columns.end()) ||
           (std::find(names.begin(), names.begin() + i, col_name.get_sexp()) != names.begin() + i)
         ) {
           col_name += suffix_x;

diff --git a/src/summarise.cpp b/src/summarise.cpp
@@ -157,7 +157,6 @@ DataFrame summarise_not_grouped(DataFrame df, const QuosureList& dots) {
 
 // [[Rcpp::export]]
 SEXP summarise_impl(DataFrame df, QuosureList dots) {
-  if (df.size() == 0) return df;
   check_valid_colnames(df);
   if (is<RowwiseDataFrame>(df)) {
     return summarise_grouped<RowwiseDataFrame, LazyRowwiseSubsets>(df, dots);

diff --git a/src/utils.cpp b/src/utils.cpp
@@ -51,7 +51,7 @@ void assert_all_white_list(const DataFrame& data) {
 }
 
 SEXP shared_SEXP(SEXP x) {
-  SET_NAMED(x, 2);
+  MARK_NOT_MUTABLE(x);
   return x;
 }
 

diff --git a/tests/testthat/test-binds.R b/tests/testthat/test-binds.R
@@ -582,12 +582,12 @@ test_that("accepts named columns", {
 test_that("uncompatible sizes fail", {
   expect_error(
     bind_cols(a = 1, mtcars),
-    "Argument 2 must be length 32, not 1",
+    "Argument 2 must be length 1, not 32",
     fixed = TRUE
   )
   expect_error(
-    bind_cols(mtcars, a = 1),
-    "Argument 2 must be length 1, not 32",
+    bind_cols(mtcars, a = 1:3),
+    "Argument 2 must be length 32, not 3",
     fixed = TRUE
   )
 })

diff --git a/tests/testthat/test-joins.r b/tests/testthat/test-joins.r
@@ -198,6 +198,33 @@ test_that("disallow empty string in both sides of suffix argument (#2228)", {
   )
 })
 
+g <- data.frame(A = 1, A.x = 2)
+h <- data.frame(B = 3, A.x = 4, A = 5)
+
+test_that("can handle 'by' columns with suffix (#3266)", {
+  j1 <- inner_join(g, h, "A.x")
+  j2 <- left_join(g, h, "A.x")
+  j3 <- right_join(g, h, "A.x")
+  j4 <- full_join(g, h, "A.x")
+
+  expect_named(j1, c("A.x.x", "A.x", "B", "A.y"))
+  expect_named(j2, c("A.x.x", "A.x", "B", "A.y"))
+  expect_named(j3, c("A.x.x", "A.x", "B", "A.y"))
+  expect_named(j4, c("A.x.x", "A.x", "B", "A.y"))
+})
+
+test_that("can handle 'by' columns with suffix, reverse (#3266)", {
+  j1 <- inner_join(h, g, "A.x")
+  j2 <- left_join(h, g, "A.x")
+  j3 <- right_join(h, g, "A.x")
+  j4 <- full_join(h, g, "A.x")
+
+  expect_named(j1, c("B", "A.x", "A.x.x", "A.y"))
+  expect_named(j2, c("B", "A.x", "A.x.x", "A.y"))
+  expect_named(j3, c("B", "A.x", "A.x.x", "A.y"))
+  expect_named(j4, c("B", "A.x", "A.x.x", "A.y"))
+})
+
 test_that("check suffix input", {
   expect_error(
     inner_join(e, f, "x", suffix = letters[1:3]),
@@ -216,6 +243,9 @@ test_that("check suffix input", {
   )
 })
 
+
+# Misc --------------------------------------------------------------------
+
 test_that("inner_join does not segfault on NA in factors (#306)", {
   a <- data.frame(x = c("p", "q", NA), y = c(1, 2, 3), stringsAsFactors = TRUE)
   b <- data.frame(x = c("p", "q", "r"), z = c(4, 5, 6), stringsAsFactors = TRUE)
@@ -721,6 +751,9 @@ test_that("inner join not crashing (#1559)", {
   for (i in 2:100) expect_equal(res[, 1], res[, i])
 })
 
+
+# Encoding ----------------------------------------------------------------
+
 test_that("join handles mix of encodings in data (#1885, #2118, #2271)", {
   with_non_utf8_encoding({
     special <- get_native_lang_string()
@@ -803,8 +836,11 @@ test_that("left_join handles mix of encodings in column names (#1571)", {
   })
 })
 
+# Misc --------------------------------------------------------------------
+
 test_that("NAs match in joins only with na_matches = 'na' (#2033)", {
   df1 <- data_frame(a = NA)
+
   df2 <- data_frame(a = NA, b = 1:3)
   for (na_matches in c("na", "never")) {
     accept_na_match <- (na_matches == "na")

diff --git a/tests/testthat/test-summarise.r b/tests/testthat/test-summarise.r
@@ -278,6 +278,16 @@ test_that("summarise creates an empty data frame when no parameters are used", {
   expect_equal(res, data.frame())
 })
 
+test_that("summarise works with zero-row data frames", {
+  res <- summarise(mtcars[0, ], n = n())
+  expect_equal(res, data.frame(n = 0L))
+})
+
+test_that("summarise works with zero-column data frames (#3071)", {
+  res <- summarise(mtcars[0], n = n())
+  expect_equal(res, data.frame(n = nrow(mtcars)))
+})
+
 test_that("integer overflow (#304)", {
   groups <- rep(c("A", "B"), each = 3)
   values <- rep(1e9, 6)