Skip to content

Commit

Permalink
Merge branch 'b-tidyverse#2733-tidyverse#2739-join-warn' into master-…
Browse files Browse the repository at this point in the history
…nosquash
  • Loading branch information
krlmlr committed May 5, 2017
2 parents cfd4197 + eeba943 commit 0fa44e6
Show file tree
Hide file tree
Showing 11 changed files with 237 additions and 121 deletions.
21 changes: 16 additions & 5 deletions NEWS.md
Expand Up @@ -115,7 +115,9 @@ If you've implemented a database backend for dplyr, please read the [backend new

## Tidyeval

dplyr has a new approach to non-standard evaluation (NSE) called tidyeval. Tidyeval is described in detail in `vignette("programming")` but, in brief, gives you the ability to interpolate values in contexts where dplyr usually works with expressions:
dplyr has a new approach to non-standard evaluation (NSE) called tidyeval.
It is described in detail in `vignette("programming")` but, in brief, gives you
the ability to interpolate values in contexts where dplyr usually works with expressions:

```{r}
my_var <- quo(homeworld)
Expand All @@ -125,7 +127,8 @@ starwars %>%
summarise_at(vars(height:mass), mean, na.rm = TRUE)
```

This means that the underscored version of each main verb is no longer needed, and so these functions have been deprecated (but remain around for backward compatibility).
This means that the underscored version of each main verb is no longer needed,
and so these functions have been deprecated (but remain around for backward compatibility).

* `order_by()`, `top_n()`, `sample_n()` and `sample_frac()` now use
tidyeval to capture their arguments by expression. This makes it
Expand Down Expand Up @@ -175,6 +178,11 @@ This means that the underscored version of each main verb is no longer needed, a
* One of the two join suffixes can now be an empty string, dplyr no longer
hangs (#2228, #2445).

* Anti- and semi-joins warn if factor levels are inconsistent (#2741).

* Warnings about join column inconsistencies now contain the column names
(#2728).

### Select

* For selecting variables, the first selector decides if it's an inclusive
Expand Down Expand Up @@ -669,9 +677,12 @@ There were two other tweaks to the exported API, but these are less likely to af
to avoid creating repeated column names (#1460). Joins on string columns
should be substantially faster (#1386). Extra attributes are ok if they are
identical (#1636). Joins work correct when factor levels not equal
(#1712, #1559), and anti and semi joins give correct result when by variable
is a factor (#1571). A clear error message is given for joins where an
explicit `by` contains unavailable columns (#1928, #1932, @krlmlr).
(#1712, #1559). Anti- and semi-joins give correct result when by variable
is a factor (#1571), but warn if factor levels are inconsistent (#2741).
A clear error message is given for joins where an
explicit `by` contains unavailable columns (#1928, #1932).
Warnings about join column inconsistencies now contain the column names
(#2728).

* `inner_join()`, `left_join()`, `right_join()`, and `full_join()` gain a
`suffix` argument which allows you to control what suffix duplicated variable
Expand Down
2 changes: 1 addition & 1 deletion R/compat-dbplyr.R
Expand Up @@ -36,7 +36,7 @@ wrap_dbplyr_obj <- function(obj_name) {
pass_on <- map(set_names(names(args)), sym)

dbplyr_call <- expr(UQ(dbplyr_sym)(!!!pass_on))
dplyr_call <- expr(UQ(dbplyr_sym)(!!!pass_on))
dplyr_call <- expr(UQ(dplyr_sym)(!!!pass_on))
} else {
args <- list()

Expand Down
15 changes: 15 additions & 0 deletions R/utils.r
Expand Up @@ -102,3 +102,18 @@ inc_seq <- function(from, to) {
random_table_name <- function(n = 10) {
paste0(sample(letters, n, replace = TRUE), collapse = "")
}

attr_equal <- function(x, y) {
attr_x <- attributes(x)
if (!is.null(attr_x)) {
attr_x <- attr_x[sort(names(attr_x))]
}

attr_y <- attributes(y)
if (!is.null(attr_y)) {
attr_y <- attr_y[sort(names(attr_y))]
}

isTRUE(all.equal(attr_x, attr_y))
}

26 changes: 26 additions & 0 deletions inst/include/dplyr/Column.h
@@ -0,0 +1,26 @@
#ifndef DPLYR_DPLYR_COLUMN_H
#define DPLYR_DPLYR_COLUMN_H

class Column {
public:
Column(SEXP data_, const SymbolString& name_) : data(data_), name(name_) {}

public:
const RObject& get_data() const {
return data;
}

const SymbolString& get_name() const {
return name;
}

Column update_data(SEXP new_data) const {
return Column(new_data, name);
}

private:
RObject data;
SymbolString name;
};

#endif //DPLYR_DPLYR_COLUMN_H
3 changes: 2 additions & 1 deletion inst/include/dplyr/JoinVisitor.h
@@ -1,6 +1,7 @@
#ifndef dplyr_JoinVisitor_H
#define dplyr_JoinVisitor_H

#include <dplyr/Column.h>
#include <dplyr/visitor_set/VisitorSetIndexSet.h>

namespace dplyr {
Expand All @@ -19,7 +20,7 @@ class JoinVisitor {

};

JoinVisitor* join_visitor(SEXP left, SEXP right, const SymbolString& left_name, const SymbolString& right_name, bool warn, bool accept_na_match = true);
JoinVisitor* join_visitor(const Column& left, const Column& right, bool warn, bool accept_na_match = true);

}

Expand Down
21 changes: 11 additions & 10 deletions inst/include/dplyr/JoinVisitorImpl.h
Expand Up @@ -6,12 +6,13 @@

#include <dplyr/join_match.h>
#include <dplyr/JoinVisitor.h>
#include <dplyr/Column.h>

namespace dplyr {

CharacterVector get_uniques(const CharacterVector& left, const CharacterVector& right);

void check_attribute_compatibility(SEXP left, SEXP right);
void check_attribute_compatibility(const Column& left, const Column& right);

template <int LHS_RTYPE, int RHS_RTYPE>
class DualVector {
Expand All @@ -27,9 +28,7 @@ class DualVector {
typedef typename Rcpp::traits::storage_type<RTYPE>::type STORAGE;

public:
DualVector(LHS_Vec left_, RHS_Vec right_) : left(left_), right(right_) {
check_attribute_compatibility(left, right);
}
DualVector(LHS_Vec left_, RHS_Vec right_) : left(left_), right(right_) {}

LHS_STORAGE get_left_value(const int i) const {
if (i < 0) stop("get_left_value() called with negative argument");
Expand Down Expand Up @@ -134,7 +133,9 @@ class JoinVisitorImpl : public JoinVisitor {
typedef typename Storage::Vec Vec;

public:
JoinVisitorImpl(typename Storage::LHS_Vec left, typename Storage::RHS_Vec right) : dual(left, right) {}
JoinVisitorImpl(const Column& left, const Column& right, const bool warn) : dual((SEXP)left.get_data(), (SEXP)right.get_data()) {
if (warn) check_attribute_compatibility(left, right);
}

inline size_t hash(int i) {
// If NAs don't match, we want to distribute their hashes as evenly as possible
Expand Down Expand Up @@ -180,12 +181,12 @@ class POSIXctJoinVisitor : public JoinVisitorImpl<REALSXP, REALSXP, ACCEPT_NA_MA
typedef JoinVisitorImpl<REALSXP, REALSXP, ACCEPT_NA_MATCH> Parent;

public:
POSIXctJoinVisitor(NumericVector left, NumericVector right) :
Parent(left, right),
POSIXctJoinVisitor(const Column& left, const Column& right) :
Parent(left, right, false),
tzone(R_NilValue)
{
RObject tzone_left = left.attr("tzone");
RObject tzone_right = right.attr("tzone");
RObject tzone_left = left.get_data().attr("tzone");
RObject tzone_right = right.get_data().attr("tzone");
if (tzone_left.isNULL() && tzone_right.isNULL()) return;

if (tzone_left.isNULL()) {
Expand Down Expand Up @@ -236,7 +237,7 @@ class DateJoinVisitor : public JoinVisitorImpl<LHS_RTYPE, RHS_RTYPE, ACCEPT_NA_M
typedef JoinVisitorImpl<LHS_RTYPE, RHS_RTYPE, ACCEPT_NA_MATCH> Parent;

public:
DateJoinVisitor(typename Parent::LHS_Vec left, typename Parent::RHS_Vec right) : Parent(left, right) {}
DateJoinVisitor(const Column& left, const Column& right) : Parent(left, right, false) {}

inline SEXP subset(const std::vector<int>& indices) {
return promote(Parent::subset(indices));
Expand Down
7 changes: 6 additions & 1 deletion src/api.cpp
Expand Up @@ -83,7 +83,12 @@ DataFrameJoinVisitors::DataFrameJoinVisitors(const DataFrame& left_, const DataF
stop("'%s' column not found in rhs, cannot join", name_right.get_utf8_cstring());
}

visitors[i] = join_visitor(left[indices_left[i] - 1], right[indices_right[i] - 1], name_left, name_right, warn, na_match);
visitors[i] =
join_visitor(
Column(left[indices_left[i] - 1], name_left),
Column(right[indices_right[i] - 1], name_right),
warn, na_match
);
}
}

Expand Down

0 comments on commit 0fa44e6

Please sign in to comment.