Skip to content

Commit

Permalink
+ nest_join(). closes #3570
Browse files Browse the repository at this point in the history
  • Loading branch information
romainfrancois committed May 28, 2018
1 parent 1b84f6c commit 10d1da4
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 0 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Expand Up @@ -106,6 +106,7 @@ S3method(mutate_,tbl_df)
S3method(n_groups,data.frame)
S3method(n_groups,grouped_df)
S3method(n_groups,rowwise_df)
S3method(nest_join,tbl_df)
S3method(print,BoolResult)
S3method(print,all_vars)
S3method(print,any_vars)
Expand Down Expand Up @@ -323,6 +324,7 @@ export(n_distinct)
export(n_groups)
export(na_if)
export(near)
export(nest_join)
export(nth)
export(ntile)
export(num_range)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Expand Up @@ -16,6 +16,7 @@
- new method `rows()` to get a list of row indices for each group (#3489).
- new method `group_data()` (#3489).
- joins no longer make lazy grouped data (#3566).
- new `nest_join()` function (#3570).

# dplyr 0.7.5.9001

Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Expand Up @@ -126,6 +126,10 @@ inner_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
.Call(`_dplyr_inner_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
}

# R-level entry point for the native nest join: passes all eight arguments,
# unchanged and in the same order, to the compiled routine
# `_dplyr_nest_join_impl` through .Call().
# NOTE(review): this file is R/RcppExports.R, which is presumably regenerated
# by Rcpp::compileAttributes() — confirm before hand-editing.
nest_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match, yname) {
.Call(`_dplyr_nest_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match, yname)
}

# R-level entry point for the native left join: passes all seven arguments,
# unchanged and in the same order, to the compiled routine
# `_dplyr_left_join_impl` through .Call().
left_join_impl <- function(x, y, by_x, by_y, aux_x, aux_y, na_match) {
.Call(`_dplyr_left_join_impl`, x, y, by_x, by_y, aux_x, aux_y, na_match)
}
Expand Down
6 changes: 6 additions & 0 deletions R/join.r
Expand Up @@ -120,6 +120,12 @@ semi_join <- function(x, y, by = NULL, copy = FALSE, ...) {
UseMethod("semi_join")
}

#' @rdname join
#' @export
nest_join <- function(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) {
# S3 generic: UseMethod() dispatches on the class of the first argument (`x`);
# the arguments are handed to the selected method as supplied by the caller.
UseMethod("nest_join")
}

#' @rdname join
#' @export
anti_join <- function(x, y, by = NULL, copy = FALSE, ...) {
Expand Down
25 changes: 25 additions & 0 deletions R/tbl-df.r
Expand Up @@ -176,6 +176,31 @@ inner_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
reconstruct_join(out, x, vars)
}

#' @export
#' @rdname join.tbl_df
nest_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
                             suffix = c(".x", ".y"), ...,
                             na_matches = pkgconfig::get_config("dplyr::na_matches")) {
  # Capture the expression the caller supplied for `y` before `y` is reassigned
  # further down; it is forwarded to nest_join_impl() as its `yname` argument.
  nested_name <- quo_name(enquo(y))

  # Validate inputs in a fixed order so the first failing check is the one
  # reported.
  check_valid_names(tbl_vars(x))
  check_valid_names(tbl_vars(y))
  by <- common_by(by, x, y)
  suffix <- check_suffix(suffix)
  na_matches <- check_na_matches(na_matches)

  y <- auto_copy(x, y, copy = copy)

  # Only the index part of the join description is needed here: the positions
  # of the `by` columns and of the remaining ("aux") columns on each side.
  join_info <- join_vars(tbl_vars(x), tbl_vars(y), by, suffix)
  x_idx <- join_info$idx$x
  y_idx <- join_info$idx$y

  nest_join_impl(
    x, y,
    x_idx$by, y_idx$by,
    x_idx$aux, y_idx$aux,
    na_matches, nested_name
  )
}


#' @export
#' @rdname join.tbl_df
left_join.tbl_df <- function(x, y, by = NULL, copy = FALSE,
Expand Down
3 changes: 3 additions & 0 deletions man/join.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/join.tbl_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions src/RcppExports.cpp
Expand Up @@ -377,6 +377,24 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// nest_join_impl
//
// C-callable shim for nest_join_impl(): each incoming SEXP is converted to the
// C++ parameter type through Rcpp::traits::input_parameter, the C++ function
// is invoked, and its result is converted back with Rcpp::wrap().
// NOTE(review): this lives in src/RcppExports.cpp, which is presumably
// regenerated by Rcpp::compileAttributes() — confirm before hand-editing.
List nest_join_impl(DataFrame x, DataFrame y, IntegerVector by_x, IntegerVector by_y, IntegerVector aux_x, IntegerVector aux_y, bool na_match, String yname);
RcppExport SEXP _dplyr_nest_join_impl(SEXP xSEXP, SEXP ySEXP, SEXP by_xSEXP, SEXP by_ySEXP, SEXP aux_xSEXP, SEXP aux_ySEXP, SEXP na_matchSEXP, SEXP ynameSEXP) {
// BEGIN_RCPP / END_RCPP are Rcpp macros bracketing the body; presumably they
// translate C++ exceptions into R errors — see the Rcpp documentation.
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
// One typed wrapper per argument, in the same order as the C++ signature.
Rcpp::traits::input_parameter< DataFrame >::type x(xSEXP);
Rcpp::traits::input_parameter< DataFrame >::type y(ySEXP);
Rcpp::traits::input_parameter< IntegerVector >::type by_x(by_xSEXP);
Rcpp::traits::input_parameter< IntegerVector >::type by_y(by_ySEXP);
Rcpp::traits::input_parameter< IntegerVector >::type aux_x(aux_xSEXP);
Rcpp::traits::input_parameter< IntegerVector >::type aux_y(aux_ySEXP);
Rcpp::traits::input_parameter< bool >::type na_match(na_matchSEXP);
Rcpp::traits::input_parameter< String >::type yname(ynameSEXP);
rcpp_result_gen = Rcpp::wrap(nest_join_impl(x, y, by_x, by_y, aux_x, aux_y, na_match, yname));
return rcpp_result_gen;
END_RCPP
}
// left_join_impl
DataFrame left_join_impl(DataFrame x, DataFrame y, IntegerVector by_x, IntegerVector by_y, IntegerVector aux_x, IntegerVector aux_y, bool na_match);
RcppExport SEXP _dplyr_left_join_impl(SEXP xSEXP, SEXP ySEXP, SEXP by_xSEXP, SEXP by_ySEXP, SEXP aux_xSEXP, SEXP aux_ySEXP, SEXP na_matchSEXP) {
Expand Down Expand Up @@ -729,6 +747,7 @@ static const R_CallMethodDef CallEntries[] = {
{"_dplyr_semi_join_impl", (DL_FUNC) &_dplyr_semi_join_impl, 5},
{"_dplyr_anti_join_impl", (DL_FUNC) &_dplyr_anti_join_impl, 5},
{"_dplyr_inner_join_impl", (DL_FUNC) &_dplyr_inner_join_impl, 7},
{"_dplyr_nest_join_impl", (DL_FUNC) &_dplyr_nest_join_impl, 8},
{"_dplyr_left_join_impl", (DL_FUNC) &_dplyr_left_join_impl, 7},
{"_dplyr_right_join_impl", (DL_FUNC) &_dplyr_right_join_impl, 7},
{"_dplyr_full_join_impl", (DL_FUNC) &_dplyr_full_join_impl, 7},
Expand Down
60 changes: 60 additions & 0 deletions src/join_exports.cpp
Expand Up @@ -186,6 +186,66 @@ DataFrame inner_join_impl(DataFrame x, DataFrame y,
);
}

// Maps an index i to -i - 1 (0 <-> -1, 1 <-> -2, ...). The mapping is its own
// inverse, so applying it twice gives back the original value.
inline int reverse_index(int i) {
  const int mirrored = -1 - i;
  return mirrored;
}

// Nest join of `x` and `y`.
//
// Returns a list carrying every column of `x` (same class and row.names
// attributes as `x`) plus one extra trailing list column named `yname`.
// Element i of that column is a subset of the `aux_y` columns of `y` for the
// rows whose join key matches row i of `x`; rows of `x` without a match get
// an empty subset.
//
// by_x, by_y   positions of the join columns in `x` / `y`
// aux_x        accepted for signature parity with the other *_join_impl
//              functions; not used here because `x` is copied through whole
// aux_y        positions of the `y` columns kept in each nested subset
// na_match     forwarded to DataFrameJoinVisitors
// yname        name given to the new list column
// [[Rcpp::export]]
List nest_join_impl(DataFrame x, DataFrame y,
                    IntegerVector by_x, IntegerVector by_y,
                    IntegerVector aux_x, IntegerVector aux_y,
                    bool na_match,
                    String yname
                   ) {

  check_by(by_x);

  typedef VisitorSetIndexMap<DataFrameJoinVisitors, std::vector<int> > Map;
  DataFrameJoinVisitors visitors(x, y, by_x, by_y, false, na_match);
  Map map(visitors);

  int n_x = x.nrows(), n_y = y.nrows();

  // Fill the map from the rows of `y`.
  // NOTE(review): train_push_back_right presumably stores each y row in the
  // encoded form -i-1, which is why the values are decoded with
  // reverse_index() below — confirm against its definition.
  train_push_back_right(map, n_y);

  List list_col(n_x);

  DataFrameSubsetVisitors y_subset_visitors(y, aux_y);

  // Scratch buffer for the decoded y indices, reused across iterations.
  std::vector<int> indices_y;

  for (int i = 0; i < n_x; i++) {
    Map::iterator it = map.find(i);
    if (it != map.end()) {
      // Decode into the scratch buffer rather than in place: several rows of
      // `x` can share a join key and therefore hit the same map entry.
      // Decoding in place would flip the stored indices back to their encoded
      // form on the second hit and yield a wrong subset for that row.
      const std::vector<int>& encoded = it->second;
      indices_y.resize(encoded.size());
      std::transform(encoded.begin(), encoded.end(), indices_y.begin(), reverse_index);
      list_col[i] = y_subset_visitors.subset(indices_y, Rf_getAttrib(y, R_ClassSymbol));
    } else {
      list_col[i] = y_subset_visitors.subset(EmptySubset(), Rf_getAttrib(y, R_ClassSymbol));
    }
  }

  // Assemble the result: the columns of `x` followed by the new list column.
  int ncol_x = x.size();
  List out(ncol_x + 1);
  CharacterVector names_x = x.names();
  for (int i = 0; i < ncol_x; i++) {
    out[i] = x[i];
  }
  names_x.push_back(yname);
  out[ncol_x] = list_col;
  out.names() = names_x;
  out.attr("class") = x.attr("class");
  out.attr("row.names") = x.attr("row.names");

  // not yet (after the tidy-data-attributes branch is merged)
  // if (is<GroupedDataFrame>(out)) out.attr("groups") = x.attr("groups") ;

  return out;

}



// [[Rcpp::export]]
DataFrame left_join_impl(DataFrame x, DataFrame y,
IntegerVector by_x, IntegerVector by_y,
Expand Down

0 comments on commit 10d1da4

Please sign in to comment.